import flask
from flask import request, jsonify
from transformers import pipeline, AutoTokenizer
import torch
import warnings

# Suppress minor warnings that occur on CPU runs
warnings.filterwarnings("ignore")

app = flask.Flask(__name__)

# ===========================
# LOAD MODEL (SmolLM-1.7B)
# This model is small (1.7B parameters) and fully open-access.
# ===========================
model_id = "HuggingFaceTB/SmolLM-1.7B"
print("🔄 Loading model...")

# Select device: GPU index 0 if available, otherwise CPU (-1)
device = 0 if torch.cuda.is_available() else -1
# Use float32 on CPU, bfloat16 on GPU
dtype = torch.float32 if device == -1 else torch.bfloat16

try:
    # 1. Load the tokenizer and set pad_token for stable generation
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    if tokenizer.pad_token is None:
        # Fall back to eos_token to silence the missing-pad-token warning
        tokenizer.pad_token = tokenizer.eos_token

    # 2. Build the pipeline with the configured tokenizer
    ai = pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        max_new_tokens=200,
        device=device,
        torch_dtype=dtype,
        trust_remote_code=True,
    )
    print("✅ Model loaded!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    ai = None

# ===========================
# CHAT API
# ===========================
@app.route('/chat', methods=['POST'])
def chat():
    if ai is None:
        return jsonify({"error": "Model initialization failed."}), 500
    try:
        # silent=True returns None instead of raising on a non-JSON body
        data = request.get_json(silent=True) or {}
        msg = data.get("message", "")
        if not msg:
            return jsonify({"error": "No message sent"}), 400

        # Instruction format: a simple User/Assistant template for this model
        prompt = f"User: {msg}\nAssistant:"
        output = ai(prompt)[0]["generated_text"]

        # The pipeline echoes the prompt, so extract only the model's reply
        # by splitting on the 'Assistant:' tag from the template
        if "Assistant:" in output:
            reply = output.split("Assistant:")[-1].strip()
        elif "User:" in output:
            # Sometimes the model repeats the prompt without the tag
            reply = output.split("User:")[0].strip()
        else:
            reply = output.strip()

        # Remove an echoed copy of the user's message, if any
        if reply.startswith(msg):
            reply = reply[len(msg):].strip()

        return jsonify({"reply": reply})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

# ===========================
# RUN SERVER
# ===========================
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=7860)
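
# ===========================
# USAGE (example)
# ===========================
# A minimal sketch of how to exercise the /chat endpoint once the server is
# running; it assumes the server is reachable at localhost:7860 (the host
# and port configured above).
#
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello, who are you?"}'
#
# A successful response has the shape:
#   {"reply": "<model output>"}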