"""Minimal Flask API that serves a Hugging Face text-generation model as a chat endpoint."""
import warnings

import flask
import torch
from flask import request, jsonify
from transformers import pipeline, AutoTokenizer

# Silence the noisy warnings transformers and torch emit at import/load time.
warnings.filterwarnings("ignore")

app = flask.Flask(__name__)

model_id = "HuggingFaceTB/SmolLM-1.7B"
print("🔄 Loading model...")

# transformers pipeline convention: device=0 means the first GPU, device=-1 means CPU.
device = 0 if torch.cuda.is_available() else -1
# bfloat16 halves memory use on GPU; fall back to full float32 precision on CPU.
dtype = torch.float32 if device == -1 else torch.bfloat16

try:
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    # Some checkpoints ship without a pad token; reuse EOS so padding works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    ai = pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        max_new_tokens=200,
        device=device,
        torch_dtype=dtype,
        trust_remote_code=True,
    )
    print("✅ Model loaded!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    ai = None
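
# Note: SmolLM-1.7B is a base (non-instruct) model, so the "User:/Assistant:"
# prompt used below is a plain-text convention, not a built-in chat template.
# One possible simplification (untested here): passing return_full_text=False
# when calling the pipeline would return only the newly generated text and
# remove most of the parsing in /chat.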


@app.route('/chat', methods=['POST'])
def chat():
    if ai is None:
        return jsonify({"error": "Model initialization failed."}), 500

    try:
        # silent=True returns None instead of raising on a missing/invalid
        # JSON body, so a bad request gets a 400 rather than a 500.
        data = request.get_json(silent=True) or {}
        msg = data.get("message", "")
        if not msg:
            return jsonify({"error": "No message sent"}), 400

        prompt = f"User: {msg}\nAssistant:"
        output = ai(prompt)[0]["generated_text"]

        # The pipeline returns the prompt plus the completion, so keep only
        # the text after the final "Assistant:" marker.
        if "Assistant:" in output:
            reply = output.split("Assistant:")[-1]
        else:
            reply = output
        # A base model tends to keep writing the dialogue; cut the reply off
        # at the next "User:" turn it invents.
        reply = reply.split("User:")[0].strip()

        # Guard against the model echoing the user's message back verbatim.
        if reply.startswith(msg):
            reply = reply[len(msg):].strip()

        return jsonify({"reply": reply})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
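
# Example request once the server is running (hypothetical local test):
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello there!"}'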


if __name__ == "__main__":
    # 0.0.0.0 listens on all interfaces; 7860 is the usual Hugging Face Spaces port.
    app.run(host='0.0.0.0', port=7860)