Smllm

Sleeping

File size: 1,182 Bytes

from flask import Flask, request, jsonify
from transformers import pipeline
import torch

app = Flask(__name__)

# ===========================
# LOAD LOCAL/FREE MODEL
# ===========================
# Put your own model ID here, or any freely available Hugging Face model.
# NOTE(review): the original comment stated that no Hugging Face token is
# needed; google/gemma-2b appears to be a gated repository -- confirm the
# access requirements before relying on that.
model_id = "google/gemma-2b"  # Example model
print("🔄 Loading model...")

# Device index handed to pipeline(): 0 selects the GPU, -1 selects the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Text-generation pipeline shared by the request handlers; loaded once at
# import time.
ai = pipeline(
    task="text-generation",
    model=model_id,
    device=device,
    max_new_tokens=200,
)
print("✅ Model loaded!")

# ===========================
# CHAT API
# ===========================
@app.route('/chat', methods=['POST'])
def chat():
    """Generate a model reply for the ``message`` field of a JSON POST body.

    The request body is expected to be a JSON object such as
    ``{"message": "Hello"}``.

    Returns:
        A JSON response, paired with an explicit status on errors:

        * ``{"reply": <generated text>}`` on success.
        * ``{"error": ...}`` with status 400 when the body is not a JSON
          object, or when ``message`` is missing, empty, or not a string.
        * ``{"error": <exception text>}`` with status 500 when the model
          call itself raises.
    """
    # silent=True makes get_json() return None instead of raising on a
    # missing, non-JSON, or malformed body, so bad requests are answered
    # with 400 instead of being reported as server errors.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    msg = data.get("message", "")
    if not msg:
        return jsonify({"error": "No message sent"}), 400
    if not isinstance(msg, str):
        return jsonify({"error": "'message' must be a string"}), 400

    # Only the model call is guarded: it is the one step expected to fail
    # for reasons outside the client's control.
    try:
        # NOTE: per the transformers docs, a text-generation pipeline returns
        # the prompt followed by the completion unless return_full_text=False
        # is passed, so the reply presumably echoes the message -- confirm
        # this is intended.
        output = ai(msg)[0]["generated_text"]
    except Exception as e:
        # Keep the full traceback in the server log for debugging.
        app.logger.exception("Text generation failed")
        # NOTE(review): str(e) is returned to the client to preserve the
        # existing response contract; it may expose internal details.
        return jsonify({"error": str(e)}), 500

    return jsonify({"reply": output})

# ===========================
# RUN SERVER
# ===========================
if __name__ == "__main__":
    # Start Flask's built-in server only when this file is executed directly;
    # listen on every network interface, port 7860.
    app.run(port=7860, host="0.0.0.0")