from flask import Flask, request, jsonify
from transformers import pipeline
import torch

app = Flask(__name__)

# ===========================
# LOAD LOCAL MODEL
# ===========================
# NOTE(review): google/gemma-2b is a gated model on Hugging Face and may
# require accepting its license / an auth token to download — confirm, or
# substitute a fully open model id here.
model_id = "google/gemma-2b"

print("🔄 Loading model...")

# transformers pipeline device convention: -1 -> CPU, 0 -> first GPU.
device = 0 if torch.cuda.is_available() else -1

ai = pipeline(
    "text-generation",
    model=model_id,
    max_new_tokens=200,
    device=device,
)

print("✅ Model loaded!")


# ===========================
# CHAT API
# ===========================
@app.route('/chat', methods=['POST'])
def chat():
    """Generate a text completion for the posted message.

    Expects a JSON body of the form ``{"message": "..."}``.

    Returns:
        200 with ``{"reply": "<generated text>"}`` on success,
        400 with ``{"error": ...}`` when the body is missing/invalid
        or contains no message,
        500 with ``{"error": ...}`` when generation fails.
    """
    try:
        # silent=True makes get_json() return None on a missing or
        # malformed JSON body instead of raising — without it, a bad
        # request would fall through to the except and return 500
        # where 400 is the correct status.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "No message sent"}), 400

        msg = data.get("message", "")
        if not msg:
            return jsonify({"error": "No message sent"}), 400

        # NOTE: text-generation pipelines include the prompt at the start
        # of "generated_text", so the reply is prompt + completion.
        output = ai(msg)[0]["generated_text"]
        return jsonify({"reply": output})
    except Exception as e:
        # Top-level API boundary: surface the failure as a JSON 500
        # rather than letting Flask emit an HTML error page.
        return jsonify({"error": str(e)}), 500


# ===========================
# RUN SERVER
# ===========================
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (Hugging Face Spaces default).
    app.run(host='0.0.0.0', port=7860)