# Smllm / app.py
# Hugging Face Spaces file header (scrape residue converted to comments):
#   ghosthets's picture — Create app.py — commit a5e6555 (verified)
#   raw / history / blame — 1.74 kB
import flask  # keeping Flask (not Gradio) for the HTTP API
from flask import request, jsonify
# from transformers import pipeline  # no longer needed
# import torch  # no longer needed
from ctransformers import AutoModelForCausalLM  # load the GGUF model via ctransformers
app = flask.Flask(__name__)  # WSGI application instance
# ===========================
# LOAD MODEL
# ===========================
# Quantized GGUF build of Mistral-7B-Instruct, served via ctransformers on CPU.
model_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"

print("🔄 Loading model...")
# Initialize to None so a failed load leaves `ai` defined; otherwise the
# /chat handler would raise NameError instead of a handled error.
ai = None
try:
    # Load the GGUF model on CPU using ctransformers.
    ai = AutoModelForCausalLM.from_pretrained(
        model_id,
        model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",  # name of the GGUF weights file
        model_type="mistral",
        gpu_layers=0,  # 0 => run entirely on CPU
    )
    print("✅ Model loaded!")
except Exception as e:
    # Best-effort startup: log and keep the server alive; `ai` stays None.
    print(f"❌ Error loading model: {e}")
# ===========================
# CHAT API
# ===========================
@app.route('/chat', methods=['POST'])
def chat():
    """Handle POST /chat with a JSON body {"message": "..."}.

    Returns {"reply": <model output>} on success, a 400 for a missing/
    malformed message, or a 500 with the error text on model failure.
    """
    try:
        # silent=True makes get_json return None for a non-JSON body instead
        # of raising, so a malformed request yields a 400 rather than a 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "No message sent"}), 400
        msg = data.get("message", "")
        if not msg:
            return jsonify({"error": "No message sent"}), 400
        # Generate the reply by calling the ctransformers model directly.
        output = ai(msg, max_new_tokens=200, temperature=0.7)
        return jsonify({"reply": output})
    except Exception as e:
        # API boundary: surface any runtime/model failure as a JSON 500.
        return jsonify({"error": str(e)}), 500
# ===========================
# RUN SERVER
# ===========================
if __name__ == "__main__":
    # Bind to 0.0.0.0 so the server is reachable from outside the container.
    # NOTE(review): 7860 is the conventional Hugging Face Spaces port — confirm
    # the deployment expects it.
    app.run(host='0.0.0.0', port=7860)