ghosthets committed on
Commit 2fd8041 · verified · 1 Parent(s): bedec45

Create app.py

Files changed (1)
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
+ from flask import Flask, request, jsonify
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+
+ app = Flask(__name__)
+
+ model_id = "HuggingFaceH4/zephyr-7b-beta"
+
+ print("🚀 Loading model:", model_id)
+
+ # Slow tokenizer as in the original; half precision on GPU keeps the
+ # 7B model's memory footprint manageable.
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+ )
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
+
+ print("✅ Model Loaded Successfully")
+
+ @app.route('/chat', methods=['POST'])
+ def chat():
+     try:
+         # Tolerate a missing or malformed JSON body instead of raising.
+         data = request.get_json(silent=True) or {}
+         message = data.get("message", "").strip()
+
+         if not message:
+             return jsonify({"error": "Empty message"}), 400
+
+         # Simple clean prompt
+         prompt = f"Human: {message}\nAssistant:"
+
+         inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+         # max_new_tokens bounds the reply itself (max_length would count
+         # the prompt tokens too); the attention mask avoids the pad-token
+         # warning when pad and eos ids coincide.
+         output = model.generate(
+             inputs.input_ids,
+             attention_mask=inputs.attention_mask,
+             max_new_tokens=300,
+             do_sample=True,
+             top_k=40,
+             top_p=0.92,
+             temperature=0.72,
+             pad_token_id=tokenizer.eos_token_id
+         )
+
+         # Decode the full sequence, then keep only the text after the
+         # final "Assistant:" marker as the reply.
+         full = tokenizer.decode(output[0], skip_special_tokens=True)
+         reply = full.split("Assistant:")[-1].strip()
+
+         return jsonify({"reply": reply})
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+
+ @app.route('/')
+ def home():
+     return "LLM Space Active."
+
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=7860)
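
For quick testing, here is a minimal client sketch (not part of this commit) that exercises the /chat endpoint. The localhost URL assumes the app is running locally on port 7860; substitute your Space's URL as needed.

# Hypothetical client for the /chat endpoint above; not part of the commit.
import requests

resp = requests.post(
    "http://localhost:7860/chat",   # assumed address; replace with your Space URL
    json={"message": "What is the capital of France?"},
    timeout=120,                    # generation can be slow, especially on CPU
)
data = resp.json()
print(data.get("reply") or data)    # prints the reply, or the error payload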