DSDUDEd committed
Commit 9bc4c0b · verified · 1 Parent(s): 4ec1d9a

Update app.py

Files changed (1)
  1. app.py +43 -12
app.py CHANGED
@@ -1,14 +1,16 @@
 import time
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
-from llama_cpp import Llama
 
 # --------------------------
-# Load CPU-friendly 4B model
+# Load model
 # --------------------------
-llm = Llama.from_pretrained(
-    repo_id="DavidAU/Gemma-3-it-4B-Uncensored-DBL-X-GGUF",
-    filename="Gemma-3-it-4B-Uncensored-D_AU-F16.gguf",
-)
+model_name = "Fredithefish/Guanaco-3B-Uncensored-v2"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
 
 # --------------------------
 # Chat history
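
Review note: this swaps the GGUF build served through llama_cpp, which is what made the old Space CPU-friendly, for full transformers weights; a 3B-parameter model in float32 needs roughly 12 GB of RAM just for the weights. If the Space stays on CPU, a lighter load is possible with standard from_pretrained options. A minimal sketch, not part of this commit:

    # Sketch only: half precision when a GPU is present, lower peak RAM
    # while loading on CPU. torch_dtype and low_cpu_mem_usage are standard
    # transformers keyword arguments.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        low_cpu_mem_usage=True,
    )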
@@ -20,21 +22,50 @@ chat_history = []
 # --------------------------
 def generate_response(prompt):
     global chat_history
-    # Combine previous conversation
+    # Combine previous messages
     context = ""
     for user_msg, ai_msg in chat_history:
         context += f"User: {user_msg}\nAI: {ai_msg}\n"
     context += f"User: {prompt}\nAI:"
 
-    # Generate text
-    output = llm(prompt=context, max_tokens=200)
-    response = output['choices'][0]['text'].strip()
-
-    # Update history
+    inputs = tokenizer(context, return_tensors="pt").to(device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=150,
+        do_sample=True,
+        temperature=0.7
+    )
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("AI:")[-1].strip()
     chat_history.append((prompt, response))
     return response
 
 # --------------------------
+# Simulate live typing
+# --------------------------
+def live_typing(prompt):
+    response = generate_response(prompt)
+    displayed_text = ""
+    for char in response:
+        displayed_text += char
+        time.sleep(0.02)  # typing speed
+        yield displayed_text
+
+# --------------------------
+# Gradio UI
+# --------------------------
+with gr.Blocks() as demo:
+    gr.Markdown("## 🤖 Guanaco-3B Chatbot with Avatars and Live Typing")
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            user_avatar = gr.Image("user_avatar.png", elem_id="user-avatar")
+        with gr.Column(scale=4):
+            user_input = gr.Textbox(label="Your Message", placeholder="Type something...")
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            ai_avatar = gr.Image("ai_avatar.png", elem_id="ai-av_
+# --------------------------
 # Live typing simulation
 # --------------------------
 def live_typing(prompt):
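
Review note on the decode step: model.generate returns the prompt ids followed by the newly generated tokens, so the decoded string contains the entire conversation, and split("AI:")[-1] keeps only the text after the last "AI:" marker. An illustration with made-up strings:

    # Hypothetical decoded output, not from this commit:
    decoded = "User: hi\nAI: hello!\nUser: how are you?\nAI: doing fine."
    reply = decoded.split("AI:")[-1].strip()
    # reply == "doing fine."

If a reply ever contains the literal text "AI:", everything before that marker is lost; decoding only the new tokens, e.g. tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True), would not depend on the marker.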
 
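The diff ends mid-line: the new file is cut off inside elem_id="ai-av_, and the "# Live typing simulation" block after it is leftover from the previous version, so app.py as committed will not parse. A minimal sketch of how the truncated UI block could be completed, staying inside the existing with gr.Blocks() as demo: context; the names ai_output and send_btn are hypothetical and do not appear in this commit:

    with gr.Row():
        with gr.Column(scale=1):
            ai_avatar = gr.Image("ai_avatar.png", elem_id="ai-avatar")  # presumed completion
        with gr.Column(scale=4):
            ai_output = gr.Textbox(label="AI Response")  # hypothetical name

    send_btn = gr.Button("Send")  # hypothetical name
    # live_typing is a generator, so Gradio streams each yielded partial
    # string into ai_output, producing the typing effect.
    send_btn.click(fn=live_typing, inputs=user_input, outputs=ai_output)

    demo.launch()

Older Gradio releases also need demo.queue() before launch() for generator-based streaming.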