"""Gradio chat app for the Guanaco-3B model with a live-typing effect.

Loads the model once at startup, keeps an in-memory conversation history,
and streams each reply into the UI one character at a time.
"""

import time

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# --------------------------
# Load model (once, at startup)
# --------------------------
model_name = "Fredithefish/Guanaco-3B-Uncensored-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# --------------------------
# Chat history: list of (user_message, ai_message) pairs
# --------------------------
chat_history = []


def generate_response(prompt):
    """Generate a reply to *prompt*, conditioning on the whole chat history.

    Side effect: appends the new (prompt, response) pair to the module-level
    ``chat_history`` so later turns see the full conversation.
    """
    # Rebuild the running transcript as a "User:/AI:"-formatted prompt.
    # join() avoids the quadratic cost of repeated string +=.
    parts = [f"User: {user_msg}\nAI: {ai_msg}\n" for user_msg, ai_msg in chat_history]
    parts.append(f"User: {prompt}\nAI:")
    context = "".join(parts)

    inputs = tokenizer(context, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
    )

    # The model echoes the whole context; keep only the text after the
    # final "AI:" marker.
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = decoded.split("AI:")[-1].strip()

    chat_history.append((prompt, response))
    return response


def live_typing(prompt):
    """Yield the response one character at a time to simulate live typing.

    Gradio treats a generator handler as a stream, updating the output
    widget on every yield.
    """
    response = generate_response(prompt)
    displayed_text = ""
    for char in response:
        displayed_text += char
        time.sleep(0.02)  # typing speed
        yield displayed_text


# --------------------------
# Gradio UI
# --------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Guanaco-3B Chatbot with Avatars and Live Typing")

    with gr.Row():
        with gr.Column(scale=1):
            user_avatar = gr.Image("user_avatar.png", elem_id="user-avatar")
        with gr.Column(scale=4):
            user_input = gr.Textbox(label="Your Message", placeholder="Type something...")

    with gr.Row():
        with gr.Column(scale=1):
            ai_avatar = gr.Image("ai_avatar.png", elem_id="ai-avatar")
        with gr.Column(scale=4):
            output_box = gr.Textbox(label="AI is typing...", lines=8)

    # Enter in the textbox streams the generator's yields into output_box.
    user_input.submit(live_typing, inputs=[user_input], outputs=[output_box])

demo.launch()