Spaces:

DSDUDEd
/

Firefox.AI.Model

Running

App Files Community

DSDUDEd committed on Oct 6

Commit

1c686e6

verified ·

1 Parent(s): 06059b4

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -15

app.py CHANGED Viewed

@@ -1,23 +1,23 @@
-# app.py
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-# Load model and tokenizer
 model_name = "DSDUDEd/firebase"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
-# Ensure model uses GPU if available
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = model.to(device)
-# Function to generate responses
 def chat_with_model(user_input, chat_history=[]):
-    # Append user input to history
     chat_history.append({"role": "user", "content": user_input})
-    # Prepare prompt
     prompt = ""
     for turn in chat_history:
         if turn["role"] == "user":
@@ -25,7 +25,8 @@ def chat_with_model(user_input, chat_history=[]):
         else:
             prompt += f"AI: {turn['content']}\n"
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
     outputs = model.generate(
         **inputs,
         max_new_tokens=150,
@@ -33,14 +34,14 @@ def chat_with_model(user_input, chat_history=[]):
         top_p=0.9,
         temperature=0.7,
     )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Extract AI response (assume last part after "AI: ")
     response_text = response.split("AI:")[-1].strip()
     chat_history.append({"role": "ai", "content": response_text})
-    # Prepare chat history for Gradio
     chat_for_gradio = [(turn["content"], "") if turn["role"]=="user" else ("", turn["content"]) for turn in chat_history]
     return chat_for_gradio, chat_history
@@ -51,7 +52,8 @@ with gr.Blocks() as demo:
     chatbot = gr.Chatbot()
     msg = gr.Textbox(label="Enter your message")
     submit = gr.Button("Send")
     submit.click(chat_with_model, inputs=[msg, chat_history_state], outputs=[chatbot, chat_history_state])
 demo.launch()

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+# Model name
 model_name = "DSDUDEd/firebase"
+# Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="auto",      # automatically assigns to GPU if available
+    load_in_8bit=True       # load in 8-bit to save memory
+)
+# Function to generate AI responses
 def chat_with_model(user_input, chat_history=[]):
     chat_history.append({"role": "user", "content": user_input})
+    # Build the prompt from chat history
     prompt = ""
     for turn in chat_history:
         if turn["role"] == "user":
         else:
             prompt += f"AI: {turn['content']}\n"
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         **inputs,
         max_new_tokens=150,
         top_p=0.9,
         temperature=0.7,
     )
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Get only the AI's response
     response_text = response.split("AI:")[-1].strip()
     chat_history.append({"role": "ai", "content": response_text})
+    # Prepare Gradio chat format
     chat_for_gradio = [(turn["content"], "") if turn["role"]=="user" else ("", turn["content"]) for turn in chat_history]
     return chat_for_gradio, chat_history
     chatbot = gr.Chatbot()
     msg = gr.Textbox(label="Enter your message")
     submit = gr.Button("Send")
     submit.click(chat_with_model, inputs=[msg, chat_history_state], outputs=[chatbot, chat_history_state])
 demo.launch()