Stefan Ivchenko committed on
Commit 5c4cc72 · 1 Parent(s): db33913

DIE DIE DIE

Files changed (1)
  1. app.py +4 -16
app.py CHANGED
@@ -1,36 +1,24 @@
 import gradio as gr
-from huggingface_hub import hf_hub_download
 from ctransformers import AutoModelForCausalLM
 
-# Download your GGUF from HF Hub
-model_path = hf_hub_download(
-    repo_id="StefanCoder1/Scalable-tuned-GGUF",
-    filename="model-f16.gguf",
-    # token=True,  # uncomment + set HF_TOKEN secret if the repo is private
-)
-
-# Load model with ctransformers
 llm = AutoModelForCausalLM.from_pretrained(
-    model_path,
-    model_type="llama",  # adjust if needed (e.g. "llama", "mistral", etc.)
-    gpu_layers=0,  # CPU only; tweak if you later have GPU
+    "StefanCoder1/Scalable-tuned-GGUF",  # HF repo id
+    model_file="model-f16.gguf",  # exact GGUF filename
+    model_type="llama",  # Llama architecture
+    gpu_layers=0,  # CPU only
 )
 
 def respond(message, history):
-    # history: list of (user, assistant) pairs
     prompt = ""
     for user_msg, assistant_msg in (history or []):
         prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
     prompt += f"User: {message}\nAssistant:"
 
-    # Generate response
     reply = llm(
         prompt,
         max_new_tokens=256,
         temperature=0.7,
     )
-
-    # ctransformers returns a string directly
     return reply
 
 chat = gr.ChatInterface(
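
For reference, a minimal smoke test of the new loading path introduced by this commit. This is a sketch, not part of the commit: it assumes the ctransformers package is installed and that model-f16.gguf exists in the StefanCoder1/Scalable-tuned-GGUF repo (both taken from the diff above); the test prompt is illustrative.

# Sketch: confirm ctransformers can pull the GGUF straight from the Hub,
# exactly as app.py now does, without the hf_hub_download step.
from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "StefanCoder1/Scalable-tuned-GGUF",  # HF repo id (from the commit)
    model_file="model-f16.gguf",         # GGUF filename inside the repo
    model_type="llama",
    gpu_layers=0,                        # CPU only
)

# Quick generation check with the same prompt format respond() builds.
print(llm("User: Hello\nAssistant:", max_new_tokens=32))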