Spaces:

Bahaedev
/

ghctf2-prompt-cracker-1

Sleeping

App Files Files Community

Bahaedev committed on Jul 31

Commit

3543359

verified ·

1 Parent(s): d094312

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -18

app.py CHANGED Viewed

@@ -1,47 +1,61 @@
 import os
-from transformers import pipeline
-import gradio as gr
 from fastapi import FastAPI
 from pydantic import BaseModel
 import threading
 import uvicorn
 # =======================
 # Load Secrets
 # =======================
-# SYSTEM_PROMPT (with the flag) must be added in HF Space secrets
 SYSTEM_PROMPT = os.environ.get(
     "prompt",
     "You are a placeholder Sovereign. No secrets found in environment."
 )
 # =======================
-# Initialize Falcon-3B
 # =======================
-pipe = pipeline(
-    "text-generation",
-    model="tiiuae/Falcon3-3B-Instruct",
-    torch_dtype="auto",
-    device_map="auto",
 )
 # =======================
 # Core Chat Function
 # =======================
 def chat_fn(user_input: str) -> str:
-    """
-    Concatenate system and user messages, run the model,
-    and return the text that follows the prompt in the output.
-
-    The first len(prompt_text) characters of the generated text are
-    dropped and the remainder is whitespace-stripped.
-    """
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
-        # NOTE(review): the capitalized role is prepended again when the
-        # prompt is joined below, so this message renders as
-        # "User: User: <input>" - confirm that is intended.
         {"role": "user",   "content": f"User: {user_input}"}
     ]
-    # Falcon is not chat-native; we just join roles with newlines
     prompt_text = "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)
-    result = pipe(prompt_text, max_new_tokens=256, do_sample=False)
-    generated_text = result[0]["generated_text"]
-    # Slice by prompt length; presumably the pipeline echoes the prompt at
-    # the start of generated_text - TODO confirm.
-    return generated_text[len(prompt_text):].strip()
 # =======================
 # Gradio UI
@@ -57,6 +71,10 @@ iface = gr.Interface(
     description="Does he really think he is the king?"
 )
 # =======================
 # FastAPI for API access
 # =======================
@@ -73,4 +91,5 @@ def generate(req: Request):
 # Launch Both Servers
 # =======================
 if __name__ == "__main__":
-    iface.launch(server_name="0.0.0.0", share=True)

 import os
 from fastapi import FastAPI
 from pydantic import BaseModel
+import gradio as gr
 import threading
 import uvicorn
 # =======================
 # Load Secrets
 # =======================
 SYSTEM_PROMPT = os.environ.get(
+    # Name of the environment variable that holds the system prompt
     "prompt",
+    # Fallback value used when that variable is not set
     "You are a placeholder Sovereign. No secrets found in environment."
 )
 # =======================
+# Initialize Unsloth-optimized Falcon-3B
 # =======================
+# Install via: pip install unsloth torch transformers
+from unsloth import FastLanguageModel
+from transformers import AutoTokenizer
+MODEL_NAME = "tiiuae/Falcon3-3B-Instruct"
+# These statements sit at module top level, so the model is loaded when
+# the module is imported, not only when it is run as a script.
+# 1) Load model and tokenizer with 4-bit quantization
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name=MODEL_NAME,
+    max_seq_length=2048,
+    load_in_4bit=True,
+    # NOTE(review): dtype=None presumably leaves dtype selection to Unsloth - confirm
+    dtype=None,
 )
+# 2) Apply inference optimizations (fused kernels, streaming, etc.)
+# NOTE(review): what for_inference actually enables is not visible here;
+# verify the list above against the Unsloth documentation.
+FastLanguageModel.for_inference(model)
 # =======================
 # Core Chat Function
 # =======================
 def chat_fn(user_input: str) -> str:
+    """
+    Build a plain-text prompt from SYSTEM_PROMPT and the user's input,
+    generate a continuation, and return only the newly generated text.
+
+    Args:
+        user_input: Text placed in the user message.
+
+    Returns:
+        The decoded continuation (special tokens skipped) with leading
+        and trailing whitespace removed.
+    """
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
+        # NOTE(review): the capitalized role is prepended again when the
+        # prompt is joined below, so this message renders as
+        # "User: User: <input>" - confirm that is intended.
         {"role": "user",   "content": f"User: {user_input}"}
     ]
+    # Render each message as "<Role>: <content>", one message per line
     prompt_text = "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)
+    # Tokenize and run generation
+    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
+    output_ids = model.generate(
+        **inputs,
+        max_new_tokens=256,
+        do_sample=False,
+        eos_token_id=tokenizer.eos_token_id
+    )
+    # Decode only the newly generated tokens
+    # (skips the first inputs.input_ids.shape[-1] positions of the first
+    # output sequence; presumably those positions hold the prompt - confirm)
+    gen_tokens = output_ids[0][inputs.input_ids.shape[-1]:]
+    generated_text = tokenizer.decode(gen_tokens, skip_special_tokens=True)
+    return generated_text.strip()
 # =======================
 # Gradio UI
     description="Does he really think he is the king?"
 )
+# Run Gradio in a separate thread so FastAPI can also start
+def run_gradio():
+    """Launch the Gradio interface `iface` on host 0.0.0.0 with share=True.
+
+    Used as the thread target in the ``__main__`` block.
+    """
+    iface.launch(server_name="0.0.0.0", share=True)
 # =======================
 # FastAPI for API access
 # =======================
 # Launch Both Servers
 # =======================
 if __name__ == "__main__":
+    # Start the Gradio UI in a background daemon thread
+    threading.Thread(target=run_gradio, daemon=True).start()
+    # Serve `app` (defined outside this view; presumably the FastAPI
+    # instance) from the main thread. The port is read from the PORT
+    # environment variable and defaults to 8000.
+    # NOTE(review): run_gradio does not set a port, so the two servers
+    # listen on separate ports - confirm the hosting platform exposes both.
+    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 8000)))