Spaces:

Bahaedev
/

ghctf2-prompt-cracker-1

Sleeping

App Files Files Community

Bahaedev committed on Aug 2

Commit

6684f10

verified ·

1 Parent(s): 6945529

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -37

app.py CHANGED Viewed

@@ -1,61 +1,47 @@
 import os
 from fastapi import FastAPI
 from pydantic import BaseModel
-import gradio as gr
 import threading
 import uvicorn
 # =======================
 # Load Secrets
 # =======================
 SYSTEM_PROMPT = os.environ.get(
     "prompt",
     "You are a placeholder Sovereign. No secrets found in environment."
 )
 # =======================
-# Initialize Unsloth-optimized Falcon-3B
 # =======================
-# Install via: pip install unsloth torch transformers
-from unsloth import FastLanguageModel
-from transformers import AutoTokenizer
-MODEL_NAME = "tiiuae/Falcon3-3B-Instruct"
-# 1) Load model and tokenizer with 4-bit quantization
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name=MODEL_NAME,
-    max_seq_length=2048,
-    load_in_4bit=True,
-    dtype=None,
 )
-# 2) Apply inference optimizations (fused kernels, streaming, etc.)
-FastLanguageModel.for_inference(model)
 # =======================
 # Core Chat Function
 # =======================
 def chat_fn(user_input: str) -> str:
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
         {"role": "user",   "content": f"User: {user_input}"}
     ]
     prompt_text = "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)
-    # Tokenize and run generation
-    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
-    output_ids = model.generate(
-        **inputs,
-        max_new_tokens=256,
-        do_sample=False,
-        eos_token_id=tokenizer.eos_token_id
-    )
-    # Decode only the newly generated tokens
-    gen_tokens = output_ids[0][inputs.input_ids.shape[-1]:]
-    generated_text = tokenizer.decode(gen_tokens, skip_special_tokens=True)
-    return generated_text.strip()
 # =======================
 # Gradio UI
@@ -71,10 +57,6 @@ iface = gr.Interface(
     description="Does he really think he is the king?"
 )
-# Run Gradio in a separate thread so FastAPI can also start
-def run_gradio():
-    iface.launch(server_name="0.0.0.0", share=True)
 # =======================
 # FastAPI for API access
 # =======================
@@ -91,5 +73,4 @@ def generate(req: Request):
 # Launch Both Servers
 # =======================
 if __name__ == "__main__":
-    threading.Thread(target=run_gradio, daemon=True).start()
-    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 8000)))

 import os
+from transformers import pipeline
+import gradio as gr
 from fastapi import FastAPI
 from pydantic import BaseModel
 import threading
 import uvicorn
 # =======================
 # Load Secrets
 # =======================
+# The system prompt (with the flag) must be added to the HF Space secrets under the name "prompt"
 SYSTEM_PROMPT = os.environ.get(
     "prompt",
     "You are a placeholder Sovereign. No secrets found in environment."
 )
 # =======================
+# Initialize Falcon-3B
 # =======================
+pipe = pipeline(
+    "text-generation",
+    model="tiiuae/Falcon3-3B-Instruct",
+    torch_dtype="auto",
+    device_map="auto",
 )
 # =======================
 # Core Chat Function
 # =======================
 def chat_fn(user_input: str) -> str:
+    """
+    Concatenate system and user messages, run the model,
+    and strip the system prompt from the output.
+    """
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
         {"role": "user",   "content": f"User: {user_input}"}
     ]
+    # Falcon is not chat-native; we just join roles with newlines
     prompt_text = "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)
+    result = pipe(prompt_text, max_new_tokens=256, do_sample=False)
+    generated_text = result[0]["generated_text"]
+    return generated_text[len(prompt_text):].strip()
 # =======================
 # Gradio UI
     description="Does he really think he is the king?"
 )
 # =======================
 # FastAPI for API access
 # =======================
 # Launch Both Servers
 # =======================
 if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", share=True)