Spaces:

StefanCoder1
/

LoreChat

Sleeping

App Files Files Community

Stefan Ivchenko committed on 18 days ago

Commit

2f117e9

1 Parent(s): 21dfaad

Rag

Browse files

Files changed (5) hide show

.gitignore +1 -0
app.py +39 -12
data/odysee.txt +0 -0
rag_db.py +89 -0
requirements.txt +6 -1

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .env

app.py CHANGED Viewed

@@ -1,42 +1,69 @@
 import gradio as gr
 from huggingface_hub import hf_hub_download
 import subprocess
 import sys, platform
 from importlib import metadata as md
 subprocess.run("pip install -V llama_cpp_python==0.3.15", shell=True)
 from llama_cpp import Llama
 # Download your GGUF from HF Hub
 model_path = hf_hub_download(
     repo_id="StefanCoder1/Scalable-tuned-GGUF",
-    filename="model-f16.gguf",
     # token=True,  # uncomment + set HF_TOKEN in Space secrets if repo is private
 )
 # Create llama.cpp LLM instance
 llm = Llama(
     model_path=model_path,
-    n_ctx=4096,
-    n_threads=4,   # adjust if you want
 )
 def respond(message, history):
-    # history is a list of (user, assistant) pairs from ChatInterface
-    prompt = ""
     for user_msg, assistant_msg in (history or []):
-        prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
-    prompt += f"User: {message}\nAssistant:"
     output = llm(
         prompt,
-        max_tokens=256,
         temperature=0.7,
-        stop=["User:", "Assistant:"],
     )
-    reply = output["choices"][0]["text"]
     return reply
 chat = gr.ChatInterface(

 import gradio as gr
 from huggingface_hub import hf_hub_download
 import subprocess
 import sys, platform
 from importlib import metadata as md
+from rag_db import *
 subprocess.run("pip install -V llama_cpp_python==0.3.15", shell=True)
 from llama_cpp import Llama
 # Download your GGUF from HF Hub
 model_path = hf_hub_download(
     repo_id="StefanCoder1/Scalable-tuned-GGUF",
+    filename="model-Q4_K_M.gguf",
     # token=True,  # uncomment + set HF_TOKEN in Space secrets if repo is private
 )
+# Open (or build) the vector store once at import time and expose a retriever over it.
+db = init_vectorstore()
+retriever = db.as_retriever(search_kwargs={"k": 1}) # number of chunks to retrieve per query
 # Create llama.cpp LLM instance
 llm = Llama(
     model_path=model_path,
+    n_ctx=2048, # originally 4096
+    n_threads=2,   # originally 4
+    n_batch=64,        # new in this commit
+    use_mmap=True,     # new in this commit
+    use_mlock=False,    # new in this commit
 )
 def respond(message, history):
+    # 1. Retrieve Context
+    context = ask(message)
+    print(context)
+    # 2. Define System/Contextual Prompt
+    system_instruction = (
+        "You are an expert on mythology and fantasy creatures. "
+        "Use the provided CONTEXT to answer the USER's question accurately. "
+        "If the CONTEXT does not contain the answer, state that you don't know "
+        "based on the available information."
+    )
+    # 3. Start building the prompt with the system instruction and RAG context
+    prompt = f"System Instruction: {system_instruction}\n\n"
+    prompt += f"CONTEXT:\n---\n{context}\n---\n\n"
+    # 4. Add Conversation History
+    prompt += "CONVERSATION HISTORY:\n"
     for user_msg, assistant_msg in (history or []):
+        # Use clear labels for history
+        prompt += f"User: {user_msg} \n Assistant: {assistant_msg}\n"
+    # 5. Add the final turn
+    prompt += f"User: {message} \nAssistant:"
     output = llm(
         prompt,
+        max_tokens=400,
         temperature=0.7,
+        stop=["User:", "Assistant:", "CONVERSATION HISTORY:", "CONTEXT:"],
     )
+    reply = output["choices"][0]["text"].strip()
     return reply
 chat = gr.ChatInterface(

data/odysee.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

rag_db.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import os
+import glob
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_openai import OpenAIEmbeddings
+from langchain_chroma import Chroma
+from langchain_community.document_loaders import TextLoader, PyPDFLoader
+from dotenv import load_dotenv
+BASE_DIR = os.path.dirname(__file__)
+DATA_PATH = os.path.join(BASE_DIR, "/data")
+DB_DIR   = os.path.join(BASE_DIR, "/rag_db")
+# Read variables from a .env file; override=True is passed so values from the
+# file take precedence over variables already present in the environment.
+load_dotenv(override=True)
+# os.getenv returns None when OPENAI_API_KEY is not set.
+OPENAI_API_KEY=os.getenv("OPENAI_API_KEY")
+def load_and_chunk():
+    docs = []
+    # Iterate over all files in the folder
+    for file_path in glob.glob(os.path.join(DATA_PATH, "*")):
+        ext = file_path.lower()
+        print(file_path)
+        if ext.endswith(".txt") or ext.endswith(".md"):
+            loader = TextLoader(file_path)
+        elif ext.endswith(".pdf"):
+            loader = PyPDFLoader(file_path)
+        else:
+            print(f"Skipping unsupported file: {file_path}")
+            continue
+        docs.extend(loader.load())
+    # 2. Chunk them
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=800,
+        chunk_overlap=150
+    )
+    return splitter.split_documents(docs)
+def init_vectorstore():  # vectorizes our embeddings
+    """
+    Function that initializes the vectorstore, could be used in eg main loop
+    """
+    def db_is_empty(db_path):
+        # Check if chroma sqlite file exists and is > 0 bytes
+        sqlite_file = os.path.join(db_path, "chroma.sqlite3")
+        return not os.path.exists(sqlite_file) or os.path.getsize(sqlite_file) == 0
+    emb = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
+    if db_is_empty(DB_DIR) is False:
+        # Load existing DB (no re-embedding)
+        return Chroma(
+            persist_directory=DB_DIR,
+            embedding_function=emb
+        )
+    # First-time: create DB
+    print("first time creating the vec store")
+    chunks = load_and_chunk()
+    db = Chroma.from_documents(
+        documents=chunks,
+        embedding=emb,
+        persist_directory=DB_DIR
+    )
+    return db
+# Initialize
+# Format documents for the prompt
+def format_docs(docs):
+    """Format retrieved documents into a string."""
+    return "\n\n".join(
+        f"Source: {doc.metadata.get('source', 'Unknown')}\n{doc.page_content}"
+        for doc in docs
+    )
+# This is what will be used!
+def ask(query: str) -> str:
+    """Simple call for external modules (like narrator)."""
+    docs = retriever.invoke(query)
+    return docs

requirements.txt CHANGED Viewed

	@@ -1 +1,6 @@
1	- huggingface_hub

+huggingface_hub
+langchain-text-splitters
+langchain-openai
+langchain-chroma
+langchain-community
+pypdf