Stefan Ivchenko committed
Commit 2f117e9 · 1 Parent(s): 21dfaad
Files changed (5)
  1. .gitignore +1 -0
  2. app.py +39 -12
  3. data/odysee.txt +0 -0
  4. rag_db.py +89 -0
  5. requirements.txt +6 -1
.gitignore ADDED
@@ -0,0 +1 @@
+.env
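
The newly ignored .env is where rag_db.py's load_dotenv() call reads OPENAI_API_KEY from when the app runs outside Space secrets. A quick sanity check, mirroring exactly the calls rag_db.py makes:

# Confirm the key from the git-ignored .env is visible before building the vector store
import os
from dotenv import load_dotenv

load_dotenv(override=True)
assert os.getenv("OPENAI_API_KEY"), "OPENAI_API_KEY missing: set it in .env or Space secrets"
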
app.py CHANGED
@@ -1,42 +1,69 @@
 import gradio as gr
 from huggingface_hub import hf_hub_download
-
-
 import subprocess
 import sys, platform
 from importlib import metadata as md
+from rag_db import init_vectorstore, ask  # explicit imports from the new RAG module
 
 subprocess.run("pip install llama_cpp_python==0.3.15", shell=True)
 from llama_cpp import Llama
 
 # Download your GGUF from HF Hub
 model_path = hf_hub_download(
     repo_id="StefanCoder1/Scalable-tuned-GGUF",
-    filename="model-f16.gguf",
+    filename="model-Q4_K_M.gguf",
     # token=True,  # uncomment + set HF_TOKEN in Space secrets if repo is private
 )
 
+db = init_vectorstore()
+retriever = db.as_retriever(search_kwargs={"k": 1})  # how many chunks to retrieve
+
 # Create llama.cpp LLM instance
 llm = Llama(
     model_path=model_path,
-    n_ctx=4096,
-    n_threads=4,  # adjust if you want
+    n_ctx=2048,       # originally 4096
+    n_threads=2,      # originally 4
+    n_batch=64,       # new
+    use_mmap=True,    # new
+    use_mlock=False,  # new
 )
 
 def respond(message, history):
-    # history is a list of (user, assistant) pairs from ChatInterface
-    prompt = ""
+    # 1. Retrieve context (pass the retriever in: ask() lives in rag_db,
+    #    which cannot see app.py's globals)
+    context = ask(message, retriever)
+    print(context)
+
+    # 2. Define the system/contextual prompt
+    system_instruction = (
+        "You are an expert on mythology and fantasy creatures. "
+        "Use the provided CONTEXT to answer the USER's question accurately. "
+        "If the CONTEXT does not contain the answer, state that you don't know "
+        "based on the available information."
+    )
+
+    # 3. Start building the prompt with the system instruction and RAG context
+    prompt = f"System Instruction: {system_instruction}\n\n"
+    prompt += f"CONTEXT:\n---\n{context}\n---\n\n"
+
+    # 4. Add the conversation history, with clear labels for each turn
+    prompt += "CONVERSATION HISTORY:\n"
     for user_msg, assistant_msg in (history or []):
         prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
-    prompt += f"User: {message}\nAssistant:"
+
+    # 5. Add the final turn
+    prompt += f"User: {message}\nAssistant:"
 
     output = llm(
         prompt,
-        max_tokens=256,
+        max_tokens=400,
         temperature=0.7,
-        stop=["User:", "Assistant:"],
+        stop=["User:", "Assistant:", "CONVERSATION HISTORY:", "CONTEXT:"],
     )
 
-    reply = output["choices"][0]["text"]
+    reply = output["choices"][0]["text"].strip()
     return reply
 
 chat = gr.ChatInterface(
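
Since this commit halves n_ctx to 2048 while adding retrieved context, a system instruction, and the full chat history to the prompt, the window can fill up. A minimal sketch of a guard using llama-cpp-python's tokenize() (the helper name and check are illustrative additions, not part of the commit):

# Guard: prompt tokens plus max_tokens of generation must fit in n_ctx=2048
def fits_context(llm, prompt: str, max_tokens: int = 400, n_ctx: int = 2048) -> bool:
    n_prompt = len(llm.tokenize(prompt.encode("utf-8")))
    return n_prompt + max_tokens <= n_ctx

With k=1 retrieval and 800-character chunks the prompt is usually small, but a long conversation history could still overflow, in which case trimming old turns before step 4 would be a natural follow-up.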
data/odysee.txt ADDED
The diff for this file is too large to render. See raw diff
 
rag_db.py ADDED
@@ -0,0 +1,89 @@
+import os
+import glob
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_openai import OpenAIEmbeddings
+from langchain_chroma import Chroma
+from langchain_community.document_loaders import TextLoader, PyPDFLoader
+from dotenv import load_dotenv
+
+
+BASE_DIR = os.path.dirname(__file__)
+DATA_PATH = os.path.join(BASE_DIR, "data")   # relative names: a leading "/" would make join() discard BASE_DIR
+DB_DIR = os.path.join(BASE_DIR, "rag_db")
+
+load_dotenv(override=True)
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+
+def load_and_chunk():
+    docs = []
+    # 1. Iterate over all files in the data folder
+    for file_path in glob.glob(os.path.join(DATA_PATH, "*")):
+        lower_path = file_path.lower()
+        print(file_path)
+        if lower_path.endswith((".txt", ".md")):
+            loader = TextLoader(file_path)
+        elif lower_path.endswith(".pdf"):
+            loader = PyPDFLoader(file_path)
+        else:
+            print(f"Skipping unsupported file: {file_path}")
+            continue
+
+        docs.extend(loader.load())
+
+    # 2. Chunk them
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=800,
+        chunk_overlap=150
+    )
+    return splitter.split_documents(docs)
+
+
+def init_vectorstore():
+    """
+    Initialize the vector store; can be called from e.g. the main loop.
+    """
+
+    def db_is_empty(db_path):
+        # Check whether the Chroma sqlite file exists and is > 0 bytes
+        sqlite_file = os.path.join(db_path, "chroma.sqlite3")
+        return not os.path.exists(sqlite_file) or os.path.getsize(sqlite_file) == 0
+
+    emb = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
+
+    if not db_is_empty(DB_DIR):
+        # Load the existing DB (no re-embedding)
+        return Chroma(
+            persist_directory=DB_DIR,
+            embedding_function=emb
+        )
+
+    # First time: create the DB
+    print("First time: creating the vector store")
+    chunks = load_and_chunk()
+    db = Chroma.from_documents(
+        documents=chunks,
+        embedding=emb,
+        persist_directory=DB_DIR
+    )
+
+    return db
+
+
+# Format retrieved documents for the prompt
+def format_docs(docs):
+    """Format retrieved documents into a single string."""
+    return "\n\n".join(
+        f"Source: {doc.metadata.get('source', 'Unknown')}\n{doc.page_content}"
+        for doc in docs
+    )
+
+
+# This is what will be used!
+def ask(query: str, retriever) -> str:
+    """Simple call for external modules (like narrator): retrieve and format context.
+    The retriever is passed in explicitly so this module needs no global state."""
+    docs = retriever.invoke(query)
+    return format_docs(docs)
+
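
Putting the new module together, a minimal local usage sketch (assumes OPENAI_API_KEY is set and data/ contains at least one .txt, .md, or .pdf file; the query string is illustrative):

from rag_db import init_vectorstore, ask

db = init_vectorstore()                              # embeds data/ on first run, reloads rag_db/ afterwards
retriever = db.as_retriever(search_kwargs={"k": 1})  # same k=1 setting as app.py
print(ask("Who blinded Polyphemus?", retriever))     # prints a formatted "Source: ..." chunk
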
requirements.txt CHANGED
@@ -1 +1,6 @@
-huggingface_hub
+huggingface_hub
+langchain-text-splitters
+langchain-openai
+langchain-chroma
+langchain-community
+pypdf
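
Note that rag_db.py also imports dotenv, and python-dotenv is not pinned here; presumably it arrives transitively via the langchain packages, but that is an assumption worth verifying inside the Space. A quick import check:

# Verify that every direct import of rag_db.py resolves in this environment
import importlib

for mod in ("langchain_text_splitters", "langchain_openai", "langchain_chroma",
            "langchain_community", "pypdf", "dotenv"):
    try:
        importlib.import_module(mod)
        print(f"OK: {mod}")
    except ImportError as exc:
        print(f"MISSING: {mod} ({exc})")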