File size: 4,786 Bytes
5e462af
0e8c6f1
fca5c55
 
 
2f117e9
fca5c55
 
21dfaad
0e8c6f1
 
 
2f117e9
0e8c6f1
 
 
2f117e9
 
 
 
 
 
0e8c6f1
 
 
2f117e9
 
 
 
 
2d953c7
5e462af
2d953c7
2f117e9
adfa010
2f117e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d953c7
2f117e9
 
 
 
 
5e462af
0e8c6f1
2d953c7
edb6651
2d953c7
2f117e9
2d953c7
0e8c6f1
2f117e9
db33913
5e462af
c0c6c45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d953c7
c0c6c45
 
 
 
 
 
 
 
5e462af
 
 
2d953c7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import gradio as gr
from huggingface_hub import hf_hub_download
import subprocess
import sys, platform
from importlib import metadata as md
from rag_db import * 

# Pin the llama.cpp Python bindings at runtime (Hugging Face Spaces workaround).
# BUG FIX: the original command was `pip install -V llama_cpp_python==0.3.15`;
# pip's `-V/--version` general option makes pip print its own version and exit,
# so the pinned package was never actually installed.  Invoke pip through the
# current interpreter with an argument list (shell=False) instead.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "llama_cpp_python==0.3.15"],
    check=False,  # best-effort: the Space image may already ship a usable version
)
from llama_cpp import Llama
# Download the quantized GGUF model from the Hugging Face Hub.
model_path = hf_hub_download(
    repo_id="StefanCoder1/Scalable-tuned-GGUF",
    filename="model-Q4_K_M.gguf",
    # token=True,  # uncomment + set HF_TOKEN in Space secrets if repo is private
)

# Vector store + retriever for RAG.  k=1 keeps the retrieved context small
# enough to fit the 2048-token context window below.
db = init_vectorstore()
retriever = db.as_retriever(search_kwargs={"k": 1})  # how many chunks to retrieve




# Create the llama.cpp LLM instance, tuned down for a small CPU Space.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,       # context window (originally 4096; halved to save RAM)
    n_threads=2,      # CPU threads (originally 4)
    n_batch=64,       # prompt-evaluation batch size
    use_mmap=True,    # memory-map the model file instead of loading it whole
    use_mlock=False,  # do not pin model pages into RAM
)

def respond(message, history):
    """Answer *message* with RAG context plus the running conversation.

    Parameters
    ----------
    message : str
        The user's latest question.
    history : list | None
        Prior turns.  Accepts both Gradio history formats: the legacy
        ``[(user_msg, assistant_msg), ...]`` tuple pairs AND the modern
        "messages" format ``[{"role": ..., "content": ...}, ...]`` — the
        original code crashed (unpacking ValueError) on the dict format,
        which is the ChatInterface default in current Gradio releases.

    Returns
    -------
    str
        The model's reply, stripped of surrounding whitespace.
    """
    # 1. Retrieve supporting context from the vector store.
    # NOTE(review): `retriver_moedel` is the (misspelled) keyword that
    # rag_db.ask declares — keep it until rag_db itself is fixed.
    context = ask(message, retriver_moedel=retriever)
    print(context)

    # 2. System/contextual instruction for the model.
    system_instruction = (
        "You are an expert on mythology and fantasy creatures. "
        "Use the provided CONTEXT to answer the USER's question accurately. "
        "If the CONTEXT does not contain the answer, state that you don't know "
        "based on the available information."
    )

    # 3. Assemble the prompt: instruction, RAG context, history, final turn.
    parts = [
        f"System Instruction: {system_instruction}\n\n",
        f"CONTEXT:\n---\n{context}\n---\n\n",
        "CONVERSATION HISTORY:\n",
    ]
    for turn in (history or []):
        if isinstance(turn, dict):
            # Modern "messages" format: one dict per role.
            label = "User" if turn.get("role") == "user" else "Assistant"
            parts.append(f"{label}: {turn.get('content', '')}\n")
        else:
            # Legacy tuple format: (user_msg, assistant_msg) pairs.
            user_msg, assistant_msg = turn
            parts.append(f"User: {user_msg} \n Assistant: {assistant_msg}\n")

    # 4. Add the final turn and leave the cursor after "Assistant:".
    parts.append(f"User: {message} \nAssistant:")
    prompt = "".join(parts)

    # 5. Generate; stop sequences prevent the model from inventing extra turns.
    output = llm(
        prompt,
        max_tokens=256,
        temperature=0.7,
        stop=["User:", "Assistant:", "CONVERSATION HISTORY:", "CONTEXT:"],
    )

    return output["choices"][0]["text"].strip()

# Custom CSS theme ("dark academia"): dark radial background, serif fonts,
# gold/violet accents on chat bubbles, pill-shaped inputs and buttons.
# Passed verbatim to gr.ChatInterface(css=...) below — do not edit the
# string contents except to change the visual styling itself.
dark_academia_css = """
body {
    background: radial-gradient(circle at top, #151521 0, #050509 55%, #000000 100%);
    color: #e0d9c6;
    font-family: "Georgia", "Times New Roman", serif;
}

/* Main container */
.gradio-container {
    background: transparent !important;
}

/* Title & description */
h1, h2, h3, .prose h1, .prose h2, .prose h3 {
    color: #f5f1e6 !important;
    letter-spacing: 0.08em;
    text-transform: uppercase;
}

.prose, .prose p, .dark .prose, .dark .prose p {
    color: #e0d9c6 !important;
}

/* Chat box card */
.gr-chat-interface {
    background: rgba(5, 5, 12, 0.85) !important;
    border-radius: 18px !important;
    border: 1px solid rgba(196, 164, 110, 0.4) !important;
    box-shadow:
        0 0 25px rgba(0, 0, 0, 0.9),
        0 0 60px rgba(84, 63, 140, 0.4);
}

/* Chat messages */
.gr-chat-message {
    border-radius: 12px !important;
    border: 1px solid rgba(255, 255, 255, 0.04) !important;
    backdrop-filter: blur(6px);
}

.gr-chat-message.user {
    background: radial-gradient(circle at top left,
        rgba(196, 164, 110, 0.16),
        rgba(15, 15, 25, 0.95)
    ) !important;
    border-left: 3px solid #c4a46e !important;
}

.gr-chat-message.bot {
    background: radial-gradient(circle at top left,
        rgba(127, 90, 240, 0.18),
        rgba(8, 8, 18, 0.96)
    ) !important;
    border-left: 3px solid #7f5af0 !important;
}

/* Input area */
textarea, .gr-text-input, .gr-textbox {
    background: rgba(10, 10, 18, 0.95) !important;
    border-radius: 999px !important;
    border: 1px solid rgba(196, 164, 110, 0.5) !important;
    color: #f5f1e6 !important;
}

/* Buttons */
button, .gr-button {
    background: linear-gradient(135deg, #7f5af0, #c4a46e) !important;
    border-radius: 999px !important;
    border: none !important;
    color: #fdfaf0 !important;
    font-weight: 600 !important;
    letter-spacing: 0.08em;
    text-transform: uppercase;
    box-shadow: 0 0 18px rgba(0, 0, 0, 0.8);
}

button:hover, .gr-button:hover {
    filter: brightness(1.06);
    box-shadow:
        0 0 18px rgba(127, 90, 240, 0.7),
        0 0 30px rgba(196, 164, 110, 0.6);
}

/* Scrollbar */
::-webkit-scrollbar {
    width: 8px;
}
::-webkit-scrollbar-track {
    background: transparent;
}
::-webkit-scrollbar-thumb {
    background: rgba(196, 164, 110, 0.6);
    border-radius: 999px;
}
"""


# Build the chat UI: `respond` handles each turn; the custom CSS above themes it.
# NOTE(review): whether ChatInterface passes `history` as tuple pairs or
# role/content dicts depends on the installed Gradio version — confirm against
# the version pinned for this Space.
chat = gr.ChatInterface(
    fn=respond,
    title="Obsidian Oracle: Archives of Myth",
    description=(
        "Enter the candlelit archives of forgotten epics. "
        "The Obsidian Oracle answers in a dark-academia tone, drawing on "
        "the Iliad, the Odyssey, and other fragments of ancient lore."
    ),
    css=dark_academia_css,
)

# Launch the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    chat.launch()