Spaces:
Running
Running
Tested rag.py
Browse files
- app/rag.py +4 -3
app/rag.py
CHANGED
|
@@ -21,7 +21,8 @@ from dotenv import load_dotenv
|
|
| 21 |
|
| 22 |
session_histories: dict[str, list] = {}
|
| 23 |
# %%
|
| 24 |
-
LLM_MODEL_PATH = "
|
|
|
|
| 25 |
COLLECTION_NAME = "wellness_docs"
|
| 26 |
EMBEDDING_MODEL = "intfloat/e5-large-v2"
|
| 27 |
QDRANT_URL = os.getenv('QDRANT_URL')
|
|
@@ -34,8 +35,8 @@ REVOLUTION_COLLECTION = "revolution"
|
|
| 34 |
|
| 35 |
# %%
|
| 36 |
llm = AutoModelForCausalLM.from_pretrained(
|
| 37 |
-
model_path_or_repo_id=
|
| 38 |
-
model_file=
|
| 39 |
model_type="mistral",
|
| 40 |
gpu_layers=0 # or tweak this if you want GPU acceleration
|
| 41 |
)
|
|
|
|
| 21 |
|
| 22 |
session_histories: dict[str, list] = {}
|
| 23 |
# %%
|
| 24 |
+
LLM_MODEL_PATH = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
|
| 25 |
+
LLM_MODEL = "mistral-7b-instruct-v0.1.Q2_K.gguf"
|
| 26 |
COLLECTION_NAME = "wellness_docs"
|
| 27 |
EMBEDDING_MODEL = "intfloat/e5-large-v2"
|
| 28 |
QDRANT_URL = os.getenv('QDRANT_URL')
|
|
|
|
| 35 |
|
| 36 |
# %%
|
| 37 |
llm = AutoModelForCausalLM.from_pretrained(
|
| 38 |
+
model_path_or_repo_id=LLM_MODEL_PATH,
|
| 39 |
+
model_file=LLM_MODEL,
|
| 40 |
model_type="mistral",
|
| 41 |
gpu_layers=0 # or tweak this if you want GPU acceleration
|
| 42 |
)
|