from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
import torch

documents = SimpleDirectoryReader("SansarChat").load_data()
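# Every readable file in the SansarChat folder (assumed to live alongside this
# script) is parsed into Document objects for indexing.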
| """New sectiond""" | |
from llama_index.core.prompts.prompts import SimpleInputPrompt
from llama_index.llms.llama_cpp import LlamaCPP

system_prompt = "You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided."

# This will wrap the default prompts that are internal to llama-index
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")
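# NOTE: system_prompt and query_wrapper_prompt are defined above but never
# passed to the LLM below, so they have no effect as written. If llama-2 chat
# formatting is wanted, llama-index ships formatting helpers for the llama-cpp
# integration (import path assumed), which LlamaCPP accepts:
#   from llama_index.llms.llama_cpp.llama_utils import (
#       messages_to_prompt,
#       completion_to_prompt,
#   )
#   llm = LlamaCPP(..., messages_to_prompt=messages_to_prompt,
#                  completion_to_prompt=completion_to_prompt)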
# model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_0.bin"
model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf"

llm = LlamaCPP(
    # the GGUF model is downloaded from model_url on first run; alternatively,
    # set model_path to a pre-downloaded model file instead of model_url
    model_url=model_url,
    # model_path="model.gguf",
    temperature=0.1,
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens, but we set it lower to allow
    # for some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # n_gpu_layers=0 keeps inference on the CPU; set it to at least 1 to
    # offload layers to the GPU
    model_kwargs={"n_gpu_layers": 0},
    verbose=True,
)
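# LlamaCPP wraps the llama-cpp-python package, which must be installed
# separately (e.g. pip install llama-cpp-python); with model_url set, the
# GGUF file is downloaded and cached on first use.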
| """HuggingFace Embeddings""" | |
| from llama_index.embeddings.huggingface import HuggingFaceEmbedding | |
| # loads BAAI/bge-small-en-v1.5 | |
| embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5") | |
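# bge-small-en-v1.5 is a compact English embedding model that produces
# 384-dimensional vectors; it is fetched from the HuggingFace Hub on first run.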
service_context = ServiceContext.from_defaults(
    chunk_size=256,
    llm=llm,
    embed_model=embed_model,
)
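# NOTE: ServiceContext is deprecated in llama-index >= 0.10 in favor of the
# global Settings object (Settings.llm, Settings.embed_model,
# Settings.chunk_size); the ServiceContext route is kept here as written.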
| """predict""" | |
| index = VectorStoreIndex.from_documents(documents, service_context=service_context) | |
| query_engine = index.as_query_engine() | |
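# as_query_engine() wires a default retriever and response synthesizer over
# the index: the top-matching chunks are retrieved by embedding similarity and
# stuffed into the LLM prompt to ground each answer.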
def predict(message, history):
    # the chat history is ignored: each query is answered statelessly
    response = query_engine.query(message)
    return str(response)
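# Quick sanity check (illustrative question; assumes the SansarChat documents
# cover it):
#   print(predict("What is Sansar?", []))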
| """Gradio""" | |
| import gradio as gr | |
| gr.ChatInterface(predict).launch(share=True) |