Spaces:

SansarK
/

SansarChat

Runtime error

App Files Files Community

SansarChat / rag_with_mircosoftphi2_and_hf_embeddings.py

SansarK

Update rag_with_mircosoftphi2_and_hf_embeddings.py

2666732 verified over 1 year ago

raw

history blame

1.93 kB

	from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext
	import torch

	documents = SimpleDirectoryReader("SansarChat").load_data()

	"""New sectiond"""

	from llama_index.core.prompts.prompts import SimpleInputPrompt
	from llama_index.llms.llama_cpp import LlamaCPP

	system_prompt = "You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided."

	# This will wrap the default prompts that are internal to llama-index
	query_wrapper_prompt = SimpleInputPrompt("<\|USER\|>{query_str}<\|ASSISTANT\|>")

	# model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_0.bin"
	model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf"

	llm = LlamaCPP(
	# optionally, you can set the path to a pre-downloaded model instead of model_url
	model_path="model.gguf",
	temperature=0.1,
	max_new_tokens=256,
	# llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
	context_window=4096,
	# kwargs to pass to __call__()
	generate_kwargs={},
	# kwargs to pass to __init__()
	# set to at least 1 to use GPU
	model_kwargs={"n_gpu_layers": 0},
	verbose=True
	)

	"""HuggingFace Embeddings"""

	from llama_index.embeddings.huggingface import HuggingFaceEmbedding
	# loads BAAI/bge-small-en-v1.5
	embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

	service_context = ServiceContext.from_defaults(
	chunk_size=256,
	llm=llm,
	embed_model=embed_model
	)

	"""predict"""

	index = VectorStoreIndex.from_documents(documents, service_context=service_context)

	query_engine = index.as_query_engine()

	def predict(input, history):
	response = query_engine.query(input)
	return str(response)

	"""Gradio"""

	import gradio as gr

	gr.ChatInterface(predict).launch(share=True)