Spaces: Build error
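If the build failure comes from missing Python dependencies (a common cause on Spaces), the script below needs at least the following packages listed in `requirements.txt`. This is a sketch inferred from the imports in the code; the package list and any pins are assumptions, not taken from the original Space:

```
langchain
openai
pdfplumber
tiktoken
```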
```python
import os

from langchain.chains.llm import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PDFPlumberLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Re-exports the key from the environment; note this raises a TypeError
# at startup if OPENAI_API_KEY is not set.
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

llm = ChatOpenAI(temperature=0, model_name="gpt-4-0125-preview")


def get_summ(path):
    loader = PDFPlumberLoader(path)
    docs = loader.load()

    # Map: summarize each chunk, extracting themes, genes, and p-values
    map_template = """The following is a set of documents
{docs}
Based on this list of docs, please identify the main themes and determine the genes relevant or irrelevant to the discussed disease, followed by any associated p-values if available.
Helpful Answer:"""
    map_prompt = PromptTemplate.from_template(map_template)
    map_chain = LLMChain(llm=llm, prompt=map_prompt)

    # Reduce: consolidate the per-chunk summaries
    reduce_template = """The following is a set of summaries:
{doc_summaries}
Take these and distill them into a final, consolidated summary of the main themes.
Helpful Answer:"""
    reduce_prompt = PromptTemplate.from_template(reduce_template)
    reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

    # Takes a list of documents, combines them into a single string,
    # and passes this to an LLMChain
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=reduce_chain, document_variable_name="doc_summaries"
    )

    # Combines and iteratively reduces the mapped documents
    reduce_documents_chain = ReduceDocumentsChain(
        # This is the final chain that is called.
        combine_documents_chain=combine_documents_chain,
        # Used if documents exceed the context for `StuffDocumentsChain`
        collapse_documents_chain=combine_documents_chain,
        # The maximum number of tokens to group documents into.
        token_max=100000,
    )

    # Combine documents by mapping a chain over them, then combining results
    map_reduce_chain = MapReduceDocumentsChain(
        # Map chain
        llm_chain=map_chain,
        # Reduce chain
        reduce_documents_chain=reduce_documents_chain,
        # The variable name in the llm_chain to put the documents in
        document_variable_name="docs",
        # Whether to return the results of the map steps in the output
        return_intermediate_steps=False,
    )

    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=100000, chunk_overlap=0
    )
    split_docs = text_splitter.split_documents(docs)

    return map_reduce_chain.run(split_docs)
```
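For completeness, a minimal local smoke test of the function might look like the sketch below; the `paper.pdf` filename is a hypothetical placeholder, and it assumes `OPENAI_API_KEY` is already exported in the environment:

```python
# Hypothetical usage sketch, not part of the original Space:
# summarize a local PDF and print the consolidated result.
if __name__ == "__main__":
    summary = get_summ("paper.pdf")  # "paper.pdf" is a placeholder path
    print(summary)
```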