# %%
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

from langchain_core.documents import Document
from langchain_qdrant import QdrantVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader

import os
from pathlib import Path
from uuid import uuid4

# %%
# Qdrant connection settings are read from the environment; a value is None
# when its variable is not set.
QDRANT_URL = os.environ.get('QDRANT_URL')
QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY')

# %%
# Collection-name constants, one per content area.
# NOTE(review): none of these is referenced in the visible part of this file
# (the ingestion loop derives collection names from the data sub-folder
# names) — presumably they are imported by other modules; confirm.
FAQ_COLLECTION = "faqs"
BLOGS_COLLECTION = "blogs"
TECHNOLOGY_COLLECTION = "technology"
REVOLUTION_COLLECTION = "revolution"
SUPPORT_COLLECTION = "support"
PRODUCT_COLLECTION = "product"

# %%
# Shared Qdrant client built from the environment-derived URL and API key.
client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
# Hugging Face model used to embed every chunk. Any collection these
# embeddings are stored in must be configured with a vector size equal to
# this model's output dimension.
# NOTE(review): the e5-base-v2 model card lists a 768-dimensional output —
# confirm against the collection settings.
embedding_model = "intfloat/e5-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

# %%
# Input root: a "data" directory located next to this file.
data_directory = Path(__file__).parent / "data"
# Splitter shared by all uploads: chunk_size=512 with chunk_overlap=64
# (presumably measured in characters, the splitter's default length
# function — confirm).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)

# %%
# Delete a collection if it exists.
def delete_collection(collection_name):
    """Drop the named Qdrant collection, doing nothing when it is absent.

    Args:
        collection_name: Name of the collection to remove.
    """
    if not client.collection_exists(collection_name):
        return

    client.delete_collection(collection_name)
    print(f"Collection '{collection_name}' deleted.")

# %%
# Create a collection if it does not already exist.
def create_collection(collection_name, vector_size=None):
    """Create a cosine-distance Qdrant collection unless it already exists.

    The collection's vector size has to match the dimensionality of the
    vectors produced by the module-level ``embeddings`` object, otherwise
    the vectors cannot be stored in it. The size is therefore derived from
    the embedding model instead of being hard-coded.

    Args:
        collection_name: Name of the collection to create.
        vector_size: Optional explicit vector dimensionality. When omitted
            (the default), it is measured by embedding a short probe string
            with the module-level ``embeddings`` object.
    """
    if client.collection_exists(collection_name):
        return

    if vector_size is None:
        # Ask the model itself for its output dimension so the collection
        # stays correct even if ``embedding_model`` is changed later.
        vector_size = len(embeddings.embed_query("dimension probe"))

    client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
    )
    print(f"Created Collection: {collection_name}")

# %%
def load_documents_from_folder(folder_path):
    """Collect documents from every .txt and .pdf file beneath a folder.

    Text files: the first line is taken as the source (any "Source URL:"
    marker is removed and the line is stripped); the remaining lines, joined
    and stripped, become the page content. Files with no lines are reported
    and skipped; files whose remaining content is empty are skipped silently.

    PDF files: loaded through ``PyPDFLoader``; a failure on one PDF is
    printed and does not stop the others.

    Every document's ``topic`` metadata is the name of the directory that
    directly contains the file.

    Args:
        folder_path: Directory to search recursively (must provide ``rglob``).

    Returns:
        A list of documents, text-file documents first, then PDF documents.
    """
    documents = []

    # --- plain-text files -------------------------------------------------
    for file_path in folder_path.rglob("*.txt"):
        with open(file_path, 'r', encoding='utf-8') as handle:
            raw_lines = handle.readlines()

        if not raw_lines:
            print(f"{file_path} is empty")
            continue

        header_line, body_lines = raw_lines[0], raw_lines[1:]
        body = "".join(body_lines).strip()
        if not body:
            continue

        metadata = {
            'source': header_line.replace("Source URL:", "").strip(),
            'topic': file_path.parent.name,
        }
        documents.append(Document(page_content=body, metadata=metadata))

    # --- PDF files --------------------------------------------------------
    for file_path in folder_path.rglob("*.pdf"):
        try:
            loaded = PyPDFLoader(file_path).load()
            for item in loaded:
                item.metadata["topic"] = file_path.parent.name
            documents.extend(loaded)
        except Exception as e:
            # Best effort: report the broken PDF and carry on with the rest.
            print(f"Failed to load PDF {file_path}: {e}")

    return documents

# %%
def split_and_upload_to_qdrant(collection_name, documents):
    """Chunk the documents and add the chunks to a Qdrant collection.

    Args:
        collection_name: Name of the target collection.
        documents: Documents to split with the module-level text splitter.
    """
    splits = text_splitter.split_documents(documents)
    # One freshly generated UUID string per chunk, passed as the chunk's id.
    chunk_ids = [str(uuid4()) for _ in splits]

    store = QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=embeddings,
    )
    store.add_documents(documents=splits, ids=chunk_ids)

    print(f"Uploaded {len(splits)} chunks to {collection_name}")

# %%
# Rebuild one collection per immediate sub-directory of the data folder,
# using the sub-directory's name as the collection name.
sub_folders = [sub_folder for sub_folder in data_directory.iterdir() if sub_folder.is_dir()]

for topic in sub_folders:
    collection_name = topic.name
    print(f"Processing: {topic.name}")

    # Drop and recreate the collection before loading anything.
    # NOTE(review): because this runs before the load step, a folder that
    # yields no documents is left with a freshly created, empty collection.
    delete_collection(collection_name)
    create_collection(collection_name)

    docs = load_documents_from_folder(topic)
    print(f"Loaded {len(docs)} docs from {topic}")

    # Skip splitting/uploading when nothing was loaded.
    if docs:
        split_and_upload_to_qdrant(collection_name, docs)

    print('\n')

# %%
# Disabled alternative, kept as an unused string literal (it is never
# executed): ingest every topic folder into a single 'wellness_docs'
# collection instead of one collection per folder.
"""collection_name = 'wellness_docs'
delete_collection(collection_name)
create_collection(collection_name)

sub_folders = [sub_folder for sub_folder in data_directory.iterdir() if sub_folder.is_dir()]
for topic in sub_folders:
    print(f"Processing: {topic.name}")
    docs = load_documents_from_folder(topic)
    print(f"Loaded {len(docs)} docs from {topic}")

    if docs:
        split_and_upload_to_qdrant(collection_name, docs)

    print('\n')"""