import re
def clean_text(text: str) -> str:
    """Normalize line endings and collapse runs of blank lines.

    Windows (``\\r\\n``) and old-Mac (``\\r``) line endings are each converted
    to a single ``\\n``. Any run of three or more newlines is then reduced to
    exactly two, so paragraphs are separated by at most one blank line.
    Leading and trailing whitespace is stripped from the result.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text.
    """
    # Convert "\r\n" before lone "\r": replacing "\r" on its own first would
    # turn every Windows line ending into "\n\n", i.e. a paragraph break.
    text = text.replace('\r\n', '\n').replace('\r', '\n')
    text = re.sub(r"\n{3,}", '\n\n', text)
    return text.strip()

def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50):
    """Split text into overlapping chunks of whitespace-separated words.

    The text is split on whitespace and the words are grouped into windows
    of at most ``chunk_size`` words. Each window starts
    ``chunk_size - overlap`` words after the previous one, so consecutive
    chunks share ``overlap`` words. Words within a chunk are re-joined with
    single spaces.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared between consecutive chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings; empty if ``text`` contains no words.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    # A step of zero or less would never advance through the words (an
    # endless loop); a negative overlap would silently skip words.
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, "
            f"got overlap={overlap}, chunk_size={chunk_size}"
        )

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        end = start + chunk_size
        chunks.append(' '.join(words[start:end]))
        # Once a chunk reaches the final word, any further window would
        # contain only words already emitted as this chunk's tail.
        if end >= len(words):
            break
    return chunks