Spaces:
Build error
Build error
| import gradio as gr | |
| from pathlib import Path | |
| import datetime | |
| import re | |
| import os | |
| import shutil | |
| import fitz # PyMuPDF | |
| from PIL import Image | |
| from collections import defaultdict | |
| import io | |
| from pypdf import PdfWriter | |
| from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage | |
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
| from reportlab.lib.pagesizes import letter, A4, legal, landscape | |
| from reportlab.lib.units import inch | |
| from reportlab.lib import colors | |
| from reportlab.pdfbase import pdfmetrics | |
| from reportlab.pdfbase.ttfonts import TTFont | |
# --- Configuration & Setup ---
# Directory the process was started from; fonts are looked up here and the
# output/preview folders are created beneath it.
CWD = Path.cwd()

# Page-size presets keyed by the label shown in the UI. Each paper size is
# listed in portrait first, then landscape.
LAYOUTS = {
    f"{paper} {orientation}": {
        "size": size if orientation == "Portrait" else landscape(size)
    }
    for paper, size in (("A4", A4), ("Letter", letter), ("Legal", legal))
    for orientation in ("Portrait", "Landscape")
}

OUTPUT_DIR = CWD / "generated_pdfs"
PREVIEW_DIR = CWD / "previews"
FONT_DIR = CWD

# Create necessary directories (no error if they already exist).
OUTPUT_DIR.mkdir(exist_ok=True)
PREVIEW_DIR.mkdir(exist_ok=True)
| # --- Font & Emoji Handling --- | |
def register_local_fonts():
    """Register every ``.ttf`` file found directly in ``FONT_DIR`` with ReportLab.

    Each file is registered under its stem and again under the ``-Bold``,
    ``-Italic`` and ``-BoldItalic`` aliases (all four point at the same file),
    and the four names are then tied together as one font family. Files that
    fail to register are reported and skipped.

    Returns:
        A ``(text_fonts, emoji_font)`` tuple. ``text_fonts`` is the sorted
        list of font names offered for body text, or ``['Helvetica']`` when
        no text font was registered. ``emoji_font`` is the name of the font
        whose stem contains ``notocoloremoji-regular``, or ``None``.
    """
    print("--- Font Registration Process Starting ---")
    selectable = []
    emoji_font = None

    print(f"Scanning for fonts in: {FONT_DIR.absolute()}")
    candidates = list(FONT_DIR.glob("*.ttf"))
    print(f"Found {len(candidates)} .ttf files: {[f.name for f in candidates]}")

    for ttf_path in candidates:
        family = ttf_path.stem
        source = str(ttf_path)
        try:
            pdfmetrics.registerFont(TTFont(family, source))
            # The styled aliases reuse the regular face so <b>/<i> markup
            # resolves to a registered font name.
            for variant in ("Bold", "Italic", "BoldItalic"):
                pdfmetrics.registerFont(TTFont(f"{family}-{variant}", source))
            pdfmetrics.registerFontFamily(
                family,
                normal=family,
                bold=f"{family}-Bold",
                italic=f"{family}-Italic",
                boldItalic=f"{family}-BoldItalic",
            )
        except Exception as e:
            print(f"Could not register font {ttf_path.name}: {e}")
            continue

        lowered = family.lower()
        if "notocoloremoji-regular" in lowered:
            emoji_font = family
        elif "notoemoji" not in lowered:  # Exclude other symbol fonts from text selection
            selectable.append(family)

    if not selectable:
        print("WARNING: No text fonts found. Adding 'Helvetica' as a default.")
        selectable.append('Helvetica')

    print(f"Successfully registered user-selectable fonts: {selectable}")
    print(f"Emoji font set to: {emoji_font}")
    print("--- Font Registration Process Finished ---")
    return sorted(selectable), emoji_font
# Code-point blocks treated as emoji. Both ends of every range are inclusive,
# so the last character of each block (e.g. U+1F5FF, U+27BF) is matched too.
# Compiled once at import time instead of on every call.
_EMOJI_RUN_RE = re.compile(
    "(["
    "\U0001F600-\U0001F64F"
    "\U0001F300-\U0001F5FF"
    "\U0001F900-\U0001F9FF"
    "\u2600-\u26FF"
    "\u2700-\u27BF"
    "]+)"
)

# Anything that looks like a markup tag: "<" ... ">".
_MARKUP_TAG_RE = re.compile(r"(<[^>]+>)")


def apply_emoji_font(text: str, emoji_font_name: str) -> str:
    """Wrap each run of emoji characters in ``<font name="...">`` markup.

    Existing ``<...>`` tags in ``text`` are passed through untouched; only the
    text between tags is scanned, so a tag's own attributes are never
    rewritten.

    Args:
        text: Paragraph markup, possibly already containing tags such as
            ``<b>`` or ``<i>``.
        emoji_font_name: Font name to use for emoji. When falsy (``None`` or
            empty), ``text`` is returned unchanged.

    Returns:
        ``text`` with every consecutive run of emoji wrapped in one
        ``<font name="emoji_font_name">...</font>`` element.
    """
    if not emoji_font_name:
        return text

    opening = f'<font name="{emoji_font_name}">'

    def _wrap(match):
        # A callable replacement keeps the font name literal even if it
        # contains characters that are special in a replacement template.
        return f"{opening}{match.group(1)}</font>"

    # Splitting on a pattern with one capturing group yields text segments at
    # even indexes and the captured tags at odd indexes.
    segments = _MARKUP_TAG_RE.split(text)
    return "".join(
        segment if index % 2 else _EMOJI_RUN_RE.sub(_wrap, segment)
        for index, segment in enumerate(segments)
    )
| # --- PDF Generation & Handling --- | |
_BOLD_MD_RE = re.compile(r'\*\*(.*?)\*\*')
_ITALIC_MD_RE = re.compile(r'_(.*?)_')
_BULLET_CHAR = '\u2022'  # "•", written as an escape so file encoding cannot garble it


def _format_inline(text, emoji_font, italics=True):
    """Turn **bold** (and optionally _italic_) markdown into tags, then wrap emoji."""
    marked = _BOLD_MD_RE.sub(r'<b>\1</b>', text)
    if italics:
        marked = _ITALIC_MD_RE.sub(r'<i>\1</i>', marked)
    return apply_emoji_font(marked, emoji_font)


def _table_flowable(rows, emoji_font, header_style, body_style):
    """Build a styled Table from parsed rows; the first row is the header."""
    column_count = max(len(row) for row in rows)
    # Table expects every row to have the same number of cells, so short rows
    # are padded with empty cells instead of failing the whole document.
    padded = [row + [''] * (column_count - len(row)) for row in rows]

    def as_paragraphs(cells, style):
        # Table cells only get bold + emoji handling (no italics).
        return [Paragraph(_format_inline(cell, emoji_font, italics=False), style) for cell in cells]

    data = [as_paragraphs(padded[0], header_style)]
    data.extend(as_paragraphs(row, body_style) for row in padded[1:])

    table = Table(data, hAlign='LEFT', repeatRows=1)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
        ('GRID', (0, 0), (-1, -1), 1, colors.darkgrey),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
    ]))
    return table


def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
    """Convert a small markdown subset into a list of ReportLab flowables.

    Supported constructs: ``#``/``##``/``###`` headings, ``-``/``*`` bullets,
    fenced code blocks, pipe tables, ``**bold**`` and ``_italic_``. Every
    ``#`` heading after the first one starts a new page. Blank lines become
    small vertical spacers.

    Args:
        markdown_text: The markdown source.
        font_name: Registered font used for body text and headings; its
            ``-Bold`` variant is used for table headers.
        emoji_font: Registered font used for emoji, or a falsy value to leave
            emoji in the body font.

    Returns:
        The list of flowables (the "story") in document order.
    """
    styles = getSampleStyleSheet()
    # Define styles for various markdown elements
    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, leading=14, fontSize=10)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=24, leading=28, textColor=colors.darkblue)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, fontSize=18, leading=22, spaceBefore=10)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, fontSize=14, leading=18, spaceBefore=8)
    # borderPadding is the ParagraphStyle property for inner padding; a plain
    # "padding" keyword is stored on the style but has no effect on layout.
    style_code = ParagraphStyle('Code', fontName='Courier', backColor=colors.whitesmoke, textColor=colors.darkred, borderWidth=1, borderColor=colors.lightgrey, borderPadding=8, leading=12, fontSize=9)
    style_table_header = ParagraphStyle('TableHeader', parent=style_normal, fontName=f"{font_name}-Bold")

    story = []
    code_lines = []
    table_rows = []
    in_code_block = False
    first_heading = True

    def flush_table():
        """Emit the pending table rows, if any, and reset the buffer."""
        if table_rows:
            story.append(_table_flowable(list(table_rows), emoji_font, style_table_header, style_normal))
            story.append(Spacer(1, 0.2 * inch))
            table_rows.clear()

    def flush_code():
        """Emit the buffered code lines as one code paragraph and reset."""
        story.append(Paragraph(''.join(f"{code_line}<br/>" for code_line in code_lines), style_code))
        story.append(Spacer(1, 0.1 * inch))
        code_lines.clear()

    for line in markdown_text.split('\n'):
        stripped_line = line.strip()

        if in_code_block:
            if stripped_line.startswith("```"):
                flush_code()
                in_code_block = False
            else:
                # Paragraph parses its text as markup, so code must be
                # entity-escaped; '&' goes first to avoid double-escaping.
                code_lines.append(line.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
            continue

        if stripped_line.startswith('|'):
            # A row made only of '-', '|', ':' and spaces is the header
            # separator and carries no data.
            if not all(c in '-|: ' for c in stripped_line):
                table_rows.append([cell.strip() for cell in stripped_line.strip('|').split('|')])
            continue

        # Any non-table line ends a pending table. Flushing here, before the
        # fence check, keeps a table ahead of a code block that follows it.
        flush_table()

        if stripped_line.startswith("```"):
            in_code_block = True
            continue

        if not stripped_line:
            story.append(Spacer(1, 0.1 * inch))
            continue

        # Default: the whole line is body text.
        content = stripped_line
        style = style_normal
        extra_args = {}

        # Detect structural elements and extract the raw content. The marker
        # is removed by slicing so heading text that itself starts with '#'
        # is preserved.
        if stripped_line.startswith("# "):
            if not first_heading:
                story.append(PageBreak())
            first_heading = False
            content, style = stripped_line[2:].lstrip(), style_h1
        elif stripped_line.startswith("## "):
            content, style = stripped_line[3:].lstrip(), style_h2
        elif stripped_line.startswith("### "):
            content, style = stripped_line[4:].lstrip(), style_h3
        elif stripped_line.startswith(("- ", "* ")):
            content = stripped_line[2:]
            extra_args['bulletText'] = _BULLET_CHAR

        story.append(Paragraph(_format_inline(content, emoji_font), style, **extra_args))

    # Input may end while a table or an unterminated code fence is still
    # open; emit what was collected instead of silently dropping it.
    flush_table()
    if in_code_block and code_lines:
        flush_code()

    return story
def create_pdf_preview(pdf_path: Path):
    """Render the first page of a PDF to a PNG in ``PREVIEW_DIR``.

    Args:
        pdf_path: Path of the PDF to preview.

    Returns:
        The preview image path as a string, or ``None`` when the PDF could
        not be opened or rendered (the failure is printed, not raised).
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        doc = fitz.open(pdf_path)
        try:
            doc.load_page(0).get_pixmap().save(str(preview_path))
        finally:
            # Close the document even when rendering fails, so the file
            # handle is not leaked.
            doc.close()
    except Exception as e:
        print(f"Could not create preview for {pdf_path.name}: {e}")
        return None
    return str(preview_path)
| # --- Main API Function --- | |
_TEXT_PAGE_MARGIN = 0.5 * inch   # margin on all sides of multi-column text pages
_TEXT_COLUMN_GAP = 0.2 * inch    # horizontal gap between adjacent text columns


def _render_markdown_pdf(md_content, font_name, pagesize, num_columns):
    """Lay out markdown text as a PDF and return it in an in-memory buffer."""
    buffer = io.BytesIO()
    story = markdown_to_story(md_content, font_name, EMOJI_FONT_NAME)
    if num_columns > 1:
        doc = BaseDocTemplate(
            buffer,
            pagesize=pagesize,
            leftMargin=_TEXT_PAGE_MARGIN,
            rightMargin=_TEXT_PAGE_MARGIN,
            topMargin=_TEXT_PAGE_MARGIN,
            bottomMargin=_TEXT_PAGE_MARGIN,
        )
        # n columns share the usable width minus the (n - 1) gaps between
        # them, so the columns plus gaps span the usable width exactly.
        frame_width = (doc.width - (num_columns - 1) * _TEXT_COLUMN_GAP) / num_columns
        frames = [
            Frame(
                doc.leftMargin + i * (frame_width + _TEXT_COLUMN_GAP),
                doc.bottomMargin,
                frame_width,
                doc.height,
            )
            for i in range(num_columns)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(buffer, pagesize=pagesize)
    doc.build(story)
    return buffer


def _render_image_pdf(img_path):
    """Wrap one image in a single-page PDF sized to the image; return the buffer."""
    with Image.open(img_path) as img:
        img_width, img_height = img.size
    buffer = io.BytesIO()
    doc = BaseDocTemplate(
        buffer,
        pagesize=(img_width, img_height),
        leftMargin=0,
        rightMargin=0,
        topMargin=0,
        bottomMargin=0,
    )
    # The frame is given zero padding explicitly: with the default frame
    # padding the drawable area is smaller than the page, and an image the
    # size of the page would not fit.
    full_page = Frame(
        0, 0, img_width, img_height,
        leftPadding=0, bottomPadding=0, rightPadding=0, topPadding=0,
    )
    doc.addPageTemplates([PageTemplate(id='FullBleed', frames=[full_page])])
    doc.build([ReportLabImage(str(img_path), width=img_width, height=img_height)])
    return buffer


def generate_pdfs_api(files, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (file group, layout, font) combination.

    Uploaded files are grouped by the part of their stem before the first
    underscore. For each group, all markdown files are concatenated and laid
    out as text pages, then each image is appended as its own page.

    Args:
        files: Uploaded file objects; each must expose the path as ``.name``.
        layouts: Selected keys of ``LAYOUTS``.
        fonts: Selected font names.
        num_columns: Number of text columns for markdown pages.
        page_w_mult: Multiplier applied to the layout's page width.
        page_h_mult: Multiplier applied to the layout's page height.
        progress: Gradio progress tracker.

    Returns:
        A tuple ``(preview_image_paths, log_text, pdf_paths)``.

    Raises:
        gr.Error: If no files, layouts or fonts were supplied.
    """
    if not files: raise gr.Error("Please upload at least one Markdown or Image file.")
    if not layouts: raise gr.Error("Please select at least one page layout.")
    if not fonts: raise gr.Error("Please select at least one font.")

    # The value may arrive as a float; range() below needs an int.
    num_columns = int(num_columns)

    # Start every run from empty output and preview folders.
    shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
    shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
    OUTPUT_DIR.mkdir()
    PREVIEW_DIR.mkdir()

    grouped_files = defaultdict(lambda: {'md': [], 'img': []})
    for uploaded in files:
        file_path = Path(uploaded.name)
        # split() returns the whole stem when there is no underscore.
        stem = file_path.stem.split('_')[0]
        suffix = file_path.suffix.lower()
        if suffix == '.md':
            grouped_files[stem]['md'].append(file_path)
        elif suffix in ('.png', '.jpg', '.jpeg'):
            grouped_files[stem]['img'].append(file_path)

    log_lines, generated_pdf_paths = [], []
    for stem, assets in progress.tqdm(grouped_files.items(), desc="Processing File Groups"):
        # Read the markdown once per group rather than once per layout/font.
        md_content = None
        if assets['md']:
            md_content = "\n".join(p.read_text(encoding='utf-8') for p in assets['md'])

        for layout_name in layouts:
            base_w, base_h = LAYOUTS[layout_name]["size"]
            pagesize = (base_w * page_w_mult, base_h * page_h_mult)

            for font_name in fonts:
                merger = PdfWriter()
                if md_content is not None:
                    merger.append(fileobj=_render_markdown_pdf(md_content, font_name, pagesize, num_columns))
                for img_path in assets['img']:
                    merger.append(fileobj=_render_image_pdf(img_path))

                if len(merger.pages) > 0:
                    time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
                    filename = f"{stem}_{time_str}_{layout_name.replace(' ','-')}_{page_w_mult}x{page_h_mult}_{font_name}_Cols{num_columns}.pdf"
                    output_path = OUTPUT_DIR / filename
                    with open(output_path, "wb") as out_file:
                        merger.write(out_file)
                    generated_pdf_paths.append(output_path)
                    log_lines.append(f"Generated: {filename}\n")

    # Drop previews that failed to render (create_pdf_preview returns None).
    previews = (create_pdf_preview(p) for p in generated_pdf_paths)
    final_gallery = [preview for preview in previews if preview is not None]
    return final_gallery, "".join(log_lines), [str(p) for p in generated_pdf_paths]
| # --- Gradio UI Definition --- | |
# Register fonts once at import time. AVAILABLE_FONTS feeds the font picker in
# the UI below; EMOJI_FONT_NAME is read as a global by generate_pdfs_api.
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()
# Example markdown document that is written to sample.md below.
# NOTE(review): the non-ASCII characters in this literal look mis-encoded
# (mojibake where emoji were presumably intended) — confirm against the
# original file before relying on it.
SAMPLE_MARKDOWN = """# Deities Guide: Mythology and Moral Lessons
1. π **Introduction**
- **Purpose**: Explore deities, spirits, saints, and beings with their epic stories and morals!
- **Usage**: A guide for learning and storytelling across traditions. οΈ
- **Themes**: Justice βοΈ, faith π, hubris ποΈ, redemption β¨, cosmic order π.
2. π οΈ **Core Concepts of Divinity**
- **Powers**: Creation π, omniscience ποΈβπ¨οΈ, shapeshifting π¦ across entities.
- **Life Cycle**: Mortality β³, immortality βΎοΈ, transitions like saints and avatars π.
- **Communication**: Omens ποΈ, visions ποΈ, miracles β¨ from gods and spirits.
# βοΈ Arthurian Legends
- **Merlin, Morgan le Fay, Arthur**: Mentor π§, rival π§ββοΈ, son π.
- **Relation**: Family tests loyalty π€.
- **Lesson**: Honor ποΈ vs. betrayal π‘οΈ.
# ποΈ Greek Mythology
- **Zeus, Hera, Athena**: Father β‘, mother π, daughter π¦.
- **Relation**: Family rules with tension π©οΈ.
- **Lesson**: Hubris ΰ€ ΰ€Ήΰ€ΰ€ΰ€Ύΰ€° meets wisdom π§ .
# ποΈ Hindu Trimurti
- **Brahma, Vishnu, Shiva**: Creator Brahma, preserver Vishnu, destroyer Shiva.
- **Relation**: Divine trio cycles existence π.
- **Lesson**: Balance βοΈ sustains life π.
"""
# Module-level side effect: (re)writes sample.md in the working directory
# every time this module is imported or run.
with open(CWD / "sample.md", "w", encoding="utf-8") as f: f.write(SAMPLE_MARKDOWN)
# Gradio UI: a settings column on the left and a results column on the right,
# wired to generate_pdfs_api by the button click handler at the end.
# NOTE(review): the nesting of the `with` blocks below is reconstructed from
# statement order because the original indentation was not available —
# confirm the layout (in particular which sliders sit inside the inner Row).
with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# π Advanced PDF Layout Engine")
    gr.Markdown("Upload Markdown/Image files. The app finds local `.ttf` fonts. Group assets with a common name (e.g., `Doc_part1.md`, `Doc_img1.png`) to combine them. `# Headers` create automatic page breaks.")
    with gr.Row():
        # Left column: inputs that become the arguments of generate_pdfs_api.
        with gr.Column(scale=1):
            gr.Markdown("### βοΈ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown & Image Files", file_count="multiple", file_types=[".md", ".png", ".jpg", ".jpeg"])
            with gr.Row():
                page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
                page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
            num_columns_slider = gr.Slider(label="Number of Text Columns", minimum=1, maximum=4, step=1, value=1)
            selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Base Page Layout", value=["A4 Portrait"])
            # The first available font is pre-selected when there is one.
            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Text Font", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])
            generate_btn = gr.Button("π Generate PDFs", variant="primary")
        # Right column: the three outputs returned by generate_pdfs_api.
        with gr.Column(scale=2):
            gr.Markdown("### πΌοΈ PDF Preview Gallery")
            gallery_output = gr.Gallery(label="Generated PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto", object_fit="contain")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            downloadable_files_output = gr.Files(label="Download Generated PDFs")
    # Input order must match generate_pdfs_api's positional parameters:
    # files, layouts, fonts, num_columns, page_w_mult, page_h_mult.
    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts, num_columns_slider, page_w_mult_slider, page_h_mult_slider], outputs=[gallery_output, log_output, downloadable_files_output])
# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()