Spaces:
Build error
Build error
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
import datetime
|
| 4 |
+
import re
|
| 5 |
+
import requests
|
| 6 |
+
import os
|
| 7 |
+
import shutil
|
| 8 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
|
| 9 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 10 |
+
from reportlab.lib.pagesizes import letter, A4, legal, landscape
|
| 11 |
+
from reportlab.lib.units import inch
|
| 12 |
+
from reportlab.lib import colors
|
| 13 |
+
from reportlab.pdfbase import pdfmetrics
|
| 14 |
+
from reportlab.pdfbase.ttfonts import TTFont
|
| 15 |
+
|
| 16 |
+
# --- Configuration & Setup ---
|
| 17 |
+
# These settings define the available layouts and directories for fonts and outputs.
|
| 18 |
+
|
| 19 |
+
# Every supported paper size is offered in portrait and landscape orientation.
# Insertion order is significant: the first key is the UI's default selection.
LAYOUTS = {
    f"{paper} {orientation}": {"size": page_size}
    for paper, portrait in (("A4", A4), ("Letter", letter), ("Legal", legal))
    for orientation, page_size in (
        ("Portrait", portrait),
        ("Landscape", landscape(portrait)),
    )
}

# Working directories, relative to the current working directory; both are
# created at import time when missing.
OUTPUT_DIR, FONT_DIR = Path("generated_pdfs"), Path("fonts")
OUTPUT_DIR.mkdir(exist_ok=True)
FONT_DIR.mkdir(exist_ok=True)
|
| 32 |
+
|
| 33 |
+
# --- Font Handling ---
|
| 34 |
+
|
| 35 |
+
def download_default_font():
    """Ensure the fallback font DejaVuSans.ttf exists in the font directory.

    Nothing is downloaded when ``FONT_DIR / "DejaVuSans.ttf"`` is already
    present. Otherwise the file is fetched from the DejaVu GitHub repository
    and written to that path.

    Returns:
        bool: True if the font file is available after the call, False if
        the download or the write to disk failed.
    """
    dejavu_path = FONT_DIR / "DejaVuSans.ttf"
    if dejavu_path.exists():
        return True

    print("No fonts found. Downloading default font (DejaVuSans)...")
    url = "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true"
    try:
        # requests applies no timeout by default; without one an unresponsive
        # server would block this call (and therefore app start-up) forever.
        response = requests.get(url, allow_redirects=True, timeout=30)
        response.raise_for_status()
        dejavu_path.write_bytes(response.content)
    except (requests.RequestException, OSError) as e:
        print(f"Failed to download default font: {e}")
        # Drop a partially written file so that the next run retries the
        # download instead of treating the fragment as a usable font.
        try:
            dejavu_path.unlink()
        except OSError:
            pass
        return False

    print("Default font downloaded successfully.")
    return True
|
| 52 |
+
|
| 53 |
+
def discover_and_register_fonts():
    """Register every .ttf file in the font directory with ReportLab.

    When the directory holds no .ttf file, the default font is downloaded
    first; if that download fails an empty list is returned. Each font is
    registered under its file stem, and fonts that cannot be registered are
    reported on stdout and skipped.

    Returns:
        list[str]: The names of the successfully registered fonts, sorted.
    """
    ttf_paths = list(FONT_DIR.glob("*.ttf"))
    if not ttf_paths:
        downloaded = download_default_font()
        if not downloaded:
            return []  # nothing to offer without the fallback font
        ttf_paths = list(FONT_DIR.glob("*.ttf"))

    registered = []
    for ttf in ttf_paths:
        name = ttf.stem
        try:
            pdfmetrics.registerFont(TTFont(name, str(ttf)))
        except Exception as e:
            print(f"Could not register font {ttf.name}: {e}")
        else:
            registered.append(name)

    registered.sort()
    return registered
|
| 70 |
+
|
| 71 |
+
# --- ReportLab PDF Generation (Core Logic) ---
|
| 72 |
+
|
| 73 |
+
def _table_flowables(rows):
    """Return the Table built from the collected rows, followed by a spacer."""
    table = Table(rows, hAlign='LEFT', colWidths=[1.5 * inch] * len(rows[0]))
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),  # shade the first row
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
    ]))
    return [table, Spacer(1, 0.2 * inch)]


def markdown_to_story(markdown_text: str, font_name: str) -> list:
    """Convert a markdown string into a list of ReportLab flowables (a "story").

    The input is processed line by line and supports a small markdown subset:
    ``#``/``##``/``###`` headings, ``*``/``-`` bullets, numbered items,
    pipe-delimited tables, fenced code blocks and ``**bold**`` / ``_italic_``
    in plain paragraphs. Blank lines become small vertical spacers.

    Args:
        markdown_text: The markdown source.
        font_name: Name of a font known to ReportLab; used for all styles.

    Returns:
        The flowables in document order.
    """
    # Function-scope import so the block does not depend on new file-level imports.
    from html import escape

    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name)
    # borderPadding is the ParagraphStyle property for inner padding; an
    # unknown keyword such as "padding" is accepted but has no effect.
    style_code = ParagraphStyle('Code', parent=styles['Code'], fontName=font_name, backColor=colors.whitesmoke, borderColor=colors.lightgrey, borderWidth=1, borderPadding=5)

    story = []
    table_rows = []   # rows of the table currently being collected
    code_lines = []   # escaped lines of the code block currently being collected
    in_code_block = False

    def flush_table():
        """Emit the pending table, if any, and start collecting afresh."""
        if table_rows:
            story.extend(_table_flowables(list(table_rows)))
            table_rows.clear()

    for line in markdown_text.split('\n'):
        stripped = line.strip()

        # Fences are checked before tables so that "|...|" lines inside a
        # code block stay code instead of being parsed as table rows.
        if stripped.startswith("```"):
            flush_table()
            if in_code_block:
                story.append(Paragraph('<br/>'.join(code_lines), style_code))
                code_lines = []
            in_code_block = not in_code_block
            continue
        if in_code_block:
            # Paragraph text is XML-like markup, so &, < and > must be escaped.
            code_lines.append(escape(line, quote=False))
            continue

        if stripped.startswith('|') and stripped.endswith('|'):
            # The |---|:--:| separator row carries no content.
            if all(c in '-|: ' for c in stripped):
                continue
            cells = [cell.strip() for cell in stripped.strip('|').split('|')]
            table_rows.append([Paragraph(cell, style_normal) for cell in cells])
            continue
        flush_table()  # any non-table line ends the current table

        # Markdown elements to ReportLab flowables
        if line.startswith("# "):
            story.append(Paragraph(line[2:], style_h1))
        elif line.startswith("## "):
            story.append(Paragraph(line[3:], style_h2))
        elif line.startswith("### "):
            story.append(Paragraph(line[4:], style_h3))
        elif stripped.startswith(("* ", "- ")):
            story.append(Paragraph(stripped[2:], style_normal, bulletText='\u2022'))
        elif re.match(r'^\d+\.\s', stripped):
            story.append(Paragraph(stripped, style_normal))
        elif stripped == "":
            story.append(Spacer(1, 0.1 * inch))
        else:
            bolded = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line)
            story.append(Paragraph(re.sub(r'_(.*?)_', r'<i>\1</i>', bolded), style_normal))

    # Input may end in the middle of a table or an unterminated code fence;
    # emit what was collected instead of silently dropping it.
    flush_table()
    if in_code_block:
        story.append(Paragraph('<br/>'.join(code_lines), style_code))

    return story
|
| 126 |
+
|
| 127 |
+
# --- Gradio API Function ---
|
| 128 |
+
|
| 129 |
+
def generate_pdfs_api(files, layouts, fonts, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (file, layout, font) combination for the Gradio UI.

    The output directory is emptied first. A failure for one file or one
    combination is recorded in the log and does not stop the remaining work.

    Args:
        files: Uploaded markdown files as delivered by ``gr.File``; each item
            is either a filesystem path or an object whose ``name`` attribute
            holds the path.
        layouts: Selected layout names (keys of ``LAYOUTS``).
        fonts: Selected font names, expected to be usable by ReportLab.
        progress: Gradio progress tracker.

    Returns:
        tuple: ``(paths, log)`` where ``paths`` is the list of PDF paths
        written by this call (as strings) and ``log`` is markdown text.

    Raises:
        gr.Error: If no file, no layout or no font was supplied.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    # Clean output directory before generation
    if OUTPUT_DIR.exists():
        for stale in OUTPUT_DIR.glob('*'):
            if stale.is_file():
                stale.unlink()

    log_lines = ["Starting PDF generation..."]
    generated_files = []
    date_str = datetime.datetime.now().strftime("%Y-%m-%d")

    for md_file_obj in progress.tqdm(files, desc="Processing Files"):
        # Depending on the Gradio version and the component's `type`, gr.File
        # yields plain path strings or objects exposing the path as `.name`.
        if isinstance(md_file_obj, (str, os.PathLike)):
            original_md_path = Path(md_file_obj)
        else:
            original_md_path = Path(md_file_obj.name)
        log_lines.append(f"\nProcessing: **{original_md_path.name}**")

        try:
            md_content = original_md_path.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as e:
            # One unreadable upload must not abort the rest of the batch.
            log_lines.append(f" - **ERROR**: Could not read file: {e}")
            continue

        for layout_name in layouts:
            for font_name in fonts:
                log_lines.append(f" - Generating: {layout_name}, {font_name}")

                try:
                    output_filename = f"{original_md_path.stem}_{layout_name.replace(' ', '-')}_{font_name}_{date_str}.pdf"
                    output_path = OUTPUT_DIR / output_filename

                    doc = SimpleDocTemplate(str(output_path), pagesize=LAYOUTS[layout_name]["size"], rightMargin=inch, leftMargin=inch, topMargin=inch, bottomMargin=inch)
                    story = markdown_to_story(md_content, font_name)
                    doc.build(story)
                except Exception as e:
                    log_lines.append(f" - **ERROR**: Failed to process with font {font_name}: {e}")
                else:
                    # Only successfully built PDFs are offered for download;
                    # same-named uploads overwrite each other, so avoid duplicates.
                    if str(output_path) not in generated_files:
                        generated_files.append(str(output_path))

    log_lines.append("\n\u2705 PDF generation complete!")

    return generated_files, "\n".join(log_lines)
|
| 170 |
+
|
| 171 |
+
# --- Gradio UI Definition ---
|
| 172 |
+
|
| 173 |
+
# Fonts are discovered and registered once, when the module is loaded.
AVAILABLE_FONTS = discover_and_register_fonts()

# Example input shown in the UI for reference only; it is not wired to the generator.
SAMPLE_MARKDOWN = (
    "# Sample Document\n\n"
    "This is a sample markdown file. **ReportLab** is now creating the PDF with dynamic fonts and layouts.\n\n"
    "### Features\n- Item 1\n- Item 2\n\n"
    "| Header 1 | Header 2 |\n|----------|----------|\n| Cell 1 | Cell 2 |\n\n"
    "```python\ndef hello():\n print(\"Hello, PDF!\")\n```\n"
)

with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# Advanced Markdown to PDF Generator")
    gr.Markdown("Upload Markdown files, select layouts and fonts, and generate multiple PDF variations.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### Generation Settings")
            gr.Textbox(value=SAMPLE_MARKDOWN, label="Sample Markdown (for reference)", lines=10)

            uploaded_files = gr.File(label="Upload Markdown Files (.md)", file_count="multiple", file_types=[".md"])
            # CheckboxGroup values are lists of selected choices; the first layout is pre-selected.
            selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Page Layouts", value=[next(iter(LAYOUTS))])

            if not AVAILABLE_FONTS:
                gr.Warning("No fonts found in 'fonts' directory. Using default.")
                AVAILABLE_FONTS = ["Helvetica"]  # ReportLab's default

            # AVAILABLE_FONTS is guaranteed non-empty here because of the fallback above.
            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Fonts to Use", value=[AVAILABLE_FONTS[0]])

            generate_btn = gr.Button("Generate PDFs", variant="primary")

        # Right column: generation log and download list.
        with gr.Column(scale=2):
            gr.Markdown("### Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")

    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts], outputs=[file_output, log_output])

if __name__ == "__main__":
    demo.launch()
|