Spaces:
Build error
Build error
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
import datetime
|
| 4 |
+
import re
|
| 5 |
+
import requests
|
| 6 |
+
import os
|
| 7 |
+
import shutil
|
| 8 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate
|
| 9 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 10 |
+
from reportlab.lib.pagesizes import letter, A4, legal, landscape
|
| 11 |
+
from reportlab.lib.units import inch
|
| 12 |
+
from reportlab.lib import colors
|
| 13 |
+
from reportlab.pdfbase import pdfmetrics
|
| 14 |
+
from reportlab.pdfbase.ttfonts import TTFont
|
| 15 |
+
|
| 16 |
+
# --- Configuration & Setup ---

# Page layouts offered in the UI, keyed by their display name. Each entry
# holds the ReportLab page size passed to the document template.
LAYOUTS = {
    "A4 Portrait": {"size": A4},
    "A4 Landscape": {"size": landscape(A4)},
    "Letter Portrait": {"size": letter},
    "Letter Landscape": {"size": landscape(letter)},
    "Legal Portrait": {"size": legal},
    "Legal Landscape": {"size": landscape(legal)},
}

# Directory that receives the generated PDFs; created at import time.
OUTPUT_DIR = Path("generated_pdfs")
OUTPUT_DIR.mkdir(exist_ok=True)

# Directory scanned for .ttf files to register; created at import time.
FONT_DIR = Path("fonts")
FONT_DIR.mkdir(exist_ok=True)

# Font name used in the <font> tags wrapped around emoji characters. Fonts
# are registered under their file stem, so this matches NotoColorEmoji.ttf.
EMOJI_FONT_NAME = "NotoColorEmoji"
|
| 30 |
+
|
| 31 |
+
# --- Font & Emoji Handling ---
|
| 32 |
+
|
| 33 |
+
def download_fonts():
    """Download DejaVuSans (text) and NotoColorEmoji (emoji) if missing.

    Each font is fetched into FONT_DIR only when the target file does not
    already exist. Failures are reported on stdout and never raised, so a
    missing network connection does not prevent the app from starting.
    """
    fonts_to_check = {
        "DejaVuSans.ttf": "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true",
        "NotoColorEmoji.ttf": "https://github.com/googlefonts/noto-emoji/blob/main/fonts/NotoColorEmoji.ttf?raw=true",
    }
    for font_filename, url in fonts_to_check.items():
        font_path = FONT_DIR / font_filename
        if font_path.exists():
            continue

        print(f"Downloading {font_filename}...")
        # Write to a side file first: an interrupted write must not leave a
        # truncated .ttf behind, because the exists() check above would then
        # skip the download forever.
        partial_path = font_path.with_name(font_path.name + ".part")
        try:
            # The timeout keeps a stalled connection from hanging start-up.
            r = requests.get(url, allow_redirects=True, timeout=30)
            r.raise_for_status()
            partial_path.write_bytes(r.content)
            partial_path.replace(font_path)
            print(f"{font_filename} downloaded successfully.")
        except (requests.RequestException, OSError) as e:
            print(f"Failed to download {font_filename}: {e}")
            try:
                partial_path.unlink()
            except OSError:
                # Nothing to clean up (or cleanup itself failed); best effort.
                pass
|
| 51 |
+
|
| 52 |
+
def discover_and_register_fonts():
    """Register every .ttf in FONT_DIR and return the selectable font names.

    Calls download_fonts() first so the default fonts are present when
    possible. Each font is registered with ReportLab under its file stem.
    Fonts whose name contains "emoji" are registered but left out of the
    returned list, so they are not offered as a text font.

    Returns:
        Sorted list of registered, non-emoji font names. Fonts that fail to
        register are reported on stdout and skipped.
    """
    download_fonts()
    font_names = []
    # Sorted so registration order and log output are deterministic.
    for font_path in sorted(FONT_DIR.glob("*.ttf")):
        font_name = font_path.stem
        try:
            pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
        except Exception as e:
            # One unreadable or unsupported font must not block the others.
            print(f"Could not register font {font_path.name}: {e}")
            continue
        # Don't add the emoji font to the user-selectable list
        if "emoji" not in font_name.lower():
            font_names.append(font_name)
    return sorted(font_names)
|
| 67 |
+
|
| 68 |
+
# Inclusive (first, last) code-point ranges that are treated as emoji.
_EMOJI_RANGES = (
    (0x1F600, 0x1F64F),  # Emoticons
    (0x1F300, 0x1F5FF),  # Miscellaneous Symbols and Pictographs
    (0x1F900, 0x1F9FF),  # Supplemental Symbols and Pictographs
    (0x2600, 0x26FF),    # Miscellaneous Symbols
    (0x2700, 0x27BF),    # Dingbats
)

# Compiled once at import time; matches one or more consecutive emoji.
_EMOJI_PATTERN = re.compile(
    "([" + "".join(f"{chr(first)}-{chr(last)}" for first, last in _EMOJI_RANGES) + "]+)"
)


def apply_emoji_font(text: str, font_name=None) -> str:
    """Wrap runs of emoji characters in a <font> tag naming the emoji font.

    Args:
        text: Text that may contain emoji characters.
        font_name: Font name to put in the tag. Defaults to EMOJI_FONT_NAME
            when omitted or None.

    Returns:
        The text with every run of consecutive emoji wrapped as
        <font name="...">run</font>; all other characters are unchanged.
    """
    if font_name is None:
        font_name = EMOJI_FONT_NAME
    opening_tag = f'<font name="{font_name}">'
    # A callable replacement keeps backslashes in the font name literal.
    return _EMOJI_PATTERN.sub(
        lambda match: f"{opening_tag}{match.group(1)}</font>", text
    )
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# --- ReportLab PDF Generation (Core Logic) ---
|
| 80 |
+
|
| 81 |
+
def _table_flowables(rows):
    """Return the styled Table for the collected rows plus a trailing Spacer."""
    table = Table(rows, hAlign='LEFT', repeatRows=1)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
    ]))
    return [table, Spacer(1, 0.2 * inch)]


def markdown_to_story(markdown_text: str, font_name: str):
    """Convert a small Markdown subset into a list of ReportLab flowables.

    Handled constructs: "#", "##" and "###" headings, "* " / "- " bullets,
    numbered items, pipe-delimited tables, fenced code blocks, blank lines,
    and **bold** / _italic_ in plain paragraphs. Emoji are wrapped in the
    emoji font via apply_emoji_font(). The text is split into pages on the
    literal marker "\\n\\n---PAGE_BREAK---\\n\\n"; a PageBreak is inserted
    between pages.

    Args:
        markdown_text: The Markdown source.
        font_name: Registered font name used for body text and headings.

    Returns:
        List of flowables ready to be passed to a document's build().
    """
    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6, leading=14)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=20, leading=24)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, spaceBefore=10, fontSize=16, leading=20)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, spaceBefore=8, fontSize=14, leading=18)
    # borderPadding is the ParagraphStyle attribute for inner padding; an
    # unknown keyword such as "padding" is stored but never used.
    style_code = ParagraphStyle('Code', parent=styles['Code'], fontName='Courier', backColor=colors.whitesmoke, borderColor=colors.lightgrey, borderWidth=1, borderPadding=5)

    story = []
    # Split by our custom page break marker or process as a single block
    pages = markdown_text.split('\n\n---PAGE_BREAK---\n\n')

    for i, page_content in enumerate(pages):
        in_code_block, in_table = False, False
        code_block_text, table_data = "", []

        for line in page_content.split('\n'):
            stripped = line.strip()

            # Table handler. Skipped inside a code fence so that code
            # containing pipes is not mistaken for a table row.
            if not in_code_block and stripped.startswith('|') and stripped.endswith('|'):
                in_table = True
                # Separator rows such as |---|:--| carry no data.
                if all(c in '-|: ' for c in stripped):
                    continue
                cells = [apply_emoji_font(c.strip()) for c in stripped.strip('|').split('|')]
                table_data.append([Paragraph(cell, style_normal) for cell in cells])
                continue
            if in_table:
                # First non-table line after a table: emit it, then fall
                # through so this line is still processed below.
                in_table = False
                if table_data:
                    story.extend(_table_flowables(table_data))
                    # Start fresh so the next table does not repeat these rows.
                    table_data = []

            # Code block handler
            if stripped.startswith("```"):
                in_code_block = not in_code_block
                if not in_code_block:
                    story.append(Paragraph(code_block_text.replace('\n', '<br/>'), style_code))
                    code_block_text = ""
                continue
            if in_code_block:
                # Escape markup characters so code is shown literally instead
                # of being parsed as Paragraph markup.
                code_block_text += line.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + '\n'
                continue

            line_with_emoji = apply_emoji_font(line)

            # Markdown elements to Flowables
            if line.startswith("# "):
                story.append(Paragraph(line_with_emoji[2:], style_h1))
            elif line.startswith("## "):
                story.append(Paragraph(line_with_emoji[3:], style_h2))
            elif line.startswith("### "):
                story.append(Paragraph(line_with_emoji[4:], style_h3))
            elif stripped.startswith(("* ", "- ")):
                story.append(Paragraph(line_with_emoji.strip()[2:], style_normal, bulletText='\u2022'))
            elif re.match(r'^\d+\.\s', stripped):
                story.append(Paragraph(line_with_emoji.strip(), style_normal))
            elif stripped == "":
                story.append(Spacer(1, 0.1 * inch))
            else:
                formatted_line = re.sub(r'_(.*?)_', r'<i>\1</i>', re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line_with_emoji))
                story.append(Paragraph(formatted_line, style_normal))

        # Emit whatever is still pending when the page ends; otherwise a
        # trailing table or an unclosed code fence would be lost.
        if in_table and table_data:
            story.extend(_table_flowables(table_data))
        if in_code_block and code_block_text:
            story.append(Paragraph(code_block_text.replace('\n', '<br/>'), style_code))

        if i < len(pages) - 1:
            story.append(PageBreak())

    return story
|
| 150 |
+
|
| 151 |
+
# --- Gradio API Function ---
|
| 152 |
+
|
| 153 |
+
def generate_pdfs_api(files, layouts, fonts, combine_files, num_columns, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (document, layout, font) combination.

    Args:
        files: Uploaded Markdown files. Each item is either an object with a
            ``name`` attribute holding the file path, or the path itself.
        layouts: Selected layout names; each must be a key of LAYOUTS.
        fonts: Selected font names passed to markdown_to_story().
        combine_files: When true, all uploads are joined with the page-break
            marker and rendered as a single "Combined_Document".
        num_columns: Number of text columns per page (converted to int).
        progress: Gradio progress tracker used to iterate over the tasks.

    Returns:
        Tuple of (list of generated PDF paths, log text).

    Raises:
        gr.Error: If no files, layouts or fonts were provided.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    # A slider value may arrive as a float; range() and the file name below
    # need an int.
    num_columns = int(num_columns)

    # Start from an empty output directory so only this run's PDFs are returned.
    if OUTPUT_DIR.exists():
        shutil.rmtree(OUTPUT_DIR)
    OUTPUT_DIR.mkdir(exist_ok=True)

    log_lines = ["Starting PDF generation...\n"]

    # Accept both file-like upload objects and plain path strings.
    file_paths = [Path(getattr(md_file_obj, "name", md_file_obj)) for md_file_obj in files]
    md_contents = [path.read_text(encoding='utf-8') for path in file_paths]

    if combine_files:
        documents = [("Combined_Document", '\n\n---PAGE_BREAK---\n\n'.join(md_contents))]
    else:
        documents = [(path.stem, content) for path, content in zip(file_paths, md_contents)]

    tasks = [
        {"content": content, "layout": layout_name, "font": font_name, "filename_stem": filename_stem}
        for filename_stem, content in documents
        for layout_name in layouts
        for font_name in fonts
    ]

    gutter = 0.2 * inch  # horizontal gap between adjacent columns
    for task in progress.tqdm(tasks, desc="Generating PDFs"):
        try:
            date_str = datetime.datetime.now().strftime("%Y-%m-%d")
            output_filename = f"{task['filename_stem']}_{task['layout'].replace(' ', '-')}_{task['font']}_Cols{num_columns}_{date_str}.pdf"
            output_path = OUTPUT_DIR / output_filename
            log_lines.append(f" - Generating: {output_filename}\n")

            story = markdown_to_story(task['content'], task['font'])
            pagesize = LAYOUTS[task['layout']]["size"]

            if num_columns > 1:
                doc = BaseDocTemplate(str(output_path), pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch, topMargin=0.5*inch, bottomMargin=0.5*inch)
                # Columns plus the gutters between them fill doc.width exactly.
                frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
                frames = [
                    Frame(doc.leftMargin + col * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height, id=f'col{col}')
                    for col in range(num_columns)
                ]
                doc.addPageTemplates([PageTemplate(id='TwoCol', frames=frames)])
            else:
                doc = SimpleDocTemplate(str(output_path), pagesize=pagesize, leftMargin=inch, rightMargin=inch, topMargin=inch, bottomMargin=inch)
            doc.build(story)
        except Exception as e:
            # One failing document must not abort the remaining tasks; the
            # error is surfaced in the log shown to the user.
            log_lines.append(f" - **ERROR**: {e}\n")

    log_lines.append("\n\u2705 PDF generation complete!")
    log_updates = "".join(log_lines)
    generated_files = sorted(str(f) for f in OUTPUT_DIR.glob("*.pdf"))
    return generated_files, log_updates
|
| 207 |
+
|
| 208 |
+
# --- Gradio UI Definition ---
|
| 209 |
+
# Registers fonts at import time; the result is the list of selectable fonts.
AVAILABLE_FONTS = discover_and_register_fonts()

# NOTE(review): the non-ASCII characters in the UI strings below look like
# mis-encoded emoji. "βοΈ" and "π»" are probably U+2699 U+FE0F (gear) and
# U+1F4BB (laptop); the lone "π" characters cannot be identified from this
# copy. Confirm against the original file before shipping.
SAMPLE_MARKDOWN = "# Sample Document π\n\nThis document shows **bold text**, _italic text_, and emojis like π and π».\n\n### A Table\n| Flavor | Rating |\n|-------------|------------|\n| Chocolate | 10/10 |\n| Vanilla | 9/10 |"

with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# π Advanced PDF Generator with Emojis & Columns")
    gr.Markdown("Upload Markdown files, combine them, and generate multi-column PDFs with custom fonts and layouts.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### βοΈ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown Files (.md)", file_count="multiple", file_types=[".md"])
            combine_files_check = gr.Checkbox(label="Combine uploaded files into a single PDF", value=False)
            num_columns_slider = gr.Slider(label="Number of Columns", minimum=1, maximum=4, step=1, value=1)
            selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Page Layouts", value=list(LAYOUTS.keys())[0])

            # Fall back to a built-in font name when no .ttf could be registered.
            if not AVAILABLE_FONTS:
                gr.Warning("No text fonts found in 'fonts' directory. Using defaults.")
                AVAILABLE_FONTS = ["Helvetica"]

            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Text Fonts to Use", value=AVAILABLE_FONTS[0] if AVAILABLE_FONTS else None)
            generate_btn = gr.Button("π Generate PDFs", variant="primary")
            gr.Textbox(value=SAMPLE_MARKDOWN, label="Sample Markdown (for reference)", lines=10, interactive=False)

        # Right column: generation log and downloadable results.
        with gr.Column(scale=2):
            gr.Markdown("### π Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")

    # Input order matches the parameter order of generate_pdfs_api.
    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts, combine_files_check, num_columns_slider], outputs=[file_output, log_output])

if __name__ == "__main__":
    demo.launch()
|