Commit 2e375e5 (parent: 5d46fa8) · David committed

add pages selection

Files changed:
- describepdf/cli.py +10 -0
- describepdf/config.py +9 -35
- describepdf/core.py +82 -4
- describepdf/ui.py +26 -12
- describepdf/ui_ollama.py +32 -15
describepdf/cli.py CHANGED

@@ -61,6 +61,10 @@ def setup_cli_parser() -> argparse.ArgumentParser:
         help="VLM model to use (default: configured in .env)"
     )

+    parser.add_argument(
+        "--pages",
+        help="Pages to process (e.g. '1,3,5-10,15'). Default: all pages."
+    )
     parser.add_argument(
         "-l", "--language",
         help="Output language (default: configured in .env)"
@@ -157,6 +161,7 @@ def run_cli() -> None:
         "output_language": args.language if args.language else env_config.get("output_language"),
         "use_markitdown": args.use_markitdown if args.use_markitdown is not None else env_config.get("use_markitdown"),
         "use_summary": args.use_summary if args.use_summary is not None else env_config.get("use_summary"),
+        "page_selection": args.pages if args.pages else env_config.get("page_selection")
     }

     # Configure provider-specific settings
@@ -219,6 +224,11 @@ def run_cli() -> None:
     logger.info(f"Summary: {'Yes' if run_config['use_summary'] else 'No'}")
     if run_config.get('use_summary') and run_config.get('summary_llm_model'):
         logger.info(f"Summary model: {run_config['summary_llm_model']}")
+
+    if run_config.get('page_selection'):
+        logger.info(f"Page selection: {run_config['page_selection']}")
+    else:
+        logger.info("Page selection: All pages")

     # Create progress callback
     progress_callback = create_progress_callback()

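The new --pages flag falls back to the page_selection value loaded from .env when the flag is omitted. A minimal sketch of that fallback, with a hypothetical env_config dict standing in for the values the CLI loads from .env:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--pages",
    help="Pages to process (e.g. '1,3,5-10,15'). Default: all pages."
)

# No --pages on the command line; the .env-derived value should win.
args = parser.parse_args([])
env_config = {"page_selection": "2-4"}  # hypothetical, e.g. from DEFAULT_PAGE_SELECTION

page_selection = args.pages if args.pages else env_config.get("page_selection")
print(page_selection)  # -> "2-4"

# With an explicit flag, the CLI value takes precedence.
args = parser.parse_args(["--pages", "1,3,5-10,15"])
page_selection = args.pages if args.pages else env_config.get("page_selection")
print(page_selection)  # -> "1,3,5-10,15"
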
describepdf/config.py CHANGED

@@ -6,7 +6,7 @@ and prompt templates from files.
 """
 import os
 import logging
-from typing import Dict, Any, Optional
+from typing import Dict, Any, Optional, List
 from dotenv import load_dotenv
 import pathlib

@@ -14,41 +14,11 @@ import pathlib
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(module)s] - %(message)s')
 logger = logging.getLogger('describepdf')

-def _resolve_prompts_directory() -> pathlib.Path:
-    """
-    Resolve the path to the prompts directory with multiple fallback strategies.
-    Returns:
-        pathlib.Path: Path to the prompts directory
-    """
-    # List of potential paths to check
-    potential_paths = [
-        # Current file's parent directory
-        pathlib.Path(__file__).parent.parent / "prompts",
-
-        # Relative to the current working directory
-        pathlib.Path.cwd() / "prompts",
-
-        # Absolute path fallback (useful in deployment)
-        pathlib.Path("/app/prompts"),
-        pathlib.Path("/workspace/prompts"),
-
-        # Hugging Face Spaces specific path
-        pathlib.Path("/home/user/app/prompts")
-    ]
-
-    # Try each path
-    for path in potential_paths:
-        if path.is_dir():
-            logger.info(f"Prompts directory found at: {path}")
-            return path
-
-    # If no path is found
-    logger.error("Could not locate prompts directory. Using a temporary fallback.")
-    return pathlib.Path(__file__).parent / "prompts"
-
 # Directory containing prompt templates (making path absolute by using current file location)
 SCRIPT_DIR = pathlib.Path(__file__).parent.parent.absolute()
-PROMPTS_DIR =
+PROMPTS_DIR = pathlib.Path(SCRIPT_DIR) / "prompts"
+
+# Default configuration values

 # Default configuration values
 DEFAULT_CONFIG: Dict[str, Any] = {
@@ -62,7 +32,8 @@ DEFAULT_CONFIG: Dict[str, Any] = {

     "output_language": "English",
     "use_markitdown": False,
-    "use_summary": False
+    "use_summary": False,
+    "page_selection": None
 }

 # Mapping of prompt template identifiers to their file names
@@ -122,6 +93,9 @@ def load_env_config() -> Dict[str, Any]:

     if os.getenv("DEFAULT_USE_SUMMARY"):
         loaded_config["use_summary"] = str(os.getenv("DEFAULT_USE_SUMMARY")).lower() == 'true'
+
+    if os.getenv("DEFAULT_PAGE_SELECTION"):
+        loaded_config["page_selection"] = os.getenv("DEFAULT_PAGE_SELECTION")

     logger.info("Configuration loaded from environment variables.")

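A short sketch of how the new DEFAULT_PAGE_SELECTION override behaves, mirroring the lines added to load_env_config above (standalone approximation, not the real function; the environment value is hypothetical):

import os

DEFAULT_CONFIG = {"use_summary": False, "page_selection": None}  # subset of the defaults above
loaded_config = dict(DEFAULT_CONFIG)

os.environ["DEFAULT_PAGE_SELECTION"] = "1,3,5-10"  # hypothetical value, e.g. set in .env

if os.getenv("DEFAULT_PAGE_SELECTION"):
    loaded_config["page_selection"] = os.getenv("DEFAULT_PAGE_SELECTION")

print(loaded_config["page_selection"])  # -> "1,3,5-10"
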
describepdf/core.py CHANGED

@@ -24,22 +24,87 @@ class ConversionError(Exception):
     """Error raised during PDF conversion process."""
     pass

-def
+def parse_page_selection(selection_string: Optional[str], total_pages: int) -> List[int]:
+    """
+    Parse a page selection string into a list of page indices.
+
+    Args:
+        selection_string: String with page selection (e.g. "1,3,5-10,15")
+        total_pages: Total number of pages in the document
+
+    Returns:
+        List[int]: List of zero-based page indices to process
+    """
+    if not selection_string:
+        # Return all pages if selection is empty
+        return list(range(total_pages))
+
+    page_indices = []
+
+    try:
+        sections = selection_string.split(',')
+        for section in sections:
+            section = section.strip()
+            if not section:
+                continue
+
+            if '-' in section:
+                # Handle page range
+                start, end = section.split('-', 1)
+                start_idx = int(start.strip()) - 1  # Convert to 0-based index
+                end_idx = int(end.strip()) - 1
+
+                # Validate range
+                if start_idx < 0 or end_idx >= total_pages or start_idx > end_idx:
+                    logger.warning(f"Invalid page range: {section}. Must be between 1 and {total_pages}.")
+                    continue
+
+                page_indices.extend(range(start_idx, end_idx + 1))
+            else:
+                # Handle single page
+                page_idx = int(section) - 1  # Convert to 0-based index
+
+                # Validate page number
+                if page_idx < 0 or page_idx >= total_pages:
+                    logger.warning(f"Invalid page number: {section}. Must be between 1 and {total_pages}.")
+                    continue
+
+                page_indices.append(page_idx)
+
+        # Remove duplicates and sort
+        page_indices = sorted(set(page_indices))
+
+        if not page_indices:
+            logger.warning("No valid pages specified. Processing all pages.")
+            return list(range(total_pages))
+
+        return page_indices
+
+    except ValueError as e:
+        logger.error(f"Error parsing page selection '{selection_string}': {e}. Processing all pages.")
+        return list(range(total_pages))
+
+def format_markdown_output(descriptions: List[str], original_filename: str, page_numbers: Optional[List[int]] = None) -> str:
     """
     Combine page descriptions into a single Markdown file.

     Args:
         descriptions: List of strings, each being a description of a page
         original_filename: Name of the original PDF file
+        page_numbers: List of actual page numbers corresponding to descriptions (1-based)

     Returns:
         str: Complete Markdown content
     """
     md_content = f"# Description of PDF: {original_filename}\n\n"
+
     for i, desc in enumerate(descriptions):
-
+        # Use actual page number if provided, otherwise use sequential numbering
+        page_num = page_numbers[i] if page_numbers else (i + 1)
+        md_content += f"## Page {page_num}\n\n"
         md_content += desc if desc else "*No description generated for this page.*"
         md_content += "\n\n---\n\n"
+
     return md_content

 def convert_pdf_to_markdown(
@@ -175,7 +240,17 @@ def convert_pdf_to_markdown(
     page_processing_progress_start = pdf_load_progress
     total_page_progress_ratio = (0.98 - page_processing_progress_start) if total_pages > 0 else 0

-
+    # Parse page selection
+    page_selection = cfg.get("page_selection")
+    selected_indices = parse_page_selection(page_selection, total_pages)
+
+    if page_selection:
+        logger.info(f"Processing {len(selected_indices)} selected pages out of {total_pages} total pages.")
+    else:
+        logger.info(f"Processing all {total_pages} pages.")
+
+    for i in selected_indices:
+        page = pages[i]
         page_num = i + 1
         current_page_ratio = (page_num / total_pages) if total_pages > 0 else 1.0

@@ -306,7 +381,10 @@ def convert_pdf_to_markdown(
     # Generate final markdown
     final_progress = 0.99
     progress_callback(final_progress, "Combining page descriptions into final Markdown...")
-
+
+    actual_page_numbers = [i + 1 for i in selected_indices] if 'selected_indices' in locals() else None
+
+    final_markdown = format_markdown_output(all_descriptions, original_filename, actual_page_numbers)
     logger.info("Final Markdown content assembled.")

     # Report completion

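A quick check of the new parse_page_selection helper, assuming the package is importable as describepdf (indices are zero-based; invalid entries are skipped, and an empty result falls back to all pages):

from describepdf.core import parse_page_selection

print(parse_page_selection("1,3,5-10,15", total_pages=20))
# -> [0, 2, 4, 5, 6, 7, 8, 9, 14]

print(parse_page_selection(None, total_pages=3))
# -> [0, 1, 2]  (no selection means all pages)

print(parse_page_selection("99, 2-1", total_pages=5))
# -> [0, 1, 2, 3, 4]  (nothing valid, so the parser falls back to all pages)
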
describepdf/ui.py CHANGED

@@ -21,7 +21,7 @@ theme = gr.themes.Soft(
     spacing_size="lg",
 )

-def generate(
+def convert_pdf_to_descriptive_markdown(
     pdf_file_obj: Optional[gr.File],
     ui_api_key: str,
     ui_vlm_model: str,
@@ -29,25 +29,32 @@ def generate(
     ui_use_md: bool,
     ui_use_sum: bool,
     ui_sum_model: str,
+    ui_page_selection: str,
     progress: gr.Progress = gr.Progress(track_tqdm=True)
 ) -> Tuple[str, gr.update, Optional[str]]:
     """
-
+    Convert a PDF file to detailed page-by-page Markdown descriptions using Vision-Language Models.
+
+    This function processes the uploaded PDF, analyzing the visual and textual content of each page
+    using OpenRouter's Vision-Language Models (VLMs). It generates rich, contextual descriptions in
+    Markdown format that capture both the visual elements and text content of the document, making
+    the PDF accessible and searchable in contexts where traditional text extraction would fail.

     Args:
         pdf_file_obj: Gradio File object for the uploaded PDF
         ui_api_key: OpenRouter API key from UI
-        ui_vlm_model: VLM model name from UI
-        ui_lang: Output language
-        ui_use_md: Whether to use Markitdown
-        ui_use_sum: Whether to generate a summary
-        ui_sum_model: Summary model name from UI
+        ui_vlm_model: VLM model name from UI (e.g., qwen/qwen2.5-vl-72b-instruct)
+        ui_lang: Output language for descriptions (e.g., English, Spanish)
+        ui_use_md: Whether to use Markitdown for enhanced text extraction
+        ui_use_sum: Whether to generate a document summary for context
+        ui_sum_model: Summary model name from UI (e.g., google/gemini-2.5-flash-preview)
+        ui_page_selection: Optional page selection string (e.g., "1,3,5-10")
         progress: Gradio progress tracker

     Returns:
         Tuple containing:
-        - str: Status message
-        - gr.update: Download button update
+        - str: Status message indicating success or failure
+        - gr.update: Download button update with the result file
         - Optional[str]: Markdown result content
     """
     # Validate input file
@@ -67,7 +74,8 @@ def generate(
         "output_language": ui_lang,
         "use_markitdown": ui_use_md,
         "use_summary": ui_use_sum,
-        "summary_llm_model": ui_sum_model if ui_sum_model else env_config.get("or_summary_model")
+        "summary_llm_model": ui_sum_model if ui_sum_model else env_config.get("or_summary_model"),
+        "page_selection": ui_page_selection.strip() if ui_page_selection.strip() else None
     }

     # Validate API key
@@ -238,6 +246,12 @@ def create_ui() -> gr.Blocks:
             allow_custom_value=True,
             info="Select or type the desired output language (e.g., English, Spanish)"
         )
+        page_selection_input = gr.Textbox(
+            label="Page Selection (Optional)",
+            value="",
+            placeholder="Example: 1,3,5-10,15 (leave empty for all pages)",
+            info="Specify individual pages or ranges to process"
+        )
         with gr.Row():
             use_markitdown_checkbox = gr.Checkbox(
                 label="Use Markitdown for extra text context",
@@ -258,13 +272,13 @@ def create_ui() -> gr.Blocks:
         # Connect UI components
         conversion_inputs = [
             pdf_input, api_key_input, vlm_model_input, output_language_input,
-            use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input
+            use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input, page_selection_input
         ]
         conversion_outputs = [
             progress_output, download_button, markdown_output
         ]
         convert_button.click(
-            fn=generate,
+            fn=convert_pdf_to_descriptive_markdown,
             inputs=conversion_inputs,
             outputs=conversion_outputs
         )

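The Gradio handler normalizes the raw textbox value before it reaches the core converter, so whitespace-only input means "all pages". A tiny illustration of that expression from the diff above (the helper name here is hypothetical; the same expression is used in the Ollama UI below):

def normalize_page_selection(ui_page_selection: str):
    # Same expression used when building run_config in the UI handler.
    return ui_page_selection.strip() if ui_page_selection.strip() else None

print(normalize_page_selection("  1,3,5-10 "))  # -> "1,3,5-10"
print(normalize_page_selection("   "))          # -> None (treated as all pages)
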
describepdf/ui_ollama.py CHANGED

@@ -22,7 +22,7 @@ theme = gr.themes.Soft(
     spacing_size="lg",
 )

-def generate(
+def convert_pdf_to_descriptive_markdown(
     pdf_file_obj: Optional[gr.File],
     ollama_endpoint: str,
     ui_vlm_model: str,
@@ -30,25 +30,36 @@ def generate(
     ui_use_md: bool,
     ui_use_sum: bool,
     ui_sum_model: str,
+    ui_page_selection: str,
     progress: gr.Progress = gr.Progress(track_tqdm=True)
 ) -> Tuple[str, gr.update, Optional[str]]:
     """
-
+    Convert a PDF file to detailed page-by-page Markdown descriptions using local Ollama Vision-Language Models.
+
+    This function processes the uploaded PDF, analyzing the visual and textual content of each page
+    using locally hosted Vision-Language Models (VLMs) through Ollama. It generates rich, contextual
+    descriptions in Markdown format that capture both the visual elements and text content of the document,
+    making the PDF accessible and searchable in contexts where traditional text extraction would fail.
+
+    Unlike the OpenRouter version, this function utilizes local models running through Ollama,
+    providing privacy and eliminating the need for API keys, but potentially with different model options
+    and performance characteristics.

     Args:
         pdf_file_obj: Gradio File object for the uploaded PDF
-        ollama_endpoint: Ollama server endpoint URL
-        ui_vlm_model: VLM model name from UI
-        ui_lang: Output language
-        ui_use_md: Whether to use Markitdown
-        ui_use_sum: Whether to generate a summary
-        ui_sum_model: Summary model name from UI
+        ollama_endpoint: Ollama server endpoint URL (e.g., http://localhost:11434)
+        ui_vlm_model: VLM model name from UI (e.g., llama3.2-vision)
+        ui_lang: Output language for descriptions (e.g., English, Spanish)
+        ui_use_md: Whether to use Markitdown for enhanced text extraction
+        ui_use_sum: Whether to generate a document summary for context
+        ui_sum_model: Summary model name from UI (e.g., qwen2.5)
+        ui_page_selection: Optional page selection string (e.g., "1,3,5-10")
         progress: Gradio progress tracker

     Returns:
         Tuple containing:
-        - str: Status message
-        - gr.update: Download button update
+        - str: Status message indicating success or failure
+        - gr.update: Download button update with the result file
         - Optional[str]: Markdown result content
     """
     # Validate input file
@@ -69,7 +80,8 @@ def generate(
         "output_language": ui_lang,
         "use_markitdown": ui_use_md,
         "use_summary": ui_use_sum,
-        "summary_llm_model": ui_sum_model
+        "summary_llm_model": ui_sum_model,
+        "page_selection": ui_page_selection.strip() if ui_page_selection.strip() else None
     }

     # Create progress callback for Gradio
@@ -160,7 +172,7 @@ def create_ui() -> gr.Blocks:
         gr.Markdown("<center><img src='https://davidlms.github.io/DescribePDF/assets/poster.png' alt='Describe PDF Logo' width='600px'/></center>")
         gr.Markdown(
             """<div style="display: flex;align-items: center;justify-content: center">
-            [<a href="https://davidlms.github.io/
+            [<a href="https://davidlms.github.io/DescribePDF/">Project Page</a>] | [<a href="https://github.com/DavidLMS/describepdf">Github</a>]</div>
             """
         )
         gr.Markdown(
@@ -223,6 +235,12 @@ def create_ui() -> gr.Blocks:
             allow_custom_value=True,
             info="Select or type the desired output language (e.g., English, Spanish)"
         )
+        page_selection_input = gr.Textbox(
+            label="Page Selection (Optional)",
+            value="",
+            placeholder="Example: 1,3,5-10,15 (leave empty for all pages)",
+            info="Specify individual pages or ranges to process"
+        )
         with gr.Row():
             use_markitdown_checkbox = gr.Checkbox(
                 label="Use Markitdown for extra text context",
@@ -239,17 +257,16 @@ def create_ui() -> gr.Blocks:
             allow_custom_value=True,
             info="Select or type the Ollama LLM model name for summaries"
         )
-
         # Connect UI components
         conversion_inputs = [
             pdf_input, ollama_endpoint_input, vlm_model_input, output_language_input,
-            use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input
+            use_markitdown_checkbox, use_summary_checkbox, summary_llm_model_input, page_selection_input
         ]
         conversion_outputs = [
             progress_output, download_button, markdown_output
         ]
         convert_button.click(
-            fn=generate,
+            fn=convert_pdf_to_descriptive_markdown,
            inputs=conversion_inputs,
             outputs=conversion_outputs
         )