amine_dubs
committed on
Commit
·
52c54ab
1
Parent(s):
1913c15
- backend/main.py +128 -46
- backend/requirements.txt +2 -1
backend/main.py
CHANGED
|
@@ -728,30 +728,21 @@ async def download_translated_document(request: Request):
|
|
| 728 |
|
| 729 |
elif filename.endswith('.pdf'):
|
| 730 |
try:
|
| 731 |
-
# For PDF files,
|
| 732 |
-
# Try to create a simple PDF with reportlab, which should be available
|
| 733 |
try:
|
|
|
|
| 734 |
from reportlab.pdfgen import canvas
|
| 735 |
from reportlab.lib.pagesizes import letter
|
| 736 |
from io import BytesIO
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
from reportlab.lib.colors import black
|
| 740 |
|
| 741 |
# Create a PDF in memory
|
| 742 |
buffer = BytesIO()
|
| 743 |
c = canvas.Canvas(buffer, pagesize=letter)
|
| 744 |
|
| 745 |
-
#
|
| 746 |
-
|
| 747 |
-
# Try to use a system font that supports Arabic
|
| 748 |
-
pdfmetrics.registerFont(TTFont('Arabic', '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'))
|
| 749 |
-
font_name = 'Arabic'
|
| 750 |
-
except:
|
| 751 |
-
# Default to built-in Helvetica which has limited Arabic support
|
| 752 |
-
font_name = 'Helvetica'
|
| 753 |
-
|
| 754 |
-
# Set font
|
| 755 |
c.setFont(font_name, 12)
|
| 756 |
|
| 757 |
# Check if text contains Arabic
|
|
@@ -761,64 +752,154 @@ async def download_translated_document(request: Request):
|
|
| 761 |
lines = content.split('\n')
|
| 762 |
y_position = 750 # Start from top
|
| 763 |
|
| 764 |
-
# Draw text
|
| 765 |
for line in lines:
|
| 766 |
if line.strip():
|
| 767 |
-
# For Arabic,
|
| 768 |
if has_arabic:
|
| 769 |
-
#
|
| 770 |
text_width = c.stringWidth(line, font_name, 12)
|
| 771 |
-
|
|
|
|
| 772 |
else:
|
| 773 |
-
# Left-
|
| 774 |
-
c.drawString(
|
|
|
|
|
|
|
| 775 |
y_position -= 14
|
| 776 |
|
| 777 |
-
# Add a new page if
|
| 778 |
-
if y_position <
|
| 779 |
c.showPage()
|
|
|
|
| 780 |
y_position = 750
|
| 781 |
|
|
|
|
| 782 |
c.save()
|
| 783 |
|
| 784 |
# Get PDF content
|
| 785 |
pdf_content = buffer.getvalue()
|
| 786 |
buffer.close()
|
| 787 |
|
| 788 |
-
# Return PDF
|
| 789 |
return Response(
|
| 790 |
content=pdf_content,
|
| 791 |
media_type="application/pdf",
|
| 792 |
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
| 793 |
)
|
|
|
|
| 794 |
except ImportError:
|
| 795 |
-
|
| 796 |
-
|
|
|
|
| 797 |
from io import BytesIO
|
|
|
|
|
|
|
|
|
|
| 798 |
|
| 799 |
-
#
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
# Add text - keep it very simple
|
| 804 |
-
page.insert_text((72, 72), content)
|
| 805 |
-
|
| 806 |
-
# Save PDF
|
| 807 |
-
pdf_bytes = BytesIO()
|
| 808 |
-
doc.save(pdf_bytes)
|
| 809 |
-
pdf_bytes.seek(0)
|
| 810 |
-
doc.close()
|
| 811 |
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
media_type="application/pdf",
|
| 815 |
-
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
| 816 |
-
)
|
| 817 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
except Exception as e:
|
| 819 |
-
print(f"PDF creation error: {
|
| 820 |
traceback.print_exc()
|
| 821 |
-
|
|
|
|
| 822 |
return Response(
|
| 823 |
content=content.encode('utf-8'),
|
| 824 |
media_type="text/plain; charset=utf-8",
|
|
@@ -827,8 +908,9 @@ async def download_translated_document(request: Request):
|
|
| 827 |
"Content-Type": "text/plain; charset=utf-8"
|
| 828 |
}
|
| 829 |
)
|
|
|
|
| 830 |
except Exception as e:
|
| 831 |
-
print(f"PDF creation error: {
|
| 832 |
traceback.print_exc()
|
| 833 |
# Return a text file as fallback
|
| 834 |
return Response(
|
|
|
|
| 728 |
|
| 729 |
elif filename.endswith('.pdf'):
|
| 730 |
try:
|
| 731 |
+
# For PDF files, try multiple approaches
|
|
|
|
| 732 |
try:
|
| 733 |
+
# Try ReportLab first (which handles Arabic better)
|
| 734 |
from reportlab.pdfgen import canvas
|
| 735 |
from reportlab.lib.pagesizes import letter
|
| 736 |
from io import BytesIO
|
| 737 |
+
|
| 738 |
+
print("Using ReportLab for PDF generation")
|
|
|
|
| 739 |
|
| 740 |
# Create a PDF in memory
|
| 741 |
buffer = BytesIO()
|
| 742 |
c = canvas.Canvas(buffer, pagesize=letter)
|
| 743 |
|
| 744 |
+
# Use a basic font that should work with most installations
|
| 745 |
+
font_name = 'Helvetica'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 746 |
c.setFont(font_name, 12)
|
| 747 |
|
| 748 |
# Check if text contains Arabic
|
|
|
|
| 752 |
lines = content.split('\n')
|
| 753 |
y_position = 750 # Start from top
|
| 754 |
|
| 755 |
+
# Draw text line by line
|
| 756 |
for line in lines:
|
| 757 |
if line.strip():
|
| 758 |
+
# For Arabic, right-align the text
|
| 759 |
if has_arabic:
|
| 760 |
+
# Get width to calculate right alignment
|
| 761 |
text_width = c.stringWidth(line, font_name, 12)
|
| 762 |
+
# Position from right margin
|
| 763 |
+
c.drawString(letter[0] - 72 - text_width, y_position, line)
|
| 764 |
else:
|
| 765 |
+
# Left-align for non-Arabic text
|
| 766 |
+
c.drawString(72, y_position, line)
|
| 767 |
+
|
| 768 |
+
# Move down for next line
|
| 769 |
y_position -= 14
|
| 770 |
|
| 771 |
+
# Add a new page if needed
|
| 772 |
+
if y_position < 72:
|
| 773 |
c.showPage()
|
| 774 |
+
c.setFont(font_name, 12)
|
| 775 |
y_position = 750
|
| 776 |
|
| 777 |
+
# Save the PDF to the buffer
|
| 778 |
c.save()
|
| 779 |
|
| 780 |
# Get PDF content
|
| 781 |
pdf_content = buffer.getvalue()
|
| 782 |
buffer.close()
|
| 783 |
|
| 784 |
+
# Return the PDF
|
| 785 |
return Response(
|
| 786 |
content=pdf_content,
|
| 787 |
media_type="application/pdf",
|
| 788 |
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
| 789 |
)
|
| 790 |
+
|
| 791 |
except ImportError:
|
| 792 |
+
# Fall back to PyMuPDF with improved approach for Arabic
|
| 793 |
+
print("ReportLab not available, using PyMuPDF with improved Arabic handling")
|
| 794 |
+
import fitz
|
| 795 |
from io import BytesIO
|
| 796 |
+
import uuid
|
| 797 |
+
import os
|
| 798 |
+
import tempfile
|
| 799 |
|
| 800 |
+
# For PyMuPDF, we'll take a different approach for Arabic text:
|
| 801 |
+
# 1. Create a temporary HTML file with the Arabic text and proper RTL styling
|
| 802 |
+
# 2. Convert it to PDF using PyMuPDF's HTML parser
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 803 |
|
| 804 |
+
# Determine if we have Arabic text
|
| 805 |
+
has_arabic = any('\u0600' <= ch <= '\u06FF' for ch in content)
|
|
|
|
|
|
|
|
|
|
| 806 |
|
| 807 |
+
if has_arabic:
|
| 808 |
+
# Create a temporary HTML file with RTL direction for Arabic
|
| 809 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.html', mode='w', encoding='utf-8') as temp_file:
|
| 810 |
+
html_content = f"""<!DOCTYPE html>
|
| 811 |
+
<html dir="rtl" lang="ar">
|
| 812 |
+
<head>
|
| 813 |
+
<meta charset="UTF-8">
|
| 814 |
+
<title>Translated Document</title>
|
| 815 |
+
<style>
|
| 816 |
+
body {{
|
| 817 |
+
font-family: Arial, sans-serif;
|
| 818 |
+
direction: rtl;
|
| 819 |
+
text-align: right;
|
| 820 |
+
margin: 1.5cm;
|
| 821 |
+
font-size: 12pt;
|
| 822 |
+
line-height: 1.5;
|
| 823 |
+
}}
|
| 824 |
+
</style>
|
| 825 |
+
</head>
|
| 826 |
+
<body>
|
| 827 |
+
{content.replace('\n', '<br>')}
|
| 828 |
+
</body>
|
| 829 |
+
</html>"""
|
| 830 |
+
temp_file.write(html_content)
|
| 831 |
+
temp_html_path = temp_file.name
|
| 832 |
+
|
| 833 |
+
try:
|
| 834 |
+
# Convert HTML to PDF
|
| 835 |
+
doc = fitz.open()
|
| 836 |
+
|
| 837 |
+
# Load the HTML file as a separate document and insert it
|
| 838 |
+
html_doc = fitz.open(temp_html_path)
|
| 839 |
+
doc.insert_pdf(html_doc)
|
| 840 |
+
html_doc.close()
|
| 841 |
+
|
| 842 |
+
# Save to memory
|
| 843 |
+
pdf_bytes = BytesIO()
|
| 844 |
+
doc.save(pdf_bytes)
|
| 845 |
+
doc.close()
|
| 846 |
+
|
| 847 |
+
# Clean up temporary file
|
| 848 |
+
try:
|
| 849 |
+
os.unlink(temp_html_path)
|
| 850 |
+
except:
|
| 851 |
+
pass
|
| 852 |
+
|
| 853 |
+
# Return the PDF
|
| 854 |
+
return Response(
|
| 855 |
+
content=pdf_bytes.getvalue(),
|
| 856 |
+
media_type="application/pdf",
|
| 857 |
+
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
| 858 |
+
)
|
| 859 |
+
except Exception as html_err:
|
| 860 |
+
print(f"HTML conversion failed: {html_err}")
|
| 861 |
+
# Clean up temp file if it exists
|
| 862 |
+
try:
|
| 863 |
+
os.unlink(temp_html_path)
|
| 864 |
+
except:
|
| 865 |
+
pass
|
| 866 |
+
|
| 867 |
+
# Fall back to text file since all PDF attempts failed
|
| 868 |
+
return Response(
|
| 869 |
+
content=content.encode('utf-8'),
|
| 870 |
+
media_type="text/plain; charset=utf-8",
|
| 871 |
+
headers={
|
| 872 |
+
"Content-Disposition": f"attachment; filename={filename.replace('.pdf', '.txt')}",
|
| 873 |
+
"Content-Type": "text/plain; charset=utf-8"
|
| 874 |
+
}
|
| 875 |
+
)
|
| 876 |
+
else:
|
| 877 |
+
# For non-Arabic text, use the simpler PDF creation method
|
| 878 |
+
doc = fitz.open()
|
| 879 |
+
page = doc.new_page()
|
| 880 |
+
|
| 881 |
+
# Add text content
|
| 882 |
+
rect = fitz.Rect(72, 72, page.rect.width-72, page.rect.height-72)
|
| 883 |
+
page.insert_text((72, 72), content, fontsize=11)
|
| 884 |
+
|
| 885 |
+
# Save to memory
|
| 886 |
+
pdf_bytes = BytesIO()
|
| 887 |
+
doc.save(pdf_bytes)
|
| 888 |
+
pdf_bytes.seek(0)
|
| 889 |
+
doc.close()
|
| 890 |
+
|
| 891 |
+
# Return the PDF
|
| 892 |
+
return Response(
|
| 893 |
+
content=pdf_bytes.getvalue(),
|
| 894 |
+
media_type="application/pdf",
|
| 895 |
+
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
| 896 |
+
)
|
| 897 |
+
|
| 898 |
except Exception as e:
|
| 899 |
+
print(f"PDF creation error with advanced methods: {e}")
|
| 900 |
traceback.print_exc()
|
| 901 |
+
|
| 902 |
+
# Fall back to text file if all PDF attempts fail
|
| 903 |
return Response(
|
| 904 |
content=content.encode('utf-8'),
|
| 905 |
media_type="text/plain; charset=utf-8",
|
|
|
|
| 908 |
"Content-Type": "text/plain; charset=utf-8"
|
| 909 |
}
|
| 910 |
)
|
| 911 |
+
|
| 912 |
except Exception as e:
|
| 913 |
+
print(f"Overall PDF creation error: {e}")
|
| 914 |
traceback.print_exc()
|
| 915 |
# Return a text file as fallback
|
| 916 |
return Response(
|
backend/requirements.txt
CHANGED
|
@@ -13,4 +13,5 @@ sentencepiece
|
|
| 13 |
tensorflow
|
| 14 |
accelerate
|
| 15 |
langdetect
|
| 16 |
-
hf_xet
|
|
|
|
|
|
| 13 |
tensorflow
|
| 14 |
accelerate
|
| 15 |
langdetect
|
| 16 |
+
hf_xet
|
| 17 |
+
reportlab # Added for PDF generation with Arabic support
|