# texttopdf / app.py
# Opera8's picture
# Update app.py
# 0b4df0b verified
import os
import io
import traceback
import re
from flask import Flask, request, send_file, render_template
from flask_cors import CORS
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt
from weasyprint import HTML, CSS
import arabic_reshaper
from bidi.algorithm import get_display # برای راست‌چین و ترکیب حروف فارسی
import markdown
from htmldocx import HtmlToDocx
# Flask application object; the route handler in this module is registered on it.
app = Flask(__name__)
# Apply flask-cors to the whole application using the extension's default settings.
CORS(app)
# Absolute path of the directory containing this module; handed to WeasyPrint
# as ``base_url`` when rendering PDFs.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Font file referenced by the PDF stylesheet's @font-face rule
# (presumably expected to sit next to this module — TODO confirm).
FONT_FILE_NAME = "Vazirmatn-Regular.ttf"
# Footer line appended to every exported document (DOCX, PDF, TXT and HTML).
FOOTER_TEXT = "هوش مصنوعی آلفا دانلود از گوگل پلی"
def get_line_direction(line):
    """Classify a line of text as right-to-left or left-to-right.

    Returns ``'rtl'`` when *line* contains at least one character from the
    Arabic (U+0600-U+06FF) or Arabic Supplement (U+0750-U+077F) blocks and
    ``'ltr'`` otherwise. Empty, ``None`` and whitespace-only input is
    reported as ``'ltr'``.
    """
    if line and not line.isspace():
        if re.search(r'[\u0600-\u06FF\u0750-\u077F]', line) is not None:
            return 'rtl'
    return 'ltr'
def reshape_rtl_text(text):
    """Return *text* shaped by ``arabic_reshaper`` and then passed through ``get_display``."""
    # Shape first, reorder second: get_display() receives the reshaper's output.
    return get_display(arabic_reshaper.reshape(text))
# Opening tags of the block-level elements that receive the RTL defaults.
# The attribute group must start with whitespace, so "<p" never matches "<pre"
# and "<th" never matches "<thead".
_RTL_TAG_PATTERN = re.compile(
    r'<(?P<tag>p|h[1-6]|ul|ol|li|table|th|td)(?P<attrs>\s[^>]*)?>',
    re.IGNORECASE,
)
# An inline style attribute (single- or double-quoted) inside a tag's attribute text.
_STYLE_ATTR_PATTERN = re.compile(
    r'(?P<head>\sstyle\s*=\s*)(?P<quote>["\'])(?P<css>.*?)(?P=quote)',
    re.IGNORECASE | re.DOTALL,
)
# Default presentation applied to every matched tag.
_RTL_STYLE = 'text-align: right; direction: rtl; font-family: Vazir, sans-serif;'


def _add_rtl_style(match):
    """Return the matched opening tag with the RTL defaults applied."""
    tag = match.group('tag').lower()
    attrs = match.group('attrs') or ''
    lowered = attrs.lower()
    # Tags explicitly marked left-to-right / left-aligned are left untouched.
    if 'dir="ltr"' in lowered or 'style="text-align: left' in lowered:
        return match.group(0)

    existing = _STYLE_ATTR_PATTERN.search(attrs)
    if existing is None:
        return f'<{tag}{attrs} style="{_RTL_STYLE}">'

    # The tag already has a style attribute (e.g. a table cell with an
    # alignment). Emitting a second style attribute would be invalid HTML, so
    # merge instead. The defaults go first so the author's own declarations,
    # coming later, still take precedence.
    css = existing.group('css').strip()
    merged_css = f'{_RTL_STYLE} {css}' if css else _RTL_STYLE
    head = existing.group('head')
    quote = existing.group('quote')
    merged_attrs = (
        attrs[:existing.start()]
        + f'{head}{quote}{merged_css}{quote}'
        + attrs[existing.end():]
    )
    return f'<{tag}{merged_attrs}>'


def get_base_html_for_conversion(text_content):
    """Convert Markdown text to an HTML fragment styled for right-to-left output.

    The text is rendered with the ``extra``, ``tables``, ``nl2br`` and
    ``sane_lists`` Markdown extensions, passed through
    ``arabic_reshaper.reshape``, and then every ``p``, ``h1``-``h6``, ``ul``,
    ``ol``, ``li``, ``table``, ``th`` and ``td`` opening tag receives an inline
    style setting right alignment, RTL direction and the Vazir font family.

    Tags carrying ``dir="ltr"`` or a style starting with ``text-align: left``
    are left as they are. When a tag already has a ``style`` attribute the
    defaults are merged into it rather than added as a duplicate attribute.

    Args:
        text_content: Markdown source text.

    Returns:
        The resulting HTML fragment as a string.
    """
    raw_html = markdown.markdown(
        text_content,
        extensions=['extra', 'tables', 'nl2br', 'sane_lists'],
    )
    reshaped_html = arabic_reshaper.reshape(raw_html)
    # One pass over the markup handles every tag of interest.
    return _RTL_TAG_PATTERN.sub(_add_rtl_style, reshaped_html)
def _append_footer_and_serialize(document):
    """Append the bold, centred footer to *document* and return it as a rewound BytesIO."""
    footer_p = document.add_paragraph(FOOTER_TEXT)
    footer_p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    for run in footer_p.runs:
        run.bold = True
    buffer = io.BytesIO()
    document.save(buffer)
    buffer.seek(0)
    return buffer


def create_docx(text_content):
    """Build a Word document from Markdown text and return it as a BytesIO.

    The text is first converted to styled HTML and imported with
    ``HtmlToDocx``. If anything in that path raises, the failure is printed
    and a plain document is produced instead: one right-aligned paragraph per
    non-blank input line, with the Normal style font set to Arial. Both paths
    end with a spacer paragraph and the bold, centred footer.

    Args:
        text_content: Markdown (or plain) source text.

    Returns:
        ``io.BytesIO`` positioned at offset 0 containing the .docx bytes.
    """
    try:
        document = Document()
        parser = HtmlToDocx()
        html_content = get_base_html_for_conversion(text_content)
        parser.add_html_to_document(html_content, document)
        document.add_paragraph("")
        return _append_footer_and_serialize(document)
    except Exception as e:
        # Deliberate best-effort: any conversion failure falls through to the
        # plain-text document below instead of failing the request.
        print(f"⚠️ HTML to DOCX Failed: {e}")

    # Fallback: if the Markdown could not be converted, build a simple Word file.
    document = Document()
    document.styles['Normal'].font.name = 'Arial'
    for line in text_content.split('\n'):
        if line.strip():
            # Shape the letters only, as the HTML path does. The text must stay
            # in logical order: Word performs bidi layout itself, so text
            # pre-reordered with get_display() would be rendered reversed.
            p = document.add_paragraph(arabic_reshaper.reshape(line))
            p.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    document.add_paragraph("\n")
    return _append_footer_and_serialize(document)
def create_pdf_with_weasyprint(text_content):
    """Render Markdown text to a PDF with WeasyPrint and return it as a BytesIO.

    The Markdown is converted to styled HTML, wrapped in a full page with an
    embedded stylesheet and the footer, and rendered with ``BASE_DIR`` as the
    base URL. If rendering raises, the error is printed and the result of
    ``create_txt`` (an error notice followed by the original text) is
    returned instead.

    Args:
        text_content: Markdown source text.

    Returns:
        ``io.BytesIO`` with the PDF bytes, or the plain-text fallback buffer.
    """
    body_markup = get_base_html_for_conversion(text_content)
    footer_markup = FOOTER_TEXT
    # NOTE(review): body_markup is derived from request input and may contain
    # raw HTML; confirm whether WeasyPrint's URL fetching needs restricting.
    document_markup = f"""
<!DOCTYPE html><html lang="fa"><head><meta charset="UTF-8"><title>Exported PDF</title>
<style>
@font-face {{ font-family: 'Vazir'; src: url('{FONT_FILE_NAME}'); }}
body {{ font-family: 'Vazir', sans-serif; font-size: 12pt; line-height: 1.8; color: #111; }}
h1 {{ font-size: 24pt; color: #0d5c75; border-bottom: 2px solid #0d5c75; padding-bottom: 5px; margin-top: 30px; margin-bottom: 15px; }}
h2 {{ font-size: 20pt; color: #1a7b9c; margin-top: 25px; margin-bottom: 10px; }}
h3 {{ font-size: 16pt; color: #222; margin-top: 20px; }}
h4, h5, h6 {{ font-size: 14pt; color: #333; font-weight: bold; }}
strong, b {{ font-weight: bold; color: #000; }}
em, i {{ font-style: italic; color: #444; }}
blockquote {{ border-right: 4px solid #1095c1; margin: 15px 0; padding: 10px 15px 10px 0; color: #555; background-color: #f7fbff; border-radius: 4px; }}
code {{ font-family: monospace; background-color: #f1f1f1; padding: 2px 6px; border-radius: 4px; font-size: 11pt; direction: ltr; display: inline-block; color: #c7254e; word-wrap: break-word; }}
pre {{ background-color: #f8f9fa; padding: 15px; border-radius: 6px; border: 1px solid #e1e1e8; direction: ltr; text-align: left; overflow-x: auto; white-space: pre-wrap; }}
pre code {{ background-color: transparent; padding: 0; color: #333; }}
table {{ border-collapse: collapse; width: 100%; margin-top: 20px; margin-bottom: 20px; }}
th, td {{ border: 1px solid #ccc; padding: 10px; text-align: right; }}
th {{ background-color: #e9ecef; font-weight: bold; }}
p {{ margin: 0 0 12px 0; padding: 0; text-align: justify; }}
.footer {{ margin-top: 50px; padding-top: 15px; border-top: 2px solid #1095c1; text-align: center; color: #1095c1; font-size: 11pt; font-weight: bold; font-family: 'Vazir', sans-serif; page-break-inside: avoid; }}
</style></head><body>{body_markup}
<div class="footer">{footer_markup}</div></body></html>
"""
    try:
        renderer = HTML(string=document_markup, base_url=BASE_DIR)
        pdf_bytes = renderer.write_pdf()
        return io.BytesIO(pdf_bytes)
    except Exception as error:
        print(f"🔥🔥🔥 WEASYPRINT FAILED! 🔥🔥🔥\n{error}")
        # On failure, return a simple text output instead of raising.
        # NOTE(review): that buffer holds plain text, not PDF bytes — confirm
        # the caller labels the download accordingly.
        notice = "خطا در ساخت PDF به دلیل پیچیدگی متن. لطفا از فایل DOCX استفاده کنید.\n\n"
        return create_txt(notice + text_content)
def create_txt(text_content):
    """Return *text_content* plus a ``---`` separator and the footer as UTF-8 bytes in a BytesIO."""
    stream = io.BytesIO()
    stream.write(f"{text_content}\n\n\n---\n{FOOTER_TEXT}".encode('utf-8'))
    # Rewind so readers start at the first byte, as with BytesIO(initial_bytes).
    stream.seek(0)
    return stream
def create_html(text_content):
    """Render Markdown text to a standalone HTML page and return it as a BytesIO.

    The Markdown is converted to styled HTML and wrapped in a complete
    right-to-left page with an embedded stylesheet and the footer.

    Args:
        text_content: Markdown source text.

    Returns:
        ``io.BytesIO`` containing the UTF-8 encoded HTML document.
    """
    html_body = get_base_html_for_conversion(text_content)
    # In the blockquote rule the ``padding`` shorthand comes before
    # ``padding-right``; in the opposite order the shorthand would reset the
    # right padding to 10px and the 15px value would never apply.
    full_html = f"""
<!DOCTYPE html><html lang="fa"><head><meta charset="UTF-8"><title>Exported File</title>
<style>
body {{ font-size: 12pt; line-height: 1.8; max-width: 800px; margin: 2rem auto; padding: 2rem; border: 1px solid #ddd; font-family: sans-serif; direction: rtl; color: #222; }}
h1 {{ color: #0d5c75; border-bottom: 2px solid #ddd; padding-bottom: 5px; }}
h2 {{ color: #1a7b9c; }}
blockquote {{ border-right: 4px solid #1095c1; margin: 0; padding: 10px; padding-right: 15px; background: #f9f9f9; }}
code {{ font-family: monospace; background-color: #f4f4f4; padding: 2px 5px; border-radius: 4px; direction: ltr; display: inline-block; color: #c7254e; word-wrap: break-word; }}
pre {{ background-color: #f4f4f4; padding: 15px; border-radius: 5px; direction: ltr; text-align: left; overflow-x: auto; white-space: pre-wrap; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ddd; padding: 8px; }}
th {{ background-color: #f2f2f2; }}
p {{ margin: 0 0 10px 0; text-align: justify; }}
.footer {{ margin-top: 3rem; padding-top: 1rem; border-top: 2px solid #1095c1; text-align: center; color: #1095c1; font-weight: bold; font-size: 11pt; }}
</style></head><body>{html_body}
<div class="footer">{FOOTER_TEXT}</div></body></html>
"""
    return io.BytesIO(full_html.encode('utf-8'))
def process_request(content, file_format):
    """Convert *content* to the requested format and return it as a download.

    Args:
        content: Source text to export.
        file_format: One of ``'pdf'``, ``'docx'``, ``'html'`` or ``'txt'``.
            Any other value is treated as ``'txt'``.

    Returns:
        The Flask response produced by ``send_file`` with the file attached
        as ``export.<format>``.
    """
    actions = {
        'pdf': create_pdf_with_weasyprint,
        'docx': create_docx,
        'html': create_html,
        'txt': create_txt,
    }
    mimetypes = {
        'pdf': 'application/pdf',
        'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'html': 'text/html',
        'txt': 'text/plain',
    }
    # Unknown formats are rendered as plain text, so the download must also be
    # named and typed as plain text rather than echoing the caller's string.
    if file_format not in actions:
        file_format = 'txt'
    buffer = actions[file_format](content)
    # The PDF builder returns a plain-text buffer when rendering fails. A real
    # PDF starts with the "%PDF-" header; anything else is served as text so
    # the user can actually open it.
    if file_format == 'pdf' and not buffer.getvalue().startswith(b'%PDF-'):
        file_format = 'txt'
    return send_file(
        buffer,
        as_attachment=True,
        download_name=f'export.{file_format}',
        mimetype=mimetypes[file_format],
    )
@app.route('/', methods=['GET', 'POST', 'HEAD'])
def index():
    """Serve the upload form (GET), answer HEAD probes, or export posted text (POST).

    POST reads the ``content`` and ``format`` form fields. A missing, empty
    or whitespace-only ``content`` yields a 400 response; a missing or empty
    ``format`` defaults to ``'txt'``.
    """
    if request.method == 'HEAD':
        return '', 200
    if request.method != 'POST':
        return render_template('index.html')

    content = request.form.get('content')
    # ``or 'txt'`` also covers a field that is present but empty.
    file_format = (request.form.get('format') or 'txt').strip().lower()
    # Whitespace-only text has nothing to convert, so treat it like empty input.
    if not content or not content.strip():
        return "لطفا متنی برای تبدیل وارد کنید.", 400
    return process_request(content, file_format)
if __name__ == '__main__':
    # Run Flask's built-in server on all interfaces, port 7860, with debug mode off.
    app.run(host='0.0.0.0', port=7860, debug=False)