Как преобразовать FB2 в формат HTML на Python?

У меня есть файл book.fb2, и мне нужно преобразовать его в формат .html. Подскажите, пожалуйста, как это сделать на Python.


Ответы (1 шт):

Автор решения: vadim vaduxa

Задайте вопрос нейросети, например Grok: «python fb2 to html», и вот что он вам ответит:

# -*- coding: UTF-8 -*-
import html
import re
import xml.etree.ElementTree as ET


# Namespace URI used by FictionBook 2.0 documents, plus the fully qualified
# tag names needed when inspecting child elements directly.
_FB2_URI = 'http://www.gribuser.ru/xml/fictionbook/2.0'
_FB2_NS = {'fb': _FB2_URI}
_SECTION_TAG = f'{{{_FB2_URI}}}section'
_TITLE_TAG = f'{{{_FB2_URI}}}title'
_PARAGRAPH_TAG = f'{{{_FB2_URI}}}p'
# <body name="..."> values that hold auxiliary material rather than book text.
_SKIPPED_BODIES = ('notes', 'comments')


def _escaped_text(element):
    """Return the stripped text content of *element*, escaped for HTML."""
    text = ''.join(element.itertext()).strip()
    # quote=False: the text goes between tags, never inside an attribute.
    return html.escape(text, quote=False)


def _render_section(section, html_content):
    """Append the HTML for *section* and its nested sections in document order."""
    for child in section:
        if child.tag == _TITLE_TAG:
            title_text = _escaped_text(child)
            if title_text:
                html_content.append(f'<h2>{title_text}</h2>')
        elif child.tag == _SECTION_TAG:
            # Nested sections render themselves, so each paragraph is
            # emitted exactly once instead of once per enclosing section.
            _render_section(child, html_content)
        else:
            # Covers a plain <p> as well as containers such as <cite> or
            # <epigraph> whose paragraphs live one or more levels deeper.
            for paragraph in child.iter(_PARAGRAPH_TAG):
                para_text = _escaped_text(paragraph)
                if para_text:
                    html_content.append(f'<p>{para_text}</p>')


def fb2_to_html(fb2_file, html_file):
    """Convert a FictionBook 2.0 file into a simple standalone HTML page.

    The book title becomes an ``<h1>``, every section title an ``<h2>`` and
    every paragraph a ``<p>``. Bodies named ``notes`` or ``comments`` are
    skipped. All text is HTML-escaped before being written.

    Args:
        fb2_file: Path of the FB2 (XML) file to read.
        html_file: Path of the HTML file to write (UTF-8).

    Returns:
        None. Failures are reported with ``print`` rather than raised, so
        callers never see an exception from this function.
    """
    try:
        # Parse the FB2 file
        tree = ET.parse(fb2_file)
        root = tree.getroot()

        # Initialize HTML content
        html_content = ['<!DOCTYPE html>', '<html lang="en">', '<head>',
                        '<meta charset="UTF-8">',
                        '<title>FB2 to HTML Conversion</title>',
                        '<style>',
                        'body { font-family: Arial, sans-serif; margin: 40px; line-height: 1.6; }',
                        'h1 { color: #333; }',
                        'h2 { color: #555; }',
                        'p { margin: 10px 0; }',
                        '</style>', '</head>', '<body>']

        # Extract book title from description (if available)
        description = root.find('fb:description/fb:title-info/fb:book-title', _FB2_NS)
        if description is not None:
            book_title = _escaped_text(description)
            if book_title:
                html_content.append(f'<h1>{book_title}</h1>')

        # Process main body content
        for body in root.findall('.//fb:body', _FB2_NS):
            # Skip bodies like notes or comments
            if body.get('name') in _SKIPPED_BODIES:
                continue

            # Only top-level sections here; nesting is handled recursively.
            for section in body.findall('fb:section', _FB2_NS):
                _render_section(section, html_content)

        # Close HTML tags
        html_content.append('</body>')
        html_content.append('</html>')

        # Write to output HTML file
        with open(html_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(html_content))

        print(f"Successfully converted {fb2_file} to {html_file}")

    except ET.ParseError as e:
        print(f"Error parsing FB2 file: {e}")
    except FileNotFoundError as e:
        # The missing path may be the output location, not only the input.
        missing = e.filename if e.filename is not None else fb2_file
        print(f"File {missing} not found")
    except Exception as e:
        print(f"An error occurred: {e}")


# Demo entry point: converts one sample book when the file is run as a script.
if __name__ == "__main__":
    # Swap these two paths for your own input book and output location.
    html_file = 'output.html'
    fb2_file = 'D:\\downloads\\Around the World in 28 Languages.fb2'
    fb2_to_html(fb2_file=fb2_file, html_file=html_file)
→ Ссылка