Как преобразовать FB2 в формат HTML на Python
У меня есть файл book.fb2, и мне нужно преобразовать его в формат .html. Подскажите, пожалуйста, как это сделать на Python.
Ответы (1 шт):
Автор решения: vadim vaduxa
→ Ссылка
Задайте вопрос нейросети, например Grok: «python fb2 to html», и вот что она вам ответит:
# -*- coding: UTF-8 -*-
import html
import re
import xml.etree.ElementTree as ET
# FictionBook 2.0 XML namespace and the fully-qualified tag names used below.
_FB2_NS_URI = 'http://www.gribuser.ru/xml/fictionbook/2.0'
_FB2_NS = {'fb': _FB2_NS_URI}
_TAG_SECTION = f'{{{_FB2_NS_URI}}}section'
_TAG_TITLE = f'{{{_FB2_NS_URI}}}title'
_TAG_P = f'{{{_FB2_NS_URI}}}p'

# <body name="..."> values that are skipped during conversion.
_SKIPPED_BODIES = frozenset({'notes', 'comments'})

# Static document prologue written before any book content.
_HTML_HEAD = (
    '<!DOCTYPE html>', '<html lang="en">', '<head>',
    '<meta charset="UTF-8">',
    '<title>FB2 to HTML Conversion</title>',
    '<style>',
    'body { font-family: Arial, sans-serif; margin: 40px; line-height: 1.6; }',
    'h1 { color: #333; }',
    'h2 { color: #555; }',
    'p { margin: 10px 0; }',
    '</style>', '</head>', '<body>',
)


def _append_element(html_content, tag, element):
    """Append ``<tag>text</tag>`` for *element*, skipping empty text.

    The text of *element* and all its descendants is concatenated, stripped
    and HTML-escaped (``&``, ``<``, ``>``) before being wrapped in *tag*.
    """
    text = ''.join(element.itertext()).strip()
    if text:
        html_content.append(f'<{tag}>{html.escape(text, quote=False)}</{tag}>')


def _render_section(section, html_content):
    """Append the HTML for one FB2 section, recursing into nested sections.

    Children are visited in document order so that every paragraph is
    emitted exactly once and stays under the heading of its own section.
    """
    for child in section:
        if child.tag == _TAG_SECTION:
            _render_section(child, html_content)
        elif child.tag == _TAG_TITLE:
            # The title's inner <p> elements become the heading text only;
            # they must not be repeated as body paragraphs.
            _append_element(html_content, 'h2', child)
        elif child.tag == _TAG_P:
            _append_element(html_content, 'p', child)
        else:
            # Other containers (epigraph, cite, annotation, ...): keep their
            # paragraphs as plain <p> elements.
            for paragraph in child.iter(_TAG_P):
                _append_element(html_content, 'p', paragraph)


def fb2_to_html(fb2_file, html_file):
    """Convert a FictionBook 2.0 file into a simple standalone HTML page.

    The book title becomes an ``<h1>``, every section title an ``<h2>`` and
    every paragraph a ``<p>``. Bodies named ``notes`` or ``comments`` are
    skipped. All text is HTML-escaped before it is written.

    Args:
        fb2_file: Path of the FB2 (XML) file to read.
        html_file: Path of the HTML file to write (UTF-8, overwritten).

    Returns:
        None. Success and failure are reported on standard output; parse
        errors, a missing input file and any other exception are caught
        and printed rather than propagated.
    """
    try:
        root = ET.parse(fb2_file).getroot()

        html_content = list(_HTML_HEAD)

        # Book title from the description block, if present.
        book_title = root.find(
            'fb:description/fb:title-info/fb:book-title', _FB2_NS)
        if book_title is not None:
            _append_element(html_content, 'h1', book_title)

        for body in root.findall('.//fb:body', _FB2_NS):
            if body.get('name') in _SKIPPED_BODIES:
                continue
            # Only top-level sections here; nested ones are handled by the
            # recursion, which avoids emitting their paragraphs twice.
            for section in body.findall('fb:section', _FB2_NS):
                _render_section(section, html_content)

        html_content.append('</body>')
        html_content.append('</html>')

        with open(html_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(html_content))
        print(f"Successfully converted {fb2_file} to {html_file}")
    except ET.ParseError as e:
        print(f"Error parsing FB2 file: {e}")
    except FileNotFoundError:
        print(f"File {fb2_file} not found")
    except Exception as e:
        # Best-effort script behaviour: report instead of crashing.
        print(f"An error occurred: {e}")
# Example usage: python script.py [input.fb2] [output.html]
if __name__ == "__main__":
    import sys

    # Optional command-line paths override the example defaults below.
    cli_args = sys.argv[1:]
    fb2_file = cli_args[0] if cli_args else 'D:\\downloads\\Around the World in 28 Languages.fb2'
    html_file = cli_args[1] if len(cli_args) > 1 else 'output.html'
    fb2_to_html(fb2_file, html_file)