import os
import re

# Patterns for markdown constructs that are stripped before segmentation.
# Compiled once at import time instead of on every call.
_CODE_BLOCK_RE = re.compile(r'```.*?```', re.DOTALL)   # fenced code blocks
_HTML_TAG_RE = re.compile(r'<.*?>', re.DOTALL)         # anything between < and >
_INLINE_LINK_RE = re.compile(r'\[.*?\]\(.*?\)')        # whole [text](url) link
_BLANK_LINE_RE = re.compile(r'\n\s*\n')                # paragraph separator


def parse_markdown_files(file_paths):
    """
    Parses markdown files to extract content for translation.

    Each file is read in full as UTF-8 text; no markdown processing is
    done here.

    :param file_paths: List of markdown file paths
    :return: List of dictionaries with the keys ``'filename'`` (the path
        as given) and ``'content'`` (the file's full text)
    """
    parsed_files = []
    for path in file_paths:
        with open(path, 'r', encoding='utf-8') as f:
            parsed_files.append({'filename': path, 'content': f.read()})
    return parsed_files


def extract_translatable_text(content):
    """
    Extracts translatable text from markdown content.

    Fenced code blocks, HTML tags and inline links are removed, then the
    remaining text is split on blank lines.

    :param content: Markdown content
    :return: List of non-empty, whitespace-stripped text segments
    """
    # Order matters: code blocks go first so that '<', '[' or '(' inside
    # code cannot pair up with characters in the surrounding prose.
    content = _CODE_BLOCK_RE.sub('', content)
    # Only the tags themselves are removed; text between an opening and a
    # closing tag is kept.
    content = _HTML_TAG_RE.sub('', content)
    # NOTE: this drops the complete link, including its visible text,
    # not just the URL part.
    content = _INLINE_LINK_RE.sub('', content)

    # Paragraphs and headers are separated by one or more blank lines.
    segments = (part.strip() for part in _BLANK_LINE_RE.split(content))
    return [segment for segment in segments if segment]


def save_translated_files(translated_files):
    """
    Saves translated files to the local machine.

    Missing parent directories are created. A filename without a
    directory component is written relative to the current working
    directory.

    :param translated_files: List of dictionaries with the keys
        ``'filename'`` (destination path) and ``'content'`` (text to write)
    """
    for entry in translated_files:
        directory = os.path.dirname(entry['filename'])
        # dirname() is '' for a bare filename, and os.makedirs('') raises
        # FileNotFoundError, so only create a directory when one is named.
        # exist_ok avoids the race between an existence check and creation.
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(entry['filename'], 'w', encoding='utf-8') as f:
            f.write(entry['content'])