import json
import os
import re

from deep_translator import GoogleTranslator

# Square-bracketed spans, matched non-greedily and across line breaks.
_BRACKETED_RE = re.compile(r"\[.*?\]", flags=re.DOTALL)
# Runs of anything that is neither a Hebrew letter (U+05D0-U+05EA) nor a space.
_NON_HEBREW_RE = re.compile(r"[^\u05D0-\u05EA ]+")


def _select_characters(text_blocks, step, length, spaces_include,
                       strip_in_braces, strip_diacritics):
    """Clean one book's text and return every ``step``-th character of it."""
    # Items inside a block are joined with a space; consecutive blocks are
    # concatenated with no separator between them.
    text = "".join(" ".join(block) for block in text_blocks)

    if strip_in_braces:
        text = _BRACKETED_RE.sub("", text)
    if strip_diacritics:
        text = _NON_HEBREW_RE.sub("", text)
    if not spaces_include:
        text = text.replace(" ", "")

    # Start at the step-th character (1-based) and take every step-th one.
    selected = text[step - 1::step]
    if length != 0:
        selected = selected[:length]
    return selected


def process_json_files(start, end, step, length=0, tlang="en",
                       spaces_include=False, strip_in_braces=True,
                       strip_diacritics=True):
    """Pick every ``step``-th character from numbered JSON books and translate it.

    For each ``i`` in ``start..end`` (inclusive) the file ``texts/{i:02}.json``
    is loaded. Its ``"text"`` value is iterated as a sequence of blocks, each
    block being a sequence of strings. The text is cleaned according to the
    flags below, every ``step``-th character is selected, and a non-empty
    selection is translated with ``GoogleTranslator``.

    Args:
        start: Number of the first file to process.
        end: Number of the last file to process (inclusive).
        step: Stride of the selection; selection starts at the ``step``-th
            character. A step of 0 raises ``ValueError`` from the slice once
            a file has been read. Presumably meant to be a positive integer.
        length: When non-zero, the selection is sliced with ``[:length]``;
            0 keeps the whole selection.
        tlang: Target language passed to ``GoogleTranslator``.
        spaces_include: When false, spaces are removed before selecting.
        strip_in_braces: When true, text enclosed in square brackets
            (brackets included) is removed.
        strip_diacritics: When true, every character other than the Hebrew
            letters U+05D0-U+05EA and the space is removed. This drops not
            only diacritics but also punctuation, digits and line breaks.

    Returns:
        A list with one dict per reported file. A successful entry has the
        keys ``"book"``, ``"title"``, ``"original_text"`` and
        ``"translated_text"``. A missing file, invalid JSON or a missing key
        yields ``{"error": message}``. Files whose selection is empty
        contribute nothing.

    Raises:
        Exceptions raised by ``GoogleTranslator`` are not caught here.
    """
    base_path = "texts"
    translator = GoogleTranslator(source='auto', target=tlang)
    results = []

    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:02}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)

            selected_characters = _select_characters(
                data["text"], step, length,
                spaces_include, strip_in_braces, strip_diacritics,
            )

            # Only reach for the network when there is something to report;
            # an empty selection is skipped without a translation request.
            if selected_characters:
                # Read the title first so a malformed file fails before the
                # translation call is made.
                title = data["title"]
                results.append({
                    "book": i,
                    "title": title,
                    "original_text": selected_characters,
                    "translated_text": translator.translate(selected_characters),
                })
        except FileNotFoundError:
            results.append({"error": f"File {file_name} not found."})
        except json.JSONDecodeError:
            results.append({"error": f"File {file_name} could not be read as JSON."})
        except KeyError as exc:
            # Name the key that was actually missing ('text' or 'title').
            missing_key = exc.args[0] if exc.args else "text"
            results.append(
                {"error": f"Expected key {missing_key!r} is missing in {file_name}."}
            )

    return results