Spaces:
Sleeping
Sleeping
from deep_translator import GoogleTranslator | |
import torahcodes.resources.func.utils as util | |
from hebrew_numbers import gematria_to_int | |
from textblob import TextBlob | |
from os import listdir | |
from os.path import isfile, join | |
import re | |
import time | |
import random | |
import os | |
import json | |
# ANSI SGR escape sequences (bold / bright colours). END resets all attributes.
BLUE = '\33[1;94m'
RED = '\033[1;91m'
WHITE = '\33[1;97m'
YELLOW = '\33[1;93m'
MAGENTA = '\033[1;35m'
GREEN = '\033[1;32m'
END = '\033[0m'
ORANGE = '\033[1;33m'  # orange

# Folder that BibleBooks scans for *.json text files.
data_dir = "resources/texts"
class BibleBooks():
    """In-memory store of the raw JSON text files found in one folder.

    Each ``<name>.json`` file is kept as an undecoded string in ``self.book``,
    keyed by the part of its file name that precedes the first dot.
    """

    def __init__(self, folder=None):
        """Create an empty library.

        Args:
            folder: Directory to scan in ``load()``. Defaults to the
                module-level ``data_dir`` when omitted.
        """
        self.folder = data_dir if folder is None else folder
        self.book = {}

    def load(self):
        """Read every regular ``*.json`` file in ``self.folder`` into ``self.book``."""
        for f in listdir(self.folder):
            path = join(self.folder, f)
            if isfile(path) and f.endswith(".json"):
                # Open the same joined path that was just checked; plain
                # string concatenation drops the separator when the folder
                # has no trailing slash.
                with open(path, encoding="utf-8-sig") as handle:
                    self.book[f.split('.')[0]] = handle.read()

    def rawdata(self, bookname):
        """Return the raw file content stored under *bookname*.

        Raises:
            KeyError: If no book with that name has been loaded.
        """
        return self.book[bookname]

    def booklist(self):
        """Return the names of all loaded books as a list."""
        return list(self.book.keys())
# Shared module-level library instance: Torah.loadbooks() fills it, and
# Torah.numtobook() / Torah.GetDataBook() read from it.
books = BibleBooks()
class Torah():
    """Gematria, translation and every-Nth-letter extraction helpers.

    The methods that touch book data read from the module-level ``books``
    instance; call ``loadbooks()`` before using them.
    """

    # Matches a number inside a book name; numtobook() uses the first match.
    _NUMBER_RE = re.compile(r"[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?")

    def __init__(self):
        self.book = ''
        # Letter values used by gematria() / gematrix(); lowercase a-z only.
        self.gcode = {
            'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 600,
            'k': 10, 'l': 20, 'm': 30, 'n': 40, 'o': 50, 'p': 60, 'q': 70, 'r': 80, 's': 90,
            't': 100, 'u': 200, 'v': 700, 'w': 900, 'x': 300, 'y': 400, 'z': 500
        }

    def loadbooks(self):
        """Load all book files into the shared ``books`` instance."""
        books.load()

    def func_getnumber(self, listL, listW):
        """Delegate to ``util.fn_GetNumberValues`` and return its result."""
        return util.fn_GetNumberValues(listL, listW)

    def func_checklang(self, word, lang):
        """Return True when *word* is detected as language *lang*.

        Detection is best-effort: if ``detect_language()`` fails for any
        reason the word is accepted (True), as the original code did.
        NOTE(review): ``detect_language`` looks to be unavailable in newer
        TextBlob releases, in which case this always returns True - confirm.
        """
        blob = TextBlob(word)
        try:
            # Detect once; the result was previously requested twice.
            detected = blob.detect_language()
        except Exception:
            return True
        return detected == lang

    def numtobook(self, number):
        """Return the first book name whose first embedded number equals *number*.

        The comparison is textual (``str(number)`` against the matched
        digits), so leading zeros matter. Returns None when nothing matches.
        """
        wanted = str(number)
        for name in books.booklist():
            found = self._NUMBER_RE.findall(name)
            # Names without any number are skipped instead of raising IndexError.
            if found and found[0] == wanted:
                return name
        return None

    def func_translate(self, lang_in, lang_out, data):
        """Translate the stripped *data* from *lang_in* to *lang_out*."""
        translated = GoogleTranslator(source=lang_in, target=lang_out).translate(data.strip())
        return translated

    def gematria(self, word: str) -> int:
        """Return the gematria value of a single token.

        A purely decimal token is returned as its integer value. Otherwise
        the result is the sum of ``self.gcode`` values of its letters plus
        the sum of its individual decimal digits. Characters without a
        ``gcode`` entry (including uppercase letters) contribute nothing.

        Raises:
            ValueError: If *word* cannot be processed (e.g. it is not a string).
        """
        try:
            # isdecimal() rather than isdigit(): characters such as '²' pass
            # isdigit() but are rejected by int().
            if word.isdecimal():
                return int(word)
            letters_value = sum(self.gcode.get(ch, 0) for ch in word if ch.isalpha())
            digits_value = sum(int(ch) for ch in word if ch.isdecimal())
        except (AttributeError, TypeError, ValueError) as err:
            raise ValueError(f"cannot compute gematria for {word!r}") from err
        return letters_value + digits_value

    def gematrix(self, phrase: str) -> int:
        """Return the gematria value of a whole phrase.

        The phrase is lower-cased, stripped of accents and punctuation, and
        split on whitespace. Purely alphabetic tokens add the sum of their
        letter values, purely decimal tokens add their integer value, and
        mixed tokens (e.g. ``"abc123"``) are ignored.
        """
        phrase = self.strip_accents(phrase.lower())
        phrase = ''.join(ch for ch in phrase if ch.isalpha() or ch.isdigit() or ch.isspace())
        total_value = 0
        for element in phrase.split():
            if element.isalpha():
                total_value += sum(self.gcode.get(ch, 0) for ch in element)
            elif element.isdecimal():
                # isdecimal() guarantees that int() accepts the token.
                total_value += int(element)
        return total_value

    def strip_accents(self, s):
        """Return *s* with combining marks (Unicode category ``Mn``) removed.

        Non-string input is returned unchanged.
        """
        # Imported here because the module never imports unicodedata; the
        # resulting NameError used to be swallowed, leaving accents in place.
        import unicodedata
        try:
            decomposed = unicodedata.normalize('NFD', s)
        except TypeError:
            return s
        return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')

    @staticmethod
    def gematria_iw_int(text):
        """Return ``gematria_to_int(text)``.

        Declared static (the function takes no ``self``) so that it works
        both as ``Torah.gematria_iw_int(text)`` and on an instance.
        """
        return gematria_to_int(text)

    def func_ParseTranslation(self, translated, lang, active):
        """Keep the words of *translated* that start with a letter or digit.

        Args:
            translated: Text to filter; split on single spaces.
            lang: Language code passed to ``func_checklang``.
            active: When equal to the string ``'true'``, a word is kept only
                if ``func_checklang`` accepts it.

        Returns:
            The kept words, each followed by one space, or ``0`` when no
            word was kept.
        """
        allowed_initials = 'abcdefghijklmnñopqrstuvwxyz1234567890'
        kept = []
        for word in translated.split(' '):
            # Consecutive spaces produce empty tokens; skip them explicitly.
            if not word or word[0].lower() not in allowed_initials:
                continue
            if active == 'true' and not self.func_checklang(word, lang):
                continue
            kept.append(word)
        if not kept:
            return 0
        return ' '.join(kept) + ' '

    def els(self, namebook, number, tracert='false', visualice=False):
        """Pick every *number*-th character from the verses of a book.

        Characters are counted continuously across verses, starting at 1.
        The picks of each verse form one space-separated group.

        Args:
            namebook: Book name passed to ``GetDataBook``.
            number: Skip distance; converted with ``int()``.
            tracert: When equal to the string ``'true'``, each pick is
                printed and its position within the verse is added to the
                returned total.
            visualice: Unused.

        Returns:
            ``(text, total)``. ``total`` stays 0 unless tracing is on. A
            zero or non-numeric *number* yields ``('', 0)``.
        """
        verses = self.GetDataBook(namebook)
        try:
            step = int(number)
        except (TypeError, ValueError):
            return '', 0
        if step == 0:
            # Nothing can be selected with a zero skip distance.
            return '', 0

        trace = tracert == 'true'
        position = 1
        totalvalue = 0
        groups = []
        for (z, b, y), verse_text in verses.items():
            picked = []
            try:
                for charnum, char in enumerate(verse_text, start=1):
                    if position % step == 0:
                        if trace:
                            totalvalue = totalvalue + charnum
                            print('Source:',int(z),'chapter:', int(b),'Verse:', int(y),'CharNum:',int(charnum),'Char:', char)
                        picked.append(char)
                    position += 1
                groups.append(''.join(picked))
            except (TypeError, ValueError):
                # Best effort: a malformed verse is skipped rather than
                # aborting the whole extraction.
                continue
        ret = re.sub(r'\s+', ' ', ' '.join(groups).strip())
        return ret, totalvalue

    def GetDataBook(self, bibleNumberBook):
        """Return the verse dictionary ``D`` built by ``util`` for one book.

        ``els`` iterates the result as a mapping from 3-tuples to an
        iterable of characters.
        """
        raw_json = books.rawdata(bibleNumberBook)
        parsed, parsed_spaces = util.fn_TextFilePreprocess(raw_json)
        dicts, dicts_spaces = util.fn_ConvertJSONStringsToDicts(parsed, parsed_spaces)
        chosen = util.fn_GetNumberOfTextChosen(dicts)
        zipped, zipped_spaces = util.fn_ZippedTupleCreate(dicts, dicts_spaces, chosen)
        D, DS = util.fn_DictionaryOfVersesCreate(zipped, zipped_spaces)
        # NOTE(review): the results of the calls below are never used here.
        # They are kept in case the util functions have side effects; if
        # they are pure, these four calls can be dropped.
        S, _L, _DL, D5, ListOfWords = util.fn_DataObjectsCreate(D, DS)
        _N, NW = util.fn_GetNumberValues(S, ListOfWords)
        util.fn_ListOfIndexesCustomCreate(D5)
        util.fn_TupleOfWordsAndGematriaValuesCreate(ListOfWords, NW)
        return D
def _select_els_characters(text_blocks, step, length, spaces_include, strip_in_braces, strip_diacritics):
    """Flatten *text_blocks*, clean the text and return every *step*-th character."""
    # Items inside a block are joined with spaces; blocks themselves are
    # concatenated without a separator.
    clean_text = ''.join(' '.join(block) for block in text_blocks)
    if strip_in_braces:
        clean_text = re.sub(r"\[.*?\]", "", clean_text, flags=re.DOTALL)
    if strip_diacritics:
        # Keeps only U+05D0..U+05EA and spaces, so anything else is removed
        # as well, not just diacritics.
        clean_text = re.sub(r"[^\u05D0-\u05EA ]+", "", clean_text)
    if not spaces_include:
        clean_text = clean_text.replace(" ", "")
    selected = clean_text[step - 1::step]
    # A length of 0 means "no limit".
    return selected[:length] if length != 0 else selected


def process_json_files(start, end, step, length=0, tlang="en", spaces_include=False,
                       strip_in_braces=True, strip_diacritics=True,
                       base_path="resources/texts"):
    """Extract every *step*-th character from numbered JSON books and translate it.

    Files ``{base_path}/{i:02}.json`` are read for ``i`` in ``start..end``
    (inclusive). Each file must hold a ``"text"`` list of string lists and
    a ``"title"``.

    Args:
        start: First book number.
        end: Last book number (inclusive).
        step: Skip distance; selection starts at character ``step``.
        length: Maximum number of selected characters; 0 means no limit.
        tlang: Target language for the translation.
        spaces_include: Keep spaces in the text before selecting.
        strip_in_braces: Remove ``[...]`` sections before selecting.
        strip_diacritics: Remove everything except U+05D0..U+05EA and spaces.
        base_path: Folder that contains the JSON files.

    Returns:
        A list with one dict per book that produced text (``book``,
        ``title``, ``original_text``, ``translated_text``) and one
        ``{"error": ...}`` dict per file that could not be processed.

    Raises:
        ValueError: If *step* is 0.
    """
    if step == 0:
        raise ValueError("step must be non-zero")

    # Created on first use, so nothing is set up when no book yields text.
    # This also means an unsupported *tlang* is only reported at that point.
    translator = None
    results = []
    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:02}.json"
        try:
            # utf-8-sig also accepts files that start with a BOM, matching
            # how BibleBooks reads the same folder.
            with open(file_name, 'r', encoding='utf-8-sig') as file:
                data = json.load(file)
            selected_characters = _select_els_characters(
                data["text"], step, length, spaces_include, strip_in_braces, strip_diacritics)
            # Translate only when there is something to translate.
            if selected_characters != "":
                if translator is None:
                    translator = GoogleTranslator(source='auto', target=tlang)
                results.append({
                    "book": i,
                    "title": data["title"],
                    "original_text": selected_characters,
                    "translated_text": translator.translate(selected_characters)
                })
        except FileNotFoundError:
            results.append({"error": f"File {file_name} not found."})
        except json.JSONDecodeError:
            results.append({"error": f"File {file_name} could not be read as JSON."})
        except KeyError as err:
            # Name the key that is actually missing ('text' or 'title').
            results.append({"error": f"Expected key {err} is missing in {file_name}."})
    return results