NHLOCAL committed on
Commit
0e6fd2c
·
1 Parent(s): edc87b4

add talmud

Browse files
app.py CHANGED
@@ -6,15 +6,17 @@ import joblib
6
  nltk.download('punkt')
7
 
8
  # Load the trained model and vectorizer outside the function for better performance
9
- loaded_classifier = joblib.load("is_this_bible_model.pkl")
10
- vectorizer = joblib.load("is_this_bible_vectorizer.pkl")
11
 
12
  def parse_text(new_text):
13
  new_text_tfidf = vectorizer.transform([new_text])
14
  prediction = loaded_classifier.predict(new_text_tfidf)
15
  probabilities = loaded_classifier.predict_proba(new_text_tfidf)
16
- confidence_score = probabilities[0, 1]
17
- return 'ืชื "ืš' if prediction[0] == 1 else 'ืื—ืจ', confidence_score
 
 
18
 
19
  iface = gr.Interface(fn=parse_text, inputs="text", outputs=["text", "number"], title='ื’ื™ืœื•ื™ ืคืกื•ืงื™ ื”ืชื "ืš ื‘ืืžืฆืขื•ืช AI', description='ื”ื–ืŸ ื˜ืงืกื˜ ื›ื“ื™ ืœืกื•ื•ื’ ืื ื”ื•ื ืžื”ืชื "ืš ืื• ืœื.')
20
  iface.launch()
 
# Make sure the NLTK sentence tokenizer data is present before serving.
nltk.download('punkt')

# Load the trained model and vectorizer outside the function for better performance
loaded_classifier = joblib.load("bible_or_talmud_model.pkl")
vectorizer = joblib.load("bible_or_talmud_vectorizer.pkl")

# Map classifier class ids to user-facing Hebrew labels; hoisted to module
# level so the dict is not rebuilt on every prediction call.
# 0 = other, 1 = Tanakh (Bible), 2 = Babylonian Talmud.
LABELS = {0: 'ืื—ืจ', 1: 'ืชื "ืš', 2: 'ืชืœืžื•ื“ ื‘ื‘ืœื™'}


def parse_text(new_text):
    """Classify *new_text* as Bible, Babylonian Talmud, or other.

    Returns a (label, confidence) tuple where confidence is the
    probability of the predicted class (the highest probability).
    """
    new_text_tfidf = vectorizer.transform([new_text])
    prediction = loaded_classifier.predict(new_text_tfidf)
    probabilities = loaded_classifier.predict_proba(new_text_tfidf)
    # Confidence of the *predicted* class, i.e. the maximum probability.
    confidence_score = max(probabilities[0])
    predicted_label = LABELS[prediction[0]]
    return predicted_label, confidence_score


# Bug fix: the description still advertised a binary Bible/not-Bible
# classifier even though the model now also detects the Babylonian Talmud.
iface = gr.Interface(fn=parse_text, inputs="text", outputs=["text", "number"], title='ื’ื™ืœื•ื™ ืคืกื•ืงื™ ื”ืชื "ืš ื‘ืืžืฆืขื•ืช AI', description='ื”ื–ืŸ ื˜ืงืกื˜ ื›ื“ื™ ืœืกื•ื•ื’ ืื ื”ื•ื ืžื”ืชื "ืš, ืžื”ืชืœืžื•ื“ ื”ื‘ื‘ืœื™ ืื• ืื—ืจ.')
iface.launch()
data_creation/text_identification_model.pkl โ†’ bible_or_talmud_model.pkl RENAMED
File without changes
data_creation/text_identification_vectorizer.pkl โ†’ bible_or_talmud_vectorizer.pkl RENAMED
File without changes
try_model.py DELETED
@@ -1,74 +0,0 @@
from sys import argv
#import re
import nltk
from nltk.corpus import stopwords
import joblib


"""
# Remove punctuation and special characters
def remove_punctuation(text):
    return re.sub(r'[^\w\s]', '', text)

# Function to remove custom stop words from text
def remove_custom_stopwords(text):
    hebrew_stopwords = set(stopwords.words('hebrew'))
    additional_stopwords = {'ืื ื™', 'ืืชื”', 'ืืช', 'ืื ื—ื ื•', 'ืืชื', 'ืืชืŸ', 'ื”ื', 'ื”ืŸ'}
    hebrew_stopwords.update(additional_stopwords)
    return ' '.join(word for word in text.split() if word not in hebrew_stopwords)


# Preprocess the new text (remove punctuation and custom stop words)
# To re-enable the disabled preprocessing, apply it to the new_text variable
new_text_cleaned = remove_custom_stopwords(remove_punctuation(new_text))
"""


# Load the trained classifier and the TF-IDF vectorizer it was fitted with;
# both are loaded once at import time.
loaded_classifier = joblib.load("is_this_bible_model.pkl")
vectorizer = joblib.load("is_this_bible_vectorizer.pkl")


def parse_text(new_text):
    """Classify one text and print the prediction with its confidence."""
    # Vectorize, predict, and read the probability of the "Bible" class
    # (column index 1 of predict_proba).
    tfidf_features = vectorizer.transform([new_text])
    prediction = loaded_classifier.predict(tfidf_features)
    probabilities = loaded_classifier.predict_proba(tfidf_features)
    confidence_score = probabilities[0, 1]  # The confidence score for class "Bible" (index 1)

    print(f"Text: {new_text} | Prediction: {'Bible' if prediction[0] == 1 else 'Other'} | Confidence Score: {confidence_score:.4f}")


# Demo sentences: a mix of Bible verses and modern Hebrew text.
text_list = [
    'ืื ื™ ื™ื•ืฉื‘ ืคื” ื‘ืฉืงื˜ ื•ืžืงืœืœ ืืช ื”ืขื•ื‘ื“ื” ืฉื—ืœืง ืžื”ืชื•ื›ื ื•ืช ืฉืื ื™ ืžืชื—ื–ืง ืงืฉื•ืจื” ืœืคื™ื™ืชื•ืŸ 2.4, ืฉืื™ืŸ ืœื” ืืช ื–ื”',
    'ื›ืžื” ื™ืคื” ื•ื ืื” ื›ืฉืฉื•ืžืขื™ื ื”ืฉื™ืจื” ืฉืœื”ื',
    'ื•ื”ื™ื” ื‘ืขืช ื”ื”ื™ื ืื—ืคืฉ ืืช ื™ืจื•ืฉืœื™ื ื‘ื ืจื•ืช ื•ื”ื•ื“ืขืชื™ื” ืืช ื›ืœ ืชื•ืขื‘ื•ืชื™ื”',
    'ื•ื”ื™ื ืฉืขืžื“ื” ืœืื‘ื•ืชื™ื ื• ื•ืœื ื• ืฉืœื ืื—ื“ ื‘ืœื‘ื“ ืขืžื“ ืขืœื™ื ื• ืœื›ืœื•ืชื™ื ื•',
    'ืื ื™ ื”ืกืชื›ืœืชื™ ืœืฉืžื™ื ืืชื” ืฆืœืœืช ื‘ืžื™ื',
    'ื”ืฆื‘ ื”ื•ื ื‘ืขืœ ื—ื™ื™ื ืฉื—ื™ ื‘ื™ื ื•ื‘ื™ื‘ืฉื”',
    'ื•ื”ื™ื” ื”ื ืฉืืจ ื‘ืฆื™ื•ืŸ ื•ื”ื ื•ืชืจ ื‘ื™ืจื•ืฉืœื™ื ืงื“ื•ืฉ ื™ืืžืจ ืœื•',
    'ืฉื™ืจ ื”ืฉื™ืจื™ื ืืฉืจ ืœืฉืœืžื”',
    'ื™ืฉืงื ื™ ืžื ืฉื™ืงื•ืช ืคื™ื”ื• ื›ื™ ื˜ื•ื‘ื™ื ื“ื•ื“ื™ืš ืžื™ื™ืŸ',
    'ื•ื”ื™ื” ืจืง ืžืœื ืฉืžื—ื” ื•ื—ื“ื•ื” ืชืžื™ื“ ื›ืฉื”ื™ื” ื’ื•ืžืจ ื”ืžื ืขืœ ื•ืžืŸ ื”ืกืชื ื”ื™ื” ืœื• ืฉืœืฉื” ืงืฆื•ื•ืช',
    'ื–ื” ืžืขืฉื” ืฉืœื• ื•ื–ื” ืžืขืฉื” ืฉืœื™ ื•ืขื•ื“ ืžื” ืœื ื• ืœื“ื‘ืจ ืžืื—ืจื™ื',
    'ื“ื•ื“ื™ ื™ืจื“ ืœื’ื ื• ืœืขืจื•ื’ื•ืช ื”ื‘ื•ืฉื ืœืจืขื•ืช ื‘ื’ื ื™ื ื•ืœืœืงื•ื˜ ืฉื•ืฉื ื™ื',
    'ื•ื™ืžืจื• ื‘ื™ ื‘ื™ืช ื™ืฉืจืืœ ื‘ืžื“ื‘ืจ ื‘ื—ืงื•ืชื™ ืœื ื”ืœื›ื• ื•ืืช ืžืฉืคื˜ื™ ืžืืกื• ืืฉืจ ื™ืขืฉื” ืืชื ื”ืื“ื ื•ื—ื™ ื‘ื”ื',
    'ื–ื” ืœื ืžืฉื ื” ืื•ืคื ื™ื™ื ื ืขืœื™ื™ื ื”ืขื™ืงืจ ื–ื” ื‘ื—ื™ื™ื',
    'ื–ื›ื•ืจ ืืช ื™ื•ื ื”ืฉื‘ืช ืœืงื“ืฉื•',
    'ื•ื™ืฉืœื— ื™ืขืงื‘ ืžืœืื›ื™ื ืœืคื ื™ื• ืืœ ืขืฉื™ื• ืื—ื™ื•',
    'ืœืš ืœืš ืžืืจืฆืš ื•ืžืžื•ืœื“ืชืš ื•ืžื‘ื™ืช ืื‘ื™ืš',
    'ืขื“ื›ื•ืŸ :ื“ื•ืจ ืœื“ื•ืจ ืชื "ืš ,ืžืื•ืจืขื•ืช ื‘ื–ืžืŸ ื”ืชื "ืš ืงืจื“ื™ื˜']

# Classify the first CLI argument when one is given (extra arguments are
# ignored, as before); otherwise run the whole demo list.
for sample in (argv[1:2] or text_list):
    parse_text(sample)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data_creation/try_model.py โ†’ try_talmud_or_bible.py RENAMED
File without changes