add talmud
app.py
CHANGED
@@ -6,15 +6,17 @@ import joblib
 nltk.download('punkt')
 
 # Load the trained model and vectorizer outside the function for better performance
-loaded_classifier = joblib.load("
-vectorizer = joblib.load("
+loaded_classifier = joblib.load("bible_or_talmud_model.pkl")
+vectorizer = joblib.load("bible_or_talmud_vectorizer.pkl")
 
 def parse_text(new_text):
     new_text_tfidf = vectorizer.transform([new_text])
     prediction = loaded_classifier.predict(new_text_tfidf)
     probabilities = loaded_classifier.predict_proba(new_text_tfidf)
-    confidence_score = probabilities[0
-
+    confidence_score = max(probabilities[0])
+    labels = {0: 'אחר', 1: 'תנ"ך', 2: 'תלמוד בבלי'}
+    predicted_label = labels[prediction[0]]
+    return predicted_label, confidence_score
 
 iface = gr.Interface(fn=parse_text, inputs="text", outputs=["text", "number"], title='זיהוי פסוקי מתנ"ך באמצעות AI', description='הזן טקסט כדי לסווג אם הוא מהתנ"ך או לא.')
 iface.launch()
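For context: the updated parse_text now returns a (predicted_label, confidence_score) pair, which is what the interface's outputs=["text", "number"] expects. The sketch below is not part of the commit; it repeats the same joblib/scikit-learn calls outside Gradio and assumes the two renamed .pkl files are in the working directory and that the model exposes predict_proba, as the diff implies.

# Sketch only: exercise the updated classifier directly, outside the Gradio app.
import joblib

clf = joblib.load("bible_or_talmud_model.pkl")      # renamed model artifact
vec = joblib.load("bible_or_talmud_vectorizer.pkl") # renamed TF-IDF vectorizer
labels = {0: 'אחר', 1: 'תנ"ך', 2: 'תלמוד בבלי'}      # same mapping as app.py

sample = 'שיר השירים אשר לשלמה'                      # Song of Songs 1:1 as a test input
features = vec.transform([sample])
label = labels[clf.predict(features)[0]]
confidence = max(clf.predict_proba(features)[0])    # highest class probability, as in app.py
print(label, round(float(confidence), 4))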
data_creation/text_identification_model.pkl → bible_or_talmud_model.pkl
RENAMED
File without changes

data_creation/text_identification_vectorizer.pkl → bible_or_talmud_vectorizer.pkl
RENAMED
File without changes
try_model.py
DELETED
@@ -1,74 +0,0 @@
-from sys import argv
-#import re
-import nltk
-from nltk.corpus import stopwords
-import joblib
-
-
-"""
-# Remove punctuation and special characters
-def remove_punctuation(text):
-    return re.sub(r'[^\w\s]', '', text)
-
-# Function to remove custom stop words from text
-def remove_custom_stopwords(text):
-    hebrew_stopwords = set(stopwords.words('hebrew'))
-    additional_stopwords = {'אני', 'אתה', 'את', 'אנחנו', 'אתם', 'אתן', 'הם', 'הן'}
-    hebrew_stopwords.update(additional_stopwords)
-    return ' '.join(word for word in text.split() if word not in hebrew_stopwords)
-
-
-# Preprocess the new text (remove punctuation and custom stop words)
-# If you want to bring these functions back into use, pass this variable back in as new_text
-new_text_cleaned = remove_custom_stopwords(remove_punctuation(new_text))
-"""
-
-
-# Load the trained model from the file
-loaded_classifier = joblib.load("is_this_bible_model.pkl")
-
-# Load the TF-IDF vectorizer used for training
-vectorizer = joblib.load("is_this_bible_vectorizer.pkl")
-
-def parse_text(new_text):
-    # Transform the new text using the TF-IDF vectorizer
-    new_text_tfidf = vectorizer.transform([new_text])
-
-    # Make predictions on the new text
-    prediction = loaded_classifier.predict(new_text_tfidf)
-
-    # Get the confidence score for the predicted class
-    probabilities = loaded_classifier.predict_proba(new_text_tfidf)
-    confidence_score = probabilities[0, 1]  # The confidence score for class "Bible" (index 1)
-
-    # Print the prediction and the confidence score
-    print(f"Text: {new_text} | Prediction: {'Bible' if prediction[0] == 1 else 'Other'} | Confidence Score: {confidence_score:.4f}")
-
-
-text_list = [
-    'ืื ื ืืืฉื ืคื ืืฉืงื ืืืงืื ืืช ืืขืืืื ืฉืืืง ืืืชืืื ืืช ืฉืื ื ืืชืืืง ืงืฉืืจื ืืคืืืชืื 2.4, ืฉืืื ืื ืืช ืื',
-    'ืืื ืืคื ืื ืื ืืฉืฉืืืขืื ืืฉืืจื ืฉืืื',
-    'ืืืื ืืขืช ืืืื ืืืคืฉ ืืช ืืจืืฉืืื ืื ืจืืช ืืืืืขืชืื ืืช ืื ืชืืขืืืชืื',
-    'ืืืื ืฉืขืืื ืืืืืชืื ื ืืื ื ืฉืื ืืื ืืืื ืขืื ืขืืื ื ืืืืืชืื ื',
-    'ืื ื ืืกืชืืืชื ืืฉืืื ืืชื ืฆืืืช ืืืื',
-    'ืืฆื ืืื ืืขื ืืืื ืฉืื ืืื ืืืืืฉื',
-    'ืืืื ืื ืฉืืจ ืืฆืืื ืืื ืืชืจ ืืืจืืฉืืื ืงืืืฉ ืืืืจ ืื',
-    'ืฉืืจ ืืฉืืจืื ืืฉืจ ืืฉืืื',
-    'ืืฉืงื ื ืื ืฉืืงืืช ืคืืื ืื ืืืืื ืืืืื ืืืื',
-    'ืืืื ืจืง ืืื ืฉืืื ืืืืื ืชืืื ืืฉืืื ืืืืจ ืืื ืขื ืืื ืืกืชื ืืื ืื ืฉืืฉื ืงืฆืืืช',
-    'ืื ืืขืฉื ืฉืื ืืื ืืขืฉื ืฉืื ืืขืื ืื ืื ื ืืืืจ ืืืืจืื',
-    'ืืืื ืืจื ืืื ื ืืขืจืืืืช ืืืืฉื ืืจืขืืช ืืื ืื ืืืืงืื ืฉืืฉื ืื',
-    'ืืืืจื ืื ืืืช ืืฉืจืื ืืืืืจ ืืืงืืชื ืื ืืืื ืืืช ืืฉืคืื ืืืกื ืืฉืจ ืืขืฉื ืืชื ืืืื ืืื ืืื',
-    'ืื ืื ืืฉื ื ืืืคื ืืื ื ืขืืืื ืืขืืงืจ ืื ืืืืื',
-    'ืืืืจ ืืช ืืื ืืฉืืช ืืงืืฉื',
-    'ืืืฉืื ืืขืงื ืืืืืื ืืคื ืื ืื ืขืฉืื ืืืื',
-    'ืื ืื ืืืจืฆื ืืืืืืืชื ืืืืืช ืืืื',
-    'ืขืืืื :ืืืจ ืืืืจ ืชื "ื ,ืืืืจืขืืช ืืืื ืืชื "ื ืงืจืืื']
-
-if argv[1:]:
-    new_text = argv[1]
-    parse_text(new_text)
-
-else:
-    for new_text in text_list:
-        parse_text(new_text)
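The deleted file above was the earlier two-class command-line tester: it printed 'Bible' vs 'Other' against the is_this_bible_*.pkl artifacts. Its data_creation counterpart is renamed to try_talmud_or_bible.py below, and its contents are not shown in this diff. Purely as an illustration, a three-class equivalent built on the renamed artifacts might look like the sketch that follows; the file names and label mapping are taken from app.py, everything else is assumed.

# Hypothetical sketch of a three-class CLI tester; not the actual try_talmud_or_bible.py.
from sys import argv
import joblib

clf = joblib.load("bible_or_talmud_model.pkl")
vec = joblib.load("bible_or_talmud_vectorizer.pkl")
labels = {0: 'אחר', 1: 'תנ"ך', 2: 'תלמוד בבלי'}

def classify(text):
    features = vec.transform([text])
    pred = clf.predict(features)[0]
    confidence = max(clf.predict_proba(features)[0])
    print(f"Text: {text} | Prediction: {labels[pred]} | Confidence: {confidence:.4f}")

if argv[1:]:
    classify(argv[1])  # classify the string passed on the command line
else:
    print("usage: python try_talmud_or_bible.py '<text to classify>'")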
data_creation/try_model.py → try_talmud_or_bible.py
RENAMED
File without changes