Spaces: Runtime error

sasank-229 committed
Commit 11bce6b
1 parent: b44c0fd

Upload 2 files
- app.py +130 -0
- tempCodeRunnerFile.py +1 -0

app.py ADDED
@@ -0,0 +1,130 @@
from flask import Flask, request, render_template, url_for, current_app, abort
from tqdm import tqdm
import numpy as np
# import nbformat
# from nbconvert import PythonExporter
# import os
import torch
from transformers import AutoModel, AutoTokenizer
import pickle
from xgboost import XGBClassifier

app = Flask(__name__)

# Load the model during the application startup
# @before_first_request
def load_model():
    try:
        with open('static/ipynbFiles/classifier2.pkl', 'rb') as file:
            current_app.clf = pickle.load(file)
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        abort(500)  # Internal Server Error
app.before_first_request(load_model)

def model_extract(input_string):
    param = {'maxLen': 256}
    model = AutoModel.from_pretrained("ai4bharat/indic-bert")
    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")

    def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.0):
        padded_sequences = []
        for seq in sequences:
            if padding == 'pre':
                padded_seq = np.pad(seq, (maxlen - len(seq), 0), 'constant', constant_values=value)
            elif padding == 'post':
                padded_seq = np.pad(seq, (0, maxlen - len(seq)), 'constant', constant_values=value)
            else:
                raise ValueError("Padding should be 'pre' or 'post'.")

            if truncating == 'pre':
                padded_seq = padded_seq[-maxlen:]
            elif truncating == 'post':
                padded_seq = padded_seq[:maxlen]
            else:
                raise ValueError("Truncating should be 'pre' or 'post'.")

            padded_sequences.append(padded_seq)

        return np.array(padded_sequences, dtype=dtype)


    def create_attention_masks(input_ids):
        attention_masks = []
        for seq in tqdm(input_ids):
            seq_mask = [float(i > 0) for i in seq]
            attention_masks.append(seq_mask)
        return np.array(attention_masks)

    def getFeaturesandLabel(single_string, label):
        # Wrap the single string in a list
        sentences = ["[CLS] " + single_string + " [SEP]"]

        # Tokenize and preprocess
        tokenizer_texts = list(map(lambda t: tokenizer.tokenize(t)[:512], tqdm(sentences)))
        input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tqdm(tokenizer_texts)]

        # Pad sequences and create attention masks
        input_ids = pad_sequences(sequences=input_ids, maxlen=param['maxLen'], dtype='long', padding='post', truncating='post')
        attention_masks_data = create_attention_masks(input_ids)

        # Convert to torch tensors
        X_data = torch.tensor(input_ids)
        attention_masks_data = torch.tensor(attention_masks_data)
        y_data = torch.tensor(label)

        return X_data, attention_masks_data, y_data

    text_input = input_string
    label_input = [0]
    X_data, attention_masks_data, y_data = getFeaturesandLabel(text_input, label_input)
    return X_data


# def model_heart():
#     # Path to the notebook file
#     notebook_path = os.path.join('static', 'ipynbFiles', 'trail.ipynb')
#     # Read the notebook content
#     with open(notebook_path, 'r', encoding='utf-8') as notebook_file:
#         notebook_content = nbformat.read(notebook_file, as_version=4)
#     # Create a PythonExporter
#     python_exporter = PythonExporter()
#     # Convert the notebook to a Python script
#     python_script, _ = python_exporter.from_notebook_node(notebook_content)
#     print(python_script)
#     # Execute the Python script
#     exec(python_script)

# model_heart()
# Now you can use the variables and functions defined in the notebook in your app.py
from tempCodeRunnerFile import match

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/predict', methods=['POST', 'GET'])
def predict():
    input_string = request.form['text']
    print('text: ', input_string)
    with open('static/ipynbFiles/classifier_10epochs_updated.pkl', 'rb') as file:
        clf = pickle.load(file)

    if any(c in input_string for c in match):
        prediction = [0]
    else:
        ans = model_extract(input_string)
        print('torch.tensor variable: ', ans)
        prediction = clf.predict(ans)

    print('prediction=', prediction)
    if prediction == [0]:
        return render_template('index.html', pred='Cyberbullying Text', question='వాక్యం - ' + input_string)
    else:
        return render_template('index.html', pred='Non-Cyberbullying Text', question='వాక్యం - ' + input_string)

if __name__ == "__main__":
    app.run(debug=True, port=8001)

# For creating a pickle file:
# with open('classifier.pkl', 'wb') as file:
#     pickle.dump(xgb, file)
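The trailing comment only hints at how the pickled XGBoost model was saved. Below is a minimal, hypothetical sketch of producing such a pickle; it is not the author's training code, and the placeholder X_train/y_train arrays stand in for real padded token-ID features and labels, which are not part of this commit:

# Hypothetical sketch (not the actual training script): fit an XGBClassifier
# on (n_samples, 256) token-ID features and pickle it, as the comment above suggests.
import pickle
import numpy as np
from xgboost import XGBClassifier

X_train = np.random.randint(1, 1000, size=(100, 256))  # placeholder token-ID features
y_train = np.random.randint(0, 2, size=100)            # placeholder 0/1 labels

xgb = XGBClassifier()
xgb.fit(X_train, y_train)

with open('classifier.pkl', 'wb') as file:
    pickle.dump(xgb, file)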
tempCodeRunnerFile.py ADDED
@@ -0,0 +1 @@
match = ["సచ్చినోడ", "పప్పు నాయుడు", "నీచుడు", "యెడవా", "పనికిరాణి వాడు", "దున్నపోతు", "పిచ్చి", "దరిద్రుడు", "దొంగ", "దోచేసాడు", "సైకో", "లపాకి", "కొజ్జ", "ముండ", "ఎదవ", "అడుక్కుతిను", "దద్దమ్మ", "సిగ్గులేదా", "ఎర్రిపుకు", "సన్నాసి", "పోరంబోకు"]
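For reference, a minimal client sketch for exercising the /predict route once the app is running locally on port 8001. It must be a POST with form data, since predict() reads request.form['text']; the example sentence is a placeholder:

# Minimal client sketch; assumes the Flask app above is running on 127.0.0.1:8001.
import requests

resp = requests.post(
    "http://127.0.0.1:8001/predict",
    data={"text": "your Telugu sentence here"},  # the 'text' form field read by predict()
)
print(resp.status_code)   # 200 on success
print(resp.text[:200])    # start of the rendered index.html containing the prediction label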