Update app.py
app.py
CHANGED
@@ -123,14 +123,14 @@ def main():
         result2 = re.sub(r'[^\w\s]','',result1)
         result.append(result2)

-
+    st.write("--- %s seconds ---" % (time.time() - start_time))
     tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased

     model_path = "checkpoint-2850"

     model = AutoModelForSequenceClassification.from_pretrained(model_path,id2label={0:'non-causal',1:'causal'})

-
+    st.write('sequence classification loaded')
     pipe1 = pipeline("text-classification", model=model,tokenizer=tokenizer)
     for sent in result:
         pred = pipe1(sent)
@@ -138,8 +138,8 @@ def main():
             if lab['label'] == 'causal': #causal
                 causal_sents.append(sent)

-
-
+    st.write('causal sentence classification finished')
+    st.write("--- %s seconds ---" % (time.time() - start_time))

     model_name = "distilbert-base-cased"
     tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
@@ -148,7 +148,7 @@ def main():

     model = DistilBertForTokenClassification.from_pretrained(model_path1) #len(unique_tags),, num_labels= 7, , id2label={0:'CT',1:'E',2:'C',3:'O'}
     pipe = pipeline('ner', model=model, tokenizer=tokenizer,aggregation_strategy='simple') #grouped_entities=True
-
+    st.write('DistilBERT loaded')
     sentence_pred = []
     class_list = []
     entity_list = []
@@ -161,8 +161,8 @@ def main():
             class_list.append(i['word'])
             entity_list.append(i['entity_group'])

-
-
+    st.write('causality extraction finished')
+    st.write("--- %s seconds ---" % (time.time() - start_time))

     # filename = 'Checkpoint-classification.sav'
     # loaded_model = pickle.load(open(filename, 'rb'))
@@ -190,8 +190,8 @@ def main():
     predictions = loaded_model.predict(pad_sequences(tokenizer.texts_to_sequences(class_list),maxlen=MAX_SEQUENCE_LENGTH))
     predicted = np.argmax(predictions,axis=1)

-
-
+    st.write('stakeholder taxonomy finished')
+    st.write("--- %s seconds ---" % (time.time() - start_time))
     pred1 = predicted
     level0 = []
     count =0
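For context, the commit threads progress messages and wall-clock timings through each stage of the pipeline. A minimal sketch of the pattern, assuming (as the diff implies but does not show) that start_time is captured once near the top of main():

import time
import streamlit as st

def main():
    start_time = time.time()  # reference point for every elapsed-time report

    # ... an expensive stage: loading a model, running a pipeline, etc. ...

    # After each stage, emit a status line plus the elapsed time so the
    # running Space shows where time is being spent.
    st.write('stage finished')
    st.write("--- %s seconds ---" % (time.time() - start_time))

if __name__ == "__main__":
    main()

Because start_time is never reset, each report shows cumulative time since main() began rather than per-stage duration; re-capturing time.time() after each report would give per-stage timings instead.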