Nuthanon committed on
Commit
a57d36b
1 Parent(s): 5b6c885

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -24
app.py CHANGED
@@ -1,25 +1,32 @@
1
  import streamlit as st
2
- import openai
3
- import os
4
-
5
# Read the OpenAI API key from the OPENAI_API_KEY environment variable only.
# A literal key must never be used as the fallback: anything committed to the
# repository is public and has to be treated as compromised (rotate it).
openai.api_key = os.getenv("OPENAI_API_KEY")
7
-
8
- # Function to classify lines using GPT-3
9
def classify_lines_with_gpt3(text):
    """Send the contract text to the OpenAI completion endpoint for classification.

    Args:
        text: Finnish contract specification text; it is embedded verbatim in
            the prompt after the list of category headings.

    Returns:
        The text of the first returned choice, with surrounding whitespace
        stripped.
    """
    request_options = {
        "engine": "text-davinci-003",
        "prompt": f"Classify the following Finnish contract specifications into categories: Urakka sisältää: Urakka ei sisältää: Tilaajan velvoitteet:Käytäntöjen tarkennukset:Hintojen tarkennukset: Muu:.\n\n{text}\n\n",
        "max_tokens": 1024,
        "n": 1,
        "stop": None,
        "temperature": 0.5,
    }
    completion = openai.Completion.create(**request_options)

    # Only one completion is requested (n=1), so the first choice is the answer.
    first_choice = completion.choices[0]
    return first_choice.text.strip()
21
-
22
- st.title("Finnish Contract Specifications Categorizer with GPT-3")
 
 
 
 
 
 
 
23
 
24
  st.write("Enter the contract specifications in Finnish:")
25
 
@@ -28,9 +35,13 @@ contract_text = st.text_area("Contract Specifications (Finnish):", height=300)
28
 
29
# Run the GPT-3 classification when the button is pressed and show the result,
# or prompt for input when the text area is empty.
if st.button("Classify"):
    if not contract_text:
        st.write("Please enter the contract specifications.")
    else:
        classified_text = classify_lines_with_gpt3(contract_text)

        st.write("Classified Contract Specifications:")
        st.write(classified_text)
 
1
  import streamlit as st
2
+ from transformers import BertTokenizer, BertForSequenceClassification
3
+ import torch
4
+ import torch.nn.functional as F
5
+
6
# Define categories (the order fixes which class index maps to which heading)
categories = ["Urakka sisältää", "Urakka ei sisältää", "Tilaajan velvoitteet", "Käytäntöjen tarkennukset", "Hintojen tarkennukset", "Muu"]

model_name = "TurkuNLP/bert-base-finnish-cased-v1"


@st.cache_resource
def _load_tokenizer_and_model(name, num_labels):
    """Load the tokenizer and the sequence classifier for *name*.

    Cached with ``st.cache_resource`` so the weights are loaded once per
    server process instead of on every Streamlit script rerun.

    Args:
        name: Hugging Face model identifier to load.
        num_labels: Number of output classes for the classification head.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = BertTokenizer.from_pretrained(name)
    loaded_model = BertForSequenceClassification.from_pretrained(name, num_labels=num_labels)
    return loaded_tokenizer, loaded_model


# Load the tokenizer and model; the head size follows the category list so the
# two cannot drift apart.
# NOTE(review): this looks like a base (not fine-tuned) checkpoint, in which
# case the classification head is freshly initialised and predictions are not
# meaningful until the model is fine-tuned on labelled contract lines - confirm.
tokenizer, model = _load_tokenizer_and_model(model_name, len(categories))
13
+
14
+ # Function to classify lines
15
def classify_lines(text):
    """Assign each non-blank line of *text* to one of the contract categories.

    Each line is tokenized (truncated to at most 512 tokens) and passed through
    the module-level ``model``; the line is filed under the entry of the
    module-level ``categories`` list whose logit is highest.

    Args:
        text: Contract specification text, one clause per line.

    Returns:
        A dict mapping every category name to the list of lines assigned to
        it, in input order. Categories that received no line map to ``[]``.
    """
    categorized_lines = {category: [] for category in categories}

    # Inference only: disable autograd so no gradient graph is built per line.
    with torch.no_grad():
        for line in text.split("\n"):
            if not line.strip():  # Skip empty lines
                continue
            inputs = tokenizer(line, return_tensors="pt", padding=True, truncation=True, max_length=512)
            logits = model(**inputs).logits
            # softmax is monotonic, so argmax over the raw logits selects the
            # same class without computing probabilities.
            predicted_category = torch.argmax(logits, dim=1).item()
            categorized_lines[categories[predicted_category]].append(line)

    return categorized_lines
28
+
29
+ st.title("Finnish Contract Specifications Categorizer with TurkuNLP BERT")
30
 
31
  st.write("Enter the contract specifications in Finnish:")
32
 
 
35
 
36
# Classify the entered text when the button is pressed and render the lines
# grouped under one markdown heading per category.
if st.button("Classify"):
    if contract_text:
        # Keep the result under its own name: rebinding the module-level
        # `categories` list to this dict would break any later
        # classify_lines() call in the same run, which indexes it by position.
        classified = classify_lines(contract_text)

        st.write("Classified Contract Specifications:")

        for category, lines in classified.items():
            st.write(f"### {category}")
            for line in lines:
                st.write(f"- {line}")
    else:
        st.write("Please enter the contract specifications.")