Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
2 |
from transformers import (
|
3 |
AutoTokenizer,
|
4 |
AutoModelForCausalLM,
|
@@ -6,9 +9,6 @@ from transformers import (
|
|
6 |
T5ForConditionalGeneration,
|
7 |
T5Tokenizer
|
8 |
)
|
9 |
-
import torch
|
10 |
-
import pandas as pd
|
11 |
-
import json
|
12 |
|
13 |
class FinancialAnalyzer:
|
14 |
def __init__(self):
|
@@ -22,10 +22,6 @@ class FinancialAnalyzer:
|
|
22 |
self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
|
23 |
self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
|
24 |
|
25 |
-
# Initialize T5
|
26 |
-
self.t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
27 |
-
self.t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
|
28 |
-
|
29 |
self.device = "cpu"
|
30 |
self._move_models_to_device()
|
31 |
print("Models loaded successfully!")
|
@@ -36,10 +32,8 @@ class FinancialAnalyzer:
|
|
36 |
def _move_models_to_device(self):
|
37 |
self.tiny_model.to(self.device)
|
38 |
self.finbert_model.to(self.device)
|
39 |
-
self.t5_model.to(self.device)
|
40 |
|
41 |
def read_file_content(self, file_path):
|
42 |
-
"""Read and process uploaded file content"""
|
43 |
if file_path is None:
|
44 |
return "No file uploaded"
|
45 |
|
@@ -59,7 +53,6 @@ class FinancialAnalyzer:
|
|
59 |
return f"Error processing file: {str(e)}"
|
60 |
|
61 |
def analyze_financial_data(self, balance_sheet_path, income_statement_path):
|
62 |
-
"""Analyze uploaded financial statements"""
|
63 |
try:
|
64 |
# Read file contents
|
65 |
balance_sheet = self.read_file_content(balance_sheet_path)
|
@@ -72,10 +65,10 @@ class FinancialAnalyzer:
|
|
72 |
prompt = f"""<human>Analyze these financial statements:
|
73 |
|
74 |
Balance Sheet:
|
75 |
-
{balance_sheet[:
|
76 |
|
77 |
Income Statement:
|
78 |
-
{income_statement[:
|
79 |
|
80 |
Provide:
|
81 |
1. Key financial metrics
|
@@ -85,16 +78,25 @@ class FinancialAnalyzer:
|
|
85 |
|
86 |
<assistant>Here's my analysis:"""
|
87 |
|
88 |
-
#
|
89 |
-
inputs = self.tiny_tokenizer(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
outputs = self.tiny_model.generate(
|
91 |
inputs["input_ids"],
|
92 |
-
|
93 |
temperature=0.7,
|
94 |
top_p=0.95,
|
95 |
-
|
96 |
-
|
97 |
)
|
|
|
|
|
98 |
analysis = self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
99 |
|
100 |
# Generate sentiment
|
@@ -103,7 +105,8 @@ class FinancialAnalyzer:
|
|
103 |
# Format results
|
104 |
results = {
|
105 |
"Analysis": analysis,
|
106 |
-
"Sentiment": sentiment
|
|
|
107 |
}
|
108 |
|
109 |
return json.dumps(results, indent=2)
|
@@ -114,10 +117,17 @@ class FinancialAnalyzer:
|
|
114 |
def analyze_sentiment(self, balance_sheet, income_statement):
|
115 |
try:
|
116 |
text = f"{balance_sheet[:500]}\n{income_statement[:500]}"
|
117 |
-
inputs = self.finbert_tokenizer(
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
outputs = self.finbert_model(**inputs)
|
119 |
probs = torch.nn.functional.softmax(outputs.logits, dim=1)
|
120 |
labels = ['negative', 'neutral', 'positive']
|
|
|
121 |
return {
|
122 |
'sentiment': labels[probs.argmax().item()],
|
123 |
'confidence': f"{probs.max().item():.2f}"
|
@@ -133,11 +143,11 @@ def create_interface():
|
|
133 |
inputs=[
|
134 |
gr.File(
|
135 |
label="Upload Balance Sheet (CSV, Excel, or Markdown)",
|
136 |
-
type="filepath"
|
137 |
),
|
138 |
gr.File(
|
139 |
label="Upload Income Statement (CSV, Excel, or Markdown)",
|
140 |
-
type="filepath"
|
141 |
)
|
142 |
],
|
143 |
outputs=gr.Textbox(
|
|
|
1 |
import gradio as gr
|
2 |
+
import torch
|
3 |
+
import pandas as pd
|
4 |
+
import json
|
5 |
from transformers import (
|
6 |
AutoTokenizer,
|
7 |
AutoModelForCausalLM,
|
|
|
9 |
T5ForConditionalGeneration,
|
10 |
T5Tokenizer
|
11 |
)
|
|
|
|
|
|
|
12 |
|
13 |
class FinancialAnalyzer:
|
14 |
def __init__(self):
|
|
|
22 |
self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
|
23 |
self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
|
24 |
|
|
|
|
|
|
|
|
|
25 |
self.device = "cpu"
|
26 |
self._move_models_to_device()
|
27 |
print("Models loaded successfully!")
|
|
|
32 |
def _move_models_to_device(self):
|
33 |
self.tiny_model.to(self.device)
|
34 |
self.finbert_model.to(self.device)
|
|
|
35 |
|
36 |
def read_file_content(self, file_path):
|
|
|
37 |
if file_path is None:
|
38 |
return "No file uploaded"
|
39 |
|
|
|
53 |
return f"Error processing file: {str(e)}"
|
54 |
|
55 |
def analyze_financial_data(self, balance_sheet_path, income_statement_path):
|
|
|
56 |
try:
|
57 |
# Read file contents
|
58 |
balance_sheet = self.read_file_content(balance_sheet_path)
|
|
|
65 |
prompt = f"""<human>Analyze these financial statements:
|
66 |
|
67 |
Balance Sheet:
|
68 |
+
{balance_sheet[:800]}
|
69 |
|
70 |
Income Statement:
|
71 |
+
{income_statement[:800]}
|
72 |
|
73 |
Provide:
|
74 |
1. Key financial metrics
|
|
|
78 |
|
79 |
<assistant>Here's my analysis:"""
|
80 |
|
81 |
+
# Tokenize input
|
82 |
+
inputs = self.tiny_tokenizer(
|
83 |
+
prompt,
|
84 |
+
return_tensors="pt",
|
85 |
+
truncation=True,
|
86 |
+
max_length=2048
|
87 |
+
).to(self.device)
|
88 |
+
|
89 |
+
# Generate response
|
90 |
outputs = self.tiny_model.generate(
|
91 |
inputs["input_ids"],
|
92 |
+
do_sample=True,
|
93 |
temperature=0.7,
|
94 |
top_p=0.95,
|
95 |
+
repetition_penalty=1.2,
|
96 |
+
max_length=2048 # Total length including input
|
97 |
)
|
98 |
+
|
99 |
+
# Decode response
|
100 |
analysis = self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
101 |
|
102 |
# Generate sentiment
|
|
|
105 |
# Format results
|
106 |
results = {
|
107 |
"Analysis": analysis,
|
108 |
+
"Sentiment": sentiment,
|
109 |
+
"Note": "Analysis based on financial statements from 2021-2025"
|
110 |
}
|
111 |
|
112 |
return json.dumps(results, indent=2)
|
|
|
117 |
def analyze_sentiment(self, balance_sheet, income_statement):
|
118 |
try:
|
119 |
text = f"{balance_sheet[:500]}\n{income_statement[:500]}"
|
120 |
+
inputs = self.finbert_tokenizer(
|
121 |
+
text,
|
122 |
+
return_tensors="pt",
|
123 |
+
truncation=True,
|
124 |
+
max_length=512
|
125 |
+
).to(self.device)
|
126 |
+
|
127 |
outputs = self.finbert_model(**inputs)
|
128 |
probs = torch.nn.functional.softmax(outputs.logits, dim=1)
|
129 |
labels = ['negative', 'neutral', 'positive']
|
130 |
+
|
131 |
return {
|
132 |
'sentiment': labels[probs.argmax().item()],
|
133 |
'confidence': f"{probs.max().item():.2f}"
|
|
|
143 |
inputs=[
|
144 |
gr.File(
|
145 |
label="Upload Balance Sheet (CSV, Excel, or Markdown)",
|
146 |
+
type="filepath"
|
147 |
),
|
148 |
gr.File(
|
149 |
label="Upload Income Statement (CSV, Excel, or Markdown)",
|
150 |
+
type="filepath"
|
151 |
)
|
152 |
],
|
153 |
outputs=gr.Textbox(
|