walaa2022 committed
Commit b2501de · verified · 1 parent: 0972f64

Update app.py

Files changed (1)
  1. app.py +31 -21
app.py CHANGED
@@ -1,4 +1,7 @@
 import gradio as gr
+import torch
+import pandas as pd
+import json
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
@@ -6,9 +9,6 @@ from transformers import (
     T5ForConditionalGeneration,
     T5Tokenizer
 )
-import torch
-import pandas as pd
-import json

 class FinancialAnalyzer:
     def __init__(self):
@@ -22,10 +22,6 @@ class FinancialAnalyzer:
         self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
         self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

-        # Initialize T5
-        self.t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
-        self.t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
-
         self.device = "cpu"
         self._move_models_to_device()
         print("Models loaded successfully!")
@@ -36,10 +32,8 @@ class FinancialAnalyzer:
     def _move_models_to_device(self):
         self.tiny_model.to(self.device)
         self.finbert_model.to(self.device)
-        self.t5_model.to(self.device)

     def read_file_content(self, file_path):
-        """Read and process uploaded file content"""
         if file_path is None:
             return "No file uploaded"

@@ -59,7 +53,6 @@ class FinancialAnalyzer:
             return f"Error processing file: {str(e)}"

     def analyze_financial_data(self, balance_sheet_path, income_statement_path):
-        """Analyze uploaded financial statements"""
         try:
             # Read file contents
             balance_sheet = self.read_file_content(balance_sheet_path)
@@ -72,10 +65,10 @@ class FinancialAnalyzer:
             prompt = f"""<human>Analyze these financial statements:

 Balance Sheet:
-{balance_sheet[:1000]}
+{balance_sheet[:800]}

 Income Statement:
-{income_statement[:1000]}
+{income_statement[:800]}

 Provide:
 1. Key financial metrics
@@ -85,16 +78,25 @@ class FinancialAnalyzer:

 <assistant>Here's my analysis:"""

-            # Generate analysis using TinyLlama
-            inputs = self.tiny_tokenizer(prompt, return_tensors="pt", max_new_tokens=1024, truncation=True)
+            # Tokenize input
+            inputs = self.tiny_tokenizer(
+                prompt,
+                return_tensors="pt",
+                truncation=True,
+                max_length=2048
+            ).to(self.device)
+
+            # Generate response
             outputs = self.tiny_model.generate(
                 inputs["input_ids"],
-                max_length=1024,
+                do_sample=True,
                 temperature=0.7,
                 top_p=0.95,
-                do_sample=True,
-                pad_token_id=self.tiny_tokenizer.eos_token_id
+                repetition_penalty=1.2,
+                max_length=2048  # Total length including input
             )
+
+            # Decode response
             analysis = self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)

             # Generate sentiment
@@ -103,7 +105,8 @@ class FinancialAnalyzer:
             # Format results
             results = {
                 "Analysis": analysis,
-                "Sentiment": sentiment
+                "Sentiment": sentiment,
+                "Note": "Analysis based on financial statements from 2021-2025"
             }

             return json.dumps(results, indent=2)
@@ -114,10 +117,17 @@ class FinancialAnalyzer:
     def analyze_sentiment(self, balance_sheet, income_statement):
         try:
             text = f"{balance_sheet[:500]}\n{income_statement[:500]}"
-            inputs = self.finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+            inputs = self.finbert_tokenizer(
+                text,
+                return_tensors="pt",
+                truncation=True,
+                max_length=512
+            ).to(self.device)
+
             outputs = self.finbert_model(**inputs)
             probs = torch.nn.functional.softmax(outputs.logits, dim=1)
             labels = ['negative', 'neutral', 'positive']
+
             return {
                 'sentiment': labels[probs.argmax().item()],
                 'confidence': f"{probs.max().item():.2f}"
@@ -133,11 +143,11 @@ def create_interface():
         inputs=[
             gr.File(
                 label="Upload Balance Sheet (CSV, Excel, or Markdown)",
-                type="filepath"  # Changed from 'file' to 'filepath'
+                type="filepath"
             ),
             gr.File(
                 label="Upload Income Statement (CSV, Excel, or Markdown)",
-                type="filepath"  # Changed from 'file' to 'filepath'
+                type="filepath"
             )
         ],
         outputs=gr.Textbox(
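
For reference, a minimal sketch of the generation path as it stands after this commit: the prompt is tokenized with truncation at 2048 tokens, the encoding is moved to the model's device, and the model samples with the newly added repetition_penalty while max_length=2048 bounds prompt plus generated tokens. The checkpoint id below is an assumption for illustration; the diff only shows that self.tiny_tokenizer/self.tiny_model already exist, not which model they load.

# Sketch only; "TinyLlama/TinyLlama-1.1B-Chat-v1.0" is an assumed checkpoint, not taken from this diff.
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cpu"
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0").to(device)

prompt = "<human>Analyze these financial statements: ...\n\n<assistant>Here's my analysis:"

# Truncate the prompt to the 2048-token budget and move tensors to the target device.
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(device)

# Sample a continuation; max_length counts prompt tokens plus generated tokens.
outputs = model.generate(
    inputs["input_ids"],
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.2,
    max_length=2048,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Because max_length is a total budget, a prompt that already fills most of the 2048 tokens leaves little room for the generated analysis; max_new_tokens is the alternative knob when only the output length should be capped.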
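The sentiment step follows the same pattern: the FinBERT encoding is moved to the device before the forward pass, and class probabilities come from a softmax over the logits. A standalone sketch, reading label names from the checkpoint's config rather than hardcoding the ['negative', 'neutral', 'positive'] list used in analyze_sentiment:

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

device = "cpu"
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert").to(device)

# Hypothetical input text; the app feeds the first 500 characters of each uploaded statement.
text = "Revenue grew 12% year over year while net debt declined."
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)

with torch.no_grad():
    logits = model(**inputs).logits

probs = torch.nn.functional.softmax(logits, dim=1)
pred = probs.argmax().item()
# id2label comes from the model config, so the mapping cannot drift from the checkpoint.
print({"sentiment": model.config.id2label[pred], "confidence": f"{probs.max().item():.2f}"})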