walaa2022 committed on
Commit
5bc4f16
·
verified ·
1 Parent(s): 8dae603

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -145
app.py CHANGED
@@ -1,174 +1,156 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import numpy as np
4
  import json
5
- import re
6
- from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
7
  import torch
8
 
9
class FinancialAnalyzer:
    """Analyze Markdown financial statements with TinyLlama and FinBERT.

    The balance sheet and income statement are read from Markdown files, their
    tables are flattened to plain text, TinyLlama writes a narrative analysis
    of that text and FinBERT classifies its sentiment.
    """

    # Numbered outline appended to the prompt: (section title, bullet points).
    _ANALYSIS_OUTLINE = (
        ("Financial Health Assessment",
         ("Liquidity position", "Capital structure", "Asset efficiency")),
        ("Profitability Analysis",
         ("Revenue trends", "Cost management", "Profit margins")),
        ("Growth Analysis",
         ("Year-over-year growth rates", "Market position", "Future growth potential")),
        ("Risk Assessment",
         ("Operating risks", "Financial risks", "Strategic risks")),
        ("Recommendations",
         ("Short-term actions", "Medium-term strategy", "Long-term planning")),
        ("Future Outlook",
         ("Market conditions", "Company positioning", "Growth opportunities")),
    )

    def __init__(self):
        """Load both models eagerly so the first request does not pay for it."""
        print("Initializing Financial Analyzer...")
        self.initialize_models()

    def initialize_models(self):
        """Load the TinyLlama chat model and the FinBERT sentiment classifier."""
        print("Loading models...")
        self.tiny_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
        self.tiny_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

        self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
        self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
        print("Models loaded successfully!")

    @staticmethod
    def _split_table_row(line):
        """Return the stripped cells of one Markdown table row."""
        row = line.strip()
        # Outer pipes are optional in Markdown; drop at most one on each side.
        if row.startswith('|'):
            row = row[1:]
        if row.endswith('|'):
            row = row[:-1]
        return [cell.strip() for cell in row.split('|')]

    @staticmethod
    def _is_separator_row(cells):
        """Return True for a header separator row such as ``| --- | :-: |``."""
        return bool(cells) and all(
            cell and '-' in cell and not cell.strip('-:') for cell in cells
        )

    def parse_markdown_table(self, markdown_content):
        """Parse the Markdown table rows in *markdown_content* into a DataFrame.

        The first table row supplies the column names. Separator rows are
        skipped whether or not they contain spaces. Rows that are shorter or
        longer than the header are padded with empty strings or truncated,
        so a ragged table no longer makes the DataFrame constructor raise.

        Args:
            markdown_content: Text containing pipe-delimited table rows.

        Returns:
            A DataFrame of strings; empty when no table row is present.
        """
        headers = None
        rows = []

        for line in markdown_content.strip().split('\n'):
            if '|' not in line:
                continue
            cells = self._split_table_row(line)
            if self._is_separator_row(cells):
                continue
            if headers is None:
                headers = cells
                continue
            width = len(headers)
            # Normalise the row width to the header width.
            rows.append((cells + [''] * width)[:width])

        return pd.DataFrame(rows, columns=headers)

    def extract_financial_data(self, markdown_content):
        """Convert Markdown statements into sectioned plain text.

        ``#`` and ``*`` markup characters are removed. Every non-empty line
        that is not a table row becomes the current section title, and table
        rows are grouped under the most recent title ("General" before any
        title is seen). Lines are classified after stripping whitespace, so an
        indented table row is no longer mistaken for a title.

        Args:
            markdown_content: Raw Markdown text of one statement.

        Returns:
            One ``"\\n<section>:"`` line followed by the rendered table for
            each section that contains table rows, joined by newlines.
        """
        clean_text = markdown_content.replace('#', '').replace('*', '')

        tables = {}
        current_section = "General"
        for line in clean_text.split('\n'):
            stripped = line.strip()
            if not stripped:
                continue
            if stripped.startswith('|'):
                tables.setdefault(current_section, []).append(stripped)
            else:
                current_section = stripped

        structured_text = []
        for section, content in tables.items():
            structured_text.append(f"\n{section}:")
            structured_text.append(self.parse_markdown_table('\n'.join(content)).to_string())

        return '\n'.join(structured_text)

    def _build_prompt(self, structured_balance, structured_income):
        """Assemble the TinyLlama prompt from the two flattened statements."""
        outline = "\n\n".join(
            f"{number}. {title}\n" + "\n".join(f"   - {bullet}" for bullet in bullets)
            for number, (title, bullets) in enumerate(self._ANALYSIS_OUTLINE, start=1)
        )
        return (
            "<human>Please analyze these financial statements and provide detailed insights:\n\n"
            "Financial Statements Analysis (2021-2025)\n\n"
            f"Balance Sheet Summary:\n{structured_balance}\n\n"
            f"Income Statement Summary:\n{structured_income}\n\n"
            "Please provide a detailed analysis including:\n"
            f"{outline}</human>"
        )

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Main analysis function

        Args:
            balance_sheet_file: Path of the balance sheet Markdown file.
            income_stmt_file: Path of the income statement Markdown file.

        Returns:
            A JSON string with the generated analysis and sentiment, or an
            ``"Error in analysis: ..."`` message when any step fails. Errors
            are returned rather than raised because the result is displayed
            directly in a text box.
        """
        try:
            if not balance_sheet_file or not income_stmt_file:
                raise ValueError("Both a balance sheet and an income statement file are required")

            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
                balance_sheet_content = f.read()
            with open(income_stmt_file, 'r', encoding='utf-8') as f:
                income_stmt_content = f.read()

            structured_balance = self.extract_financial_data(balance_sheet_content)
            structured_income = self.extract_financial_data(income_stmt_content)

            prompt = self._build_prompt(structured_balance, structured_income)

            inputs = self.tiny_tokenizer(prompt, return_tensors="pt", truncation=True)
            outputs = self.tiny_model.generate(
                **inputs,  # forwards the attention mask together with input_ids
                max_new_tokens=1024,  # Generate up to 1024 new tokens
                temperature=0.7,
                top_p=0.95,
                do_sample=True,
                pad_token_id=self.tiny_tokenizer.eos_token_id,
                repetition_penalty=1.2,
            )
            # The output sequence starts with the prompt tokens; decode only
            # what the model added so the report does not echo the prompt.
            prompt_length = inputs["input_ids"].shape[1]
            analysis = self.tiny_tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            ).strip()

            sentiment = self.analyze_sentiment(structured_balance + structured_income)

            results = {
                "Financial Analysis": analysis,
                "Sentiment Analysis": sentiment,
                "Analysis Period": "2021-2025",
                "Note": "All values in millions ($M)"
            }

            return json.dumps(results, indent=2)

        except Exception as e:
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

    def analyze_sentiment(self, text):
        """Classify the sentiment of *text* with FinBERT.

        Args:
            text: Text to classify; it is truncated to 512 tokens.

        Returns:
            A dict with the predicted ``'sentiment'`` label and its
            ``'confidence'`` formatted to two decimals.
        """
        inputs = self.finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():  # inference only, no autograd graph needed
            logits = self.finbert_model(**inputs).logits
        probs = torch.nn.functional.softmax(logits, dim=-1)[0]
        best = int(probs.argmax().item())

        # Take label names from the checkpoint instead of assuming an order.
        # NOTE(review): the published ProsusAI/finbert config lists
        # positive, negative, neutral - confirm against config.json.
        label = self.finbert_model.config.id2label[best]

        return {
            'sentiment': str(label).lower(),
            'confidence': f"{probs[best].item():.2f}"
        }
169
 
170
  def create_interface():
171
- analyzer = FinancialAnalyzer()
172
 
173
  iface = gr.Interface(
174
  fn=analyzer.analyze_financials,
@@ -177,8 +159,8 @@ def create_interface():
177
  gr.File(label="Income Statement (Markdown)", type="filepath")
178
  ],
179
  outputs=gr.Textbox(label="Analysis Results", lines=25),
180
- title="Financial Statement Analyzer",
181
- description="Upload financial statements in Markdown format for comprehensive AI-powered analysis."
182
  )
183
 
184
  return iface
 
1
  import gradio as gr
2
  import pandas as pd
 
3
  import json
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
5
  import torch
6
 
7
class FastFinancialAnalyzer:
    """Summarize two Markdown financial statements with TinyLlama.

    A handful of headline figures (revenue, net earnings, total assets for
    2021 and 2025) are pulled from the statement tables, turned into a short
    prompt, and TinyLlama is asked to comment on them.
    """

    MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

    def __init__(self):
        """Load the model eagerly so the first request does not pay for it."""
        print("Initializing Analyzer...")
        self.initialize_model()
        print("Initialization complete!")

    def initialize_model(self):
        """Initialize TinyLlama model"""
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
        self.model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME)
        self.model.eval()  # Set to evaluation mode

    @staticmethod
    def _split_table_row(line):
        """Return the stripped cells of one Markdown table row."""
        row = line.strip()
        # Outer pipes are optional in Markdown; drop at most one on each side.
        if row.startswith('|'):
            row = row[1:]
        if row.endswith('|'):
            row = row[:-1]
        return [cell.strip() for cell in row.split('|')]

    @staticmethod
    def _is_separator_row(cells):
        """Return True for a header separator row such as ``| --- | :-: |``."""
        return bool(cells) and all(
            cell and '-' in cell and not cell.strip('-:') for cell in cells
        )

    def parse_markdown_table(self, content, section_name=""):
        """Extract data from markdown table

        The first table row supplies the column names. Each later row with the
        same number of cells is stored under the text of its first cell.
        Separator rows are skipped whether or not they contain spaces, and
        rows whose cells are all empty are ignored.

        Args:
            content: Markdown text of one statement.
            section_name: Name used for the result when the text has no
                ``##`` heading; the last ``##`` heading replaces it.

        Returns:
            A one-entry dict ``{section: {row_label: {column: cell_text}}}``.
        """
        data = {}
        headers = []
        current_section = section_name

        for line in content.splitlines():
            if line.startswith('##'):
                current_section = line.strip('#').strip()
                continue
            if '|' not in line:
                continue

            cells = self._split_table_row(line)
            if not any(cells) or self._is_separator_row(cells):
                continue
            if not headers:
                headers = cells
                continue
            if len(cells) != len(headers):
                # Ragged rows cannot be aligned with the header; drop them.
                continue

            key = cells[0]
            if key:
                data[key] = dict(zip(headers, cells))

        return {current_section: data}

    def clean_number(self, value):
        """Clean numerical values

        Thousands separators and ``$`` are removed, and accounting-style
        parentheses are read as a minus sign.

        Args:
            value: A string such as ``"$1,234"`` or ``"(50)"``, or a number.

        Returns:
            The value as a float, or 0.0 when it cannot be parsed or is not
            finite (NaN/inf would make the JSON report invalid).
        """
        import math

        if isinstance(value, str):
            value = value.replace(',', '').replace('$', '').replace('(', '-').replace(')', '')
            value = value.strip()
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return 0.0
        return number if math.isfinite(number) else 0.0

    def extract_key_metrics(self, income_data, balance_data):
        """Extract key financial metrics

        Args:
            income_data: Rows of the income statement keyed by row label.
            balance_data: Rows of the balance sheet keyed by row label.

        Returns:
            ``{"Revenue" | "Profit" | "Assets": {"2025": float, "2021": float}}``;
            a missing row or year yields 0.0.
        """
        line_items = {
            "Revenue": (income_data, "Total Net Revenue"),
            "Profit": (income_data, "Net Earnings"),
            "Assets": (balance_data, "Total Assets"),
        }
        return {
            metric: {
                year: self.clean_number(source.get(row_label, {}).get(year, "0"))
                for year in ("2025", "2021")
            }
            for metric, (source, row_label) in line_items.items()
        }

    @staticmethod
    def _format_percent(numerator, denominator):
        """Format ``numerator / denominator`` as a percentage, or "N/A" for a zero base."""
        if not denominator:
            return "N/A"
        return f"{numerator / denominator * 100:.1f}%"

    def generate_analysis_prompt(self, metrics):
        """Create focused analysis prompt

        Ratios whose base value is zero (for example when a row was missing
        from the statement) are rendered as "N/A" instead of raising
        ZeroDivisionError.

        Args:
            metrics: The mapping returned by ``extract_key_metrics``.

        Returns:
            The prompt text wrapped in ``<human>...</human>``.
        """
        revenue = metrics['Revenue']
        profit = metrics['Profit']
        assets = metrics['Assets']

        revenue_growth = self._format_percent(revenue['2025'] - revenue['2021'], revenue['2021'])
        margin_2025 = self._format_percent(profit['2025'], revenue['2025'])
        asset_growth = self._format_percent(assets['2025'] - assets['2021'], assets['2021'])

        lines = [
            "<human>Analyze these financial metrics and provide insights:",
            "",
            "Key Performance Indicators (in millions):",
            "1. Revenue:",
            f"   - 2025: ${revenue['2025']:.1f}M",
            f"   - 2021: ${revenue['2021']:.1f}M",
            f"   - Growth: {revenue_growth}",
            "",
            "2. Net Profit:",
            f"   - 2025: ${profit['2025']:.1f}M",
            f"   - 2021: ${profit['2021']:.1f}M",
            f"   - Margin 2025: {margin_2025}",
            "",
            "3. Asset Utilization:",
            f"   - 2025: ${assets['2025']:.1f}M",
            f"   - 2021: ${assets['2021']:.1f}M",
            f"   - Growth: {asset_growth}",
            "",
            "Provide:",
            "1. Performance Assessment",
            "2. Key Strengths and Concerns",
            "3. Strategic Recommendations</human>",
        ]
        return "\n".join(lines)

    def generate_analysis(self, prompt):
        """Generate analysis using TinyLlama

        Args:
            prompt: Prompt text to send to the model.

        Returns:
            Only the newly generated text (the prompt is not echoed), or an
            ``"Error generating analysis: ..."`` message on failure.
        """
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
            outputs = self.model.generate(
                **inputs,  # forwards the attention mask together with input_ids
                max_new_tokens=500,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                no_repeat_ngram_size=3,
            )
            # The output sequence starts with the prompt tokens; skip them.
            prompt_length = inputs["input_ids"].shape[1]
            return self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
        except Exception as e:
            return f"Error generating analysis: {str(e)}"

    @staticmethod
    def _first_reply(text):
        """Return the first non-empty segment of *text*, dropping turn tags.

        If the model starts writing another ``<human>`` turn, everything from
        that tag onwards is discarded.
        """
        segments = (part.strip() for part in text.replace("</human>", "").split("<human>"))
        return next((part for part in segments if part), "")

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Main analysis function

        Args:
            balance_sheet_file: Path of the balance sheet Markdown file.
            income_stmt_file: Path of the income statement Markdown file.

        Returns:
            A JSON string with the extracted metrics and the generated
            commentary, or an ``"Error in analysis: ..."`` message. Errors
            are returned rather than raised because the result is displayed
            directly in a text box.
        """
        try:
            if not balance_sheet_file or not income_stmt_file:
                raise ValueError("both a balance sheet and an income statement file are required")

            # Read files
            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
                balance_sheet = f.read()
            with open(income_stmt_file, 'r', encoding='utf-8') as f:
                income_stmt = f.read()

            # Parse data. The parser returns exactly one section, whose name
            # changes when the file has a "##" heading, so take the rows by
            # position instead of looking them up under a fixed name.
            income_rows = next(
                iter(self.parse_markdown_table(income_stmt, "Income Statement").values()), {}
            )
            balance_rows = next(
                iter(self.parse_markdown_table(balance_sheet, "Balance Sheet").values()), {}
            )

            # Extract metrics
            metrics = self.extract_key_metrics(income_rows, balance_rows)

            # Generate analysis
            analysis_prompt = self.generate_analysis_prompt(metrics)
            analysis = self.generate_analysis(analysis_prompt)

            # Prepare results
            results = {
                "Financial Analysis": {
                    "Key Metrics": metrics,
                    "AI Analysis": self._first_reply(analysis),
                    "Analysis Period": "2021-2025",
                    "Note": "All monetary values in millions ($M)"
                }
            }

            return json.dumps(results, indent=2)

        except Exception as e:
            return f"Error in analysis: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
151
 
152
  def create_interface():
153
+ analyzer = FastFinancialAnalyzer()
154
 
155
  iface = gr.Interface(
156
  fn=analyzer.analyze_financials,
 
159
  gr.File(label="Income Statement (Markdown)", type="filepath")
160
  ],
161
  outputs=gr.Textbox(label="Analysis Results", lines=25),
162
+ title="Fast Financial Statement Analyzer",
163
+ description="Upload financial statements in Markdown format for quick AI-powered analysis"
164
  )
165
 
166
  return iface