walaa2022 committed on
Commit
6631d2e
·
verified ·
1 Parent(s): 88b54ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -164
app.py CHANGED
@@ -3,8 +3,9 @@ import pandas as pd
3
  import json
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  import torch
 
6
 
7
- class FastFinancialAnalyzer:
8
  def __init__(self):
9
  print("Initializing Analyzer...")
10
  self.initialize_model()
@@ -12,185 +13,156 @@ class FastFinancialAnalyzer:
12
 
13
  def initialize_model(self):
14
  """Initialize TinyLlama model"""
15
- self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
16
- self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
17
- self.model.eval() # Set to evaluation mode
18
-
19
- def parse_markdown_table(self, content, section_name=""):
20
- """Extract data from markdown table"""
21
- data = {}
22
- lines = content.split('\n')
23
- headers = []
24
- current_section = section_name
25
-
26
- for line in lines:
27
- if line.startswith('##'):
28
- current_section = line.strip('#').strip()
29
- elif '|' in line:
30
- # Skip separator lines
31
- if '-|-' in line:
32
- continue
33
- # Process table rows
34
- cells = [cell.strip() for cell in line.split('|')[1:-1]]
35
- if not headers:
36
- headers = cells
37
- else:
38
- if len(cells) == len(headers):
39
- row_data = dict(zip(headers, cells))
40
- key = row_data.get(headers[0], "").strip()
41
- if key:
42
- data[key] = row_data
43
-
44
- return {current_section: data}
45
 
46
  def clean_number(self, value):
47
- """Clean numerical values"""
48
- if isinstance(value, str):
49
- value = value.replace(',', '').replace('$', '').replace('(', '-').replace(')', '')
50
- value = value.strip()
51
  try:
52
- return float(value)
 
 
 
 
 
 
53
  except:
54
  return 0.0
55
 
56
- def extract_key_metrics(self, income_data, balance_data):
57
- """Extract key financial metrics with safety checks"""
58
- try:
59
- # First, safely extract values with error handling
60
- revenue_2025 = self.safe_extract_number(income_data, "Total Net Revenue", "2025")
61
- revenue_2021 = self.safe_extract_number(income_data, "Total Net Revenue", "2021")
62
- profit_2025 = self.safe_extract_number(income_data, "Net Earnings", "2025")
63
- profit_2021 = self.safe_extract_number(income_data, "Net Earnings", "2021")
64
- assets_2025 = self.safe_extract_number(balance_data, "Total_Assets", "2025")
65
- assets_2021 = self.safe_extract_number(balance_data, "Total_Assets", "2021")
66
-
67
- metrics = {
68
- "Revenue": {
69
- "2025": revenue_2025,
70
- "2021": revenue_2021,
71
- "Growth": self.calculate_growth(revenue_2025, revenue_2021)
72
- },
73
- "Profit": {
74
- "2025": profit_2025,
75
- "2021": profit_2021,
76
- "Growth": self.calculate_growth(profit_2025, profit_2021),
77
- "Margin_2025": self.calculate_percentage(profit_2025, revenue_2025)
78
- },
79
- "Assets": {
80
- "2025": assets_2025,
81
- "2021": assets_2021,
82
- "Growth": self.calculate_growth(assets_2025, assets_2021)
83
- }
84
- }
85
- return metrics
86
- except Exception as e:
87
- print(f"Error in metric extraction: {str(e)}")
88
- return self.get_default_metrics()
89
-
90
- def safe_extract_number(self, data_dict, key, year):
91
- """Safely extract and convert number from data"""
92
  try:
93
- if isinstance(data_dict, dict):
94
- for k, v in data_dict.items():
95
- if isinstance(v, dict) and key in k:
96
- value = v.get(year, '0')
97
- return self.clean_number(value)
98
- return 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  except Exception as e:
100
- print(f"Error extracting {key} for {year}: {str(e)}")
101
- return 0.0
102
 
103
- def calculate_growth(self, current, previous):
104
- """Calculate growth percentage with safety check"""
105
  try:
106
- if previous and previous != 0:
107
- return ((current - previous) / abs(previous)) * 100
108
- return 0.0
109
- except:
110
- return 0.0
 
 
 
 
 
 
111
 
112
- def calculate_percentage(self, numerator, denominator):
113
- """Calculate percentage with safety check"""
114
  try:
115
- if denominator and denominator != 0:
116
- return (numerator / denominator) * 100
117
- return 0.0
118
- except:
119
- return 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
- def get_default_metrics(self):
122
- """Return default metrics structure"""
123
- return {
124
- "Revenue": {"2025": 0, "2021": 0, "Growth": 0},
125
- "Profit": {"2025": 0, "2021": 0, "Growth": 0, "Margin_2025": 0},
126
- "Assets": {"2025": 0, "2021": 0, "Growth": 0}
127
- }
128
 
129
  def generate_analysis_prompt(self, metrics):
130
- """Create focused analysis prompt with safety checks"""
131
- return f"""<human>Analyze these financial metrics and provide insights:
132
-
133
- Key Performance Indicators:
134
- 1. Revenue Performance:
135
- - 2025: ${metrics['Revenue']['2025']:,.1f}M
136
- - 2021: ${metrics['Revenue']['2021']:,.1f}M
137
- - 5-Year Growth: {metrics['Revenue']['Growth']:.1f}%
138
-
139
- 2. Profitability:
140
- - 2025 Net Profit: ${metrics['Profit']['2025']:,.1f}M
141
- - 2021 Net Profit: ${metrics['Profit']['2021']:,.1f}M
142
- - Profit Growth: {metrics['Profit']['Growth']:.1f}%
143
- - 2025 Profit Margin: {metrics['Profit']['Margin_2025']:.1f}%
144
-
145
- 3. Asset Base:
146
- - 2025 Total Assets: ${metrics['Assets']['2025']:,.1f}M
147
- - 2021 Total Assets: ${metrics['Assets']['2021']:,.1f}M
148
- - Asset Growth: {metrics['Assets']['Growth']:.1f}%
149
-
150
- Based on these metrics, provide:
151
- 1. Financial Performance Assessment
152
  2. Key Strengths and Weaknesses
153
  3. Strategic Recommendations</human>"""
154
-
155
- def generate_analysis_prompt(self, metrics):
156
- """Create focused analysis prompt"""
157
- return f"""<human>Analyze these financial metrics and provide insights:
158
-
159
- Key Performance Indicators (in millions):
160
- 1. Revenue:
161
- - 2025: ${metrics['Revenue']['2025']:.1f}M
162
- - 2021: ${metrics['Revenue']['2021']:.1f}M
163
- - Growth: {((metrics['Revenue']['2025'] - metrics['Revenue']['2021']) / metrics['Revenue']['2021'] * 100):.1f}%
164
-
165
- 2. Net Profit:
166
- - 2025: ${metrics['Profit']['2025']:.1f}M
167
- - 2021: ${metrics['Profit']['2021']:.1f}M
168
- - Margin 2025: {(metrics['Profit']['2025'] / metrics['Revenue']['2025'] * 100):.1f}%
169
-
170
- 3. Asset Utilization:
171
- - 2025: ${metrics['Assets']['2025']:.1f}M
172
- - 2021: ${metrics['Assets']['2021']:.1f}M
173
- - Growth: {((metrics['Assets']['2025'] - metrics['Assets']['2021']) / metrics['Assets']['2021'] * 100):.1f}%
174
-
175
- Provide:
176
- 1. Performance Assessment
177
- 2. Key Strengths and Concerns
178
- 3. Strategic Recommendations</human>"""
179
 
180
  def generate_analysis(self, prompt):
181
  """Generate analysis using TinyLlama"""
182
  try:
183
  inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
 
184
  outputs = self.model.generate(
185
  inputs["input_ids"],
186
- max_new_tokens=500,
187
  temperature=0.7,
188
  top_p=0.9,
189
  do_sample=True,
190
  pad_token_id=self.tokenizer.eos_token_id,
191
  no_repeat_ngram_size=3
192
  )
193
- return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
194
  except Exception as e:
195
  return f"Error generating analysis: {str(e)}"
196
 
@@ -203,23 +175,22 @@ Provide:
203
  with open(income_stmt_file, 'r') as f:
204
  income_stmt = f.read()
205
 
206
- # Parse data
207
- income_data = self.parse_markdown_table(income_stmt, "Income Statement")
208
- balance_data = self.parse_markdown_table(balance_sheet, "Balance Sheet")
209
 
210
- # Extract metrics
211
- metrics = self.extract_key_metrics(income_data.get("Income Statement", {}),
212
- balance_data.get("Balance Sheet", {}))
213
 
214
- # Generate analysis
215
- analysis_prompt = self.generate_analysis_prompt(metrics)
216
- analysis = self.generate_analysis(analysis_prompt)
217
 
218
  # Prepare results
219
  results = {
220
  "Financial Analysis": {
221
  "Key Metrics": metrics,
222
- "AI Analysis": analysis.split("<human>")[-1].strip(),
223
  "Analysis Period": "2021-2025",
224
  "Note": "All monetary values in millions ($M)"
225
  }
@@ -228,10 +199,10 @@ Provide:
228
  return json.dumps(results, indent=2)
229
 
230
  except Exception as e:
231
- return f"Error in analysis: {str(e)}"
232
 
233
  def create_interface():
234
- analyzer = FastFinancialAnalyzer()
235
 
236
  iface = gr.Interface(
237
  fn=analyzer.analyze_financials,
@@ -240,8 +211,8 @@ def create_interface():
240
  gr.File(label="Income Statement (Markdown)", type="filepath")
241
  ],
242
  outputs=gr.Textbox(label="Analysis Results", lines=25),
243
- title="Fast Financial Statement Analyzer",
244
- description="Upload financial statements in Markdown format for quick AI-powered analysis"
245
  )
246
 
247
  return iface
 
3
  import json
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  import torch
6
+ import re
7
 
8
+ class FinancialAnalyzer:
9
  def __init__(self):
10
  print("Initializing Analyzer...")
11
  self.initialize_model()
 
13
 
14
  def initialize_model(self):
15
  """Initialize TinyLlama model"""
16
+ try:
17
+ self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
18
+ self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
19
+ self.model.eval()
20
+ except Exception as e:
21
+ print(f"Error initializing model: {str(e)}")
22
+ raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  def clean_number(self, value):
25
+ """Clean and convert numerical values"""
 
 
 
26
  try:
27
+ if isinstance(value, str):
28
+ # Remove currency symbols, commas, spaces
29
+ value = value.replace('$', '').replace(',', '').strip()
30
+ # Handle parentheses for negative numbers
31
+ if '(' in value and ')' in value:
32
+ value = '-' + value.replace('(', '').replace(')', '')
33
+ return float(value or 0)
34
  except:
35
  return 0.0
36
 
37
+ def parse_financial_data(self, content):
38
+ """Parse markdown content into structured data"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  try:
40
+ data = {}
41
+ current_section = ""
42
+ current_table = []
43
+ headers = None
44
+
45
+ for line in content.split('\n'):
46
+ if line.startswith('#'):
47
+ if current_table and headers:
48
+ data[current_section] = self.process_table(headers, current_table)
49
+ current_section = line.strip('# ')
50
+ current_table = []
51
+ headers = None
52
+ elif '|' in line:
53
+ if '-|-' not in line: # Skip separator lines
54
+ row = [cell.strip() for cell in line.split('|')[1:-1]]
55
+ if not headers:
56
+ headers = row
57
+ else:
58
+ current_table.append(row)
59
+
60
+ # Process last table
61
+ if current_table and headers:
62
+ data[current_section] = self.process_table(headers, current_table)
63
+
64
+ return data
65
  except Exception as e:
66
+ print(f"Error parsing financial data: {str(e)}")
67
+ return {}
68
 
69
+ def process_table(self, headers, rows):
70
+ """Process table data into structured format"""
71
  try:
72
+ processed_data = {}
73
+ for row in rows:
74
+ if len(row) == len(headers):
75
+ item_name = row[0].strip('*').strip()
76
+ processed_data[item_name] = {}
77
+ for i, value in enumerate(row[1:], 1):
78
+ processed_data[item_name][headers[i]] = self.clean_number(value)
79
+ return processed_data
80
+ except Exception as e:
81
+ print(f"Error processing table: {str(e)}")
82
+ return {}
83
 
84
+ def extract_metrics(self, income_data, balance_data):
85
+ """Extract and calculate key financial metrics"""
86
  try:
87
+ metrics = {
88
+ "Revenue": {
89
+ "2025": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2025"),
90
+ "2021": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2021")
91
+ },
92
+ "Profitability": {
93
+ "Gross_Profit_2025": self.get_nested_value(income_data, "Cost and Gross Profit", "Gross Profit", "2025"),
94
+ "Net_Earnings_2025": self.get_nested_value(income_data, "Profit Summary", "Net Earnings", "2025"),
95
+ "Operating_Expenses_2025": self.get_nested_value(income_data, "Operating Expenses", "Total Operating Expenses", "2025")
96
+ },
97
+ "Balance_Sheet": {
98
+ "Total_Assets_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Assets", "2025"),
99
+ "Total_Liabilities_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Liabilities", "2025"),
100
+ "Equity_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Shareholders_Equity", "2025")
101
+ }
102
+ }
103
+
104
+ # Calculate additional metrics
105
+ revenue_2025 = metrics["Revenue"]["2025"]
106
+ if revenue_2025 != 0:
107
+ metrics["Profitability"]["Gross_Margin"] = (metrics["Profitability"]["Gross_Profit_2025"] / revenue_2025) * 100
108
+ metrics["Profitability"]["Net_Margin"] = (metrics["Profitability"]["Net_Earnings_2025"] / revenue_2025) * 100
109
+
110
+ return metrics
111
+ except Exception as e:
112
+ print(f"Error extracting metrics: {str(e)}")
113
+ return {}
114
 
115
def get_nested_value(self, data, section, key, year):
    """Return ``data[section][key][year]``, or 0 when any level is missing.

    Args:
        data: Mapping of section name -> item name -> year -> value.
        section: Section name to look in.
        key: Item name within the section.
        year: Column (year) label within the item.

    Returns:
        The stored value, or 0 if a level is absent or is not a mapping.
    """
    try:
        return data.get(section, {}).get(key, {}).get(year, 0)
    except (AttributeError, TypeError):
        # A level was None / not a dict, or a lookup key was unhashable.
        return 0
 
121
 
122
  def generate_analysis_prompt(self, metrics):
123
+ """Create analysis prompt from metrics"""
124
+ try:
125
+ return f"""<human>Analyze these financial metrics for 2025:
126
+
127
+ Revenue and Profitability:
128
+ - Total Revenue: ${metrics['Revenue']['2025']:,.1f}M
129
+ - Gross Profit: ${metrics['Profitability']['Gross_Profit_2025']:,.1f}M
130
+ - Net Earnings: ${metrics['Profitability']['Net_Earnings_2025']:,.1f}M
131
+ - Gross Margin: {metrics['Profitability'].get('Gross_Margin', 0):,.1f}%
132
+ - Net Margin: {metrics['Profitability'].get('Net_Margin', 0):,.1f}%
133
+
134
+ Balance Sheet Strength:
135
+ - Total Assets: ${metrics['Balance_Sheet']['Total_Assets_2025']:,.1f}M
136
+ - Total Liabilities: ${metrics['Balance_Sheet']['Total_Liabilities_2025']:,.1f}M
137
+ - Shareholders' Equity: ${metrics['Balance_Sheet']['Equity_2025']:,.1f}M
138
+
139
+ Provide a concise analysis of:
140
+ 1. Financial Health
 
 
 
 
141
  2. Key Strengths and Weaknesses
142
  3. Strategic Recommendations</human>"""
143
+ except Exception as e:
144
+ print(f"Error generating prompt: {str(e)}")
145
+ return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  def generate_analysis(self, prompt):
148
  """Generate analysis using TinyLlama"""
149
  try:
150
  inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
151
+
152
  outputs = self.model.generate(
153
  inputs["input_ids"],
154
+ max_new_tokens=500, # Generate up to 500 new tokens
155
  temperature=0.7,
156
  top_p=0.9,
157
  do_sample=True,
158
  pad_token_id=self.tokenizer.eos_token_id,
159
  no_repeat_ngram_size=3
160
  )
161
+
162
+ analysis = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
163
+ # Clean up the response
164
+ analysis = analysis.split("<human>")[-1].strip()
165
+ return analysis
166
  except Exception as e:
167
  return f"Error generating analysis: {str(e)}"
168
 
 
175
  with open(income_stmt_file, 'r') as f:
176
  income_stmt = f.read()
177
 
178
+ # Parse financial data
179
+ income_data = self.parse_financial_data(income_stmt)
180
+ balance_data = self.parse_financial_data(balance_sheet)
181
 
182
+ # Extract key metrics
183
+ metrics = self.extract_metrics(income_data, balance_data)
 
184
 
185
+ # Generate and get analysis
186
+ prompt = self.generate_analysis_prompt(metrics)
187
+ analysis = self.generate_analysis(prompt)
188
 
189
  # Prepare results
190
  results = {
191
  "Financial Analysis": {
192
  "Key Metrics": metrics,
193
+ "AI Insights": analysis,
194
  "Analysis Period": "2021-2025",
195
  "Note": "All monetary values in millions ($M)"
196
  }
 
199
  return json.dumps(results, indent=2)
200
 
201
  except Exception as e:
202
+ return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"
203
 
204
  def create_interface():
205
+ analyzer = FinancialAnalyzer()
206
 
207
  iface = gr.Interface(
208
  fn=analyzer.analyze_financials,
 
211
  gr.File(label="Income Statement (Markdown)", type="filepath")
212
  ],
213
  outputs=gr.Textbox(label="Analysis Results", lines=25),
214
+ title="Financial Statement Analyzer",
215
+ description="Upload financial statements in Markdown format for AI-powered analysis"
216
  )
217
 
218
  return iface