Spaces:

walaa2022
/

financial_analysis

Running

App Files Files Community

walaa2022 committed on Dec 1, 2024

Commit

6631d2e

verified ·

1 Parent(s): 88b54ed

Update app.py

Browse files

Files changed (1) hide show

app.py +135 -164

app.py CHANGED Viewed

@@ -3,8 +3,9 @@ import pandas as pd
 import json
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-class FastFinancialAnalyzer:
     def __init__(self):
         print("Initializing Analyzer...")
         self.initialize_model()
@@ -12,185 +13,156 @@ class FastFinancialAnalyzer:
     def initialize_model(self):
         """Initialize TinyLlama model"""
-        self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-        self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-        self.model.eval()  # Set to evaluation mode
-    def parse_markdown_table(self, content, section_name=""):
-        """Extract data from markdown table"""
-        data = {}
-        lines = content.split('\n')
-        headers = []
-        current_section = section_name
-        for line in lines:
-            if line.startswith('##'):
-                current_section = line.strip('#').strip()
-            elif '|' in line:
-                # Skip separator lines
-                if '-|-' in line:
-                    continue
-                # Process table rows
-                cells = [cell.strip() for cell in line.split('|')[1:-1]]
-                if not headers:
-                    headers = cells
-                else:
-                    if len(cells) == len(headers):
-                        row_data = dict(zip(headers, cells))
-                        key = row_data.get(headers[0], "").strip()
-                        if key:
-                            data[key] = row_data
-        return {current_section: data}
     def clean_number(self, value):
-        """Clean numerical values"""
-        if isinstance(value, str):
-            value = value.replace(',', '').replace('$', '').replace('(', '-').replace(')', '')
-            value = value.strip()
         try:
-            return float(value)
         except:
             return 0.0
-    def extract_key_metrics(self, income_data, balance_data):
-    """Extract key financial metrics with safety checks"""
-    try:
-        # First, safely extract values with error handling
-        revenue_2025 = self.safe_extract_number(income_data, "Total Net Revenue", "2025")
-        revenue_2021 = self.safe_extract_number(income_data, "Total Net Revenue", "2021")
-        profit_2025 = self.safe_extract_number(income_data, "Net Earnings", "2025")
-        profit_2021 = self.safe_extract_number(income_data, "Net Earnings", "2021")
-        assets_2025 = self.safe_extract_number(balance_data, "Total_Assets", "2025")
-        assets_2021 = self.safe_extract_number(balance_data, "Total_Assets", "2021")
-        metrics = {
-            "Revenue": {
-                "2025": revenue_2025,
-                "2021": revenue_2021,
-                "Growth": self.calculate_growth(revenue_2025, revenue_2021)
-            },
-            "Profit": {
-                "2025": profit_2025,
-                "2021": profit_2021,
-                "Growth": self.calculate_growth(profit_2025, profit_2021),
-                "Margin_2025": self.calculate_percentage(profit_2025, revenue_2025)
-            },
-            "Assets": {
-                "2025": assets_2025,
-                "2021": assets_2021,
-                "Growth": self.calculate_growth(assets_2025, assets_2021)
-            }
-        }
-        return metrics
-    except Exception as e:
-        print(f"Error in metric extraction: {str(e)}")
-        return self.get_default_metrics()
-    def safe_extract_number(self, data_dict, key, year):
-    """Safely extract and convert number from data"""
         try:
-        if isinstance(data_dict, dict):
-            for k, v in data_dict.items():
-                if isinstance(v, dict) and key in k:
-                    value = v.get(year, '0')
-                    return self.clean_number(value)
-            return 0.0
         except Exception as e:
-            print(f"Error extracting {key} for {year}: {str(e)}")
-            return 0.0
-    def calculate_growth(self, current, previous):
-    """Calculate growth percentage with safety check"""
         try:
-            if previous and previous != 0:
-                return ((current - previous) / abs(previous)) * 100
-            return 0.0
-        except:
-            return 0.0
-    def calculate_percentage(self, numerator, denominator):
-    """Calculate percentage with safety check"""
         try:
-            if denominator and denominator != 0:
-                 return (numerator / denominator) * 100
-            return 0.0
-        except:
-            return 0.0
-    def get_default_metrics(self):
-    """Return default metrics structure"""
-        return {
-        "Revenue": {"2025": 0, "2021": 0, "Growth": 0},
-        "Profit": {"2025": 0, "2021": 0, "Growth": 0, "Margin_2025": 0},
-        "Assets": {"2025": 0, "2021": 0, "Growth": 0}
-    }
     def generate_analysis_prompt(self, metrics):
-    """Create focused analysis prompt with safety checks"""
-        return f"""<human>Analyze these financial metrics and provide insights:
-Key Performance Indicators:
-1. Revenue Performance:
-   - 2025: ${metrics['Revenue']['2025']:,.1f}M
-   - 2021: ${metrics['Revenue']['2021']:,.1f}M
-   - 5-Year Growth: {metrics['Revenue']['Growth']:.1f}%
-2. Profitability:
-   - 2025 Net Profit: ${metrics['Profit']['2025']:,.1f}M
-   - 2021 Net Profit: ${metrics['Profit']['2021']:,.1f}M
-   - Profit Growth: {metrics['Profit']['Growth']:.1f}%
-   - 2025 Profit Margin: {metrics['Profit']['Margin_2025']:.1f}%
-3. Asset Base:
-   - 2025 Total Assets: ${metrics['Assets']['2025']:,.1f}M
-   - 2021 Total Assets: ${metrics['Assets']['2021']:,.1f}M
-   - Asset Growth: {metrics['Assets']['Growth']:.1f}%
-Based on these metrics, provide:
-1. Financial Performance Assessment
 2. Key Strengths and Weaknesses
 3. Strategic Recommendations</human>"""
-    def generate_analysis_prompt(self, metrics):
-        """Create focused analysis prompt"""
-        return f"""<human>Analyze these financial metrics and provide insights:
-Key Performance Indicators (in millions):
-1. Revenue:
-   - 2025: ${metrics['Revenue']['2025']:.1f}M
-   - 2021: ${metrics['Revenue']['2021']:.1f}M
-   - Growth: {((metrics['Revenue']['2025'] - metrics['Revenue']['2021']) / metrics['Revenue']['2021'] * 100):.1f}%
-2. Net Profit:
-   - 2025: ${metrics['Profit']['2025']:.1f}M
-   - 2021: ${metrics['Profit']['2021']:.1f}M
-   - Margin 2025: {(metrics['Profit']['2025'] / metrics['Revenue']['2025'] * 100):.1f}%
-3. Asset Utilization:
-   - 2025: ${metrics['Assets']['2025']:.1f}M
-   - 2021: ${metrics['Assets']['2021']:.1f}M
-   - Growth: {((metrics['Assets']['2025'] - metrics['Assets']['2021']) / metrics['Assets']['2021'] * 100):.1f}%
-Provide:
-1. Performance Assessment
-2. Key Strengths and Concerns
-3. Strategic Recommendations</human>"""
     def generate_analysis(self, prompt):
         """Generate analysis using TinyLlama"""
         try:
             inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
             outputs = self.model.generate(
                 inputs["input_ids"],
-                max_new_tokens=500,
                 temperature=0.7,
                 top_p=0.9,
                 do_sample=True,
                 pad_token_id=self.tokenizer.eos_token_id,
                 no_repeat_ngram_size=3
             )
-            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
         except Exception as e:
             return f"Error generating analysis: {str(e)}"
@@ -203,23 +175,22 @@ Provide:
             with open(income_stmt_file, 'r') as f:
                 income_stmt = f.read()
-            # Parse data
-            income_data = self.parse_markdown_table(income_stmt, "Income Statement")
-            balance_data = self.parse_markdown_table(balance_sheet, "Balance Sheet")
-            # Extract metrics
-            metrics = self.extract_key_metrics(income_data.get("Income Statement", {}),
-                                            balance_data.get("Balance Sheet", {}))
-            # Generate analysis
-            analysis_prompt = self.generate_analysis_prompt(metrics)
-            analysis = self.generate_analysis(analysis_prompt)
             # Prepare results
             results = {
                 "Financial Analysis": {
                     "Key Metrics": metrics,
-                    "AI Analysis": analysis.split("<human>")[-1].strip(),
                     "Analysis Period": "2021-2025",
                     "Note": "All monetary values in millions ($M)"
                 }
@@ -228,10 +199,10 @@ Provide:
             return json.dumps(results, indent=2)
         except Exception as e:
-            return f"Error in analysis: {str(e)}"
 def create_interface():
-    analyzer = FastFinancialAnalyzer()
     iface = gr.Interface(
         fn=analyzer.analyze_financials,
@@ -240,8 +211,8 @@ def create_interface():
             gr.File(label="Income Statement (Markdown)", type="filepath")
         ],
         outputs=gr.Textbox(label="Analysis Results", lines=25),
-        title="Fast Financial Statement Analyzer",
-        description="Upload financial statements in Markdown format for quick AI-powered analysis"
     )
     return iface

 import json
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+import re
+class FinancialAnalyzer:
     def __init__(self):
+        """Print a start-up message, then load the tokenizer and model via ``initialize_model``."""
         print("Initializing Analyzer...")
         self.initialize_model()
     def initialize_model(self):
+        """Load the TinyLlama chat tokenizer and causal-LM weights.
+
+        The tokenizer is stored on ``self.tokenizer`` and the model on
+        ``self.model``; ``eval()`` is then called on the model. Any exception
+        raised while loading is reported on stdout and re-raised unchanged.
+        """
+        checkpoint = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+            self.model = AutoModelForCausalLM.from_pretrained(checkpoint)
+            self.model.eval()
+        except Exception as exc:
+            print(f"Error initializing model: {exc}")
+            raise
     def clean_number(self, value):
+        """Convert a table cell to ``float``, tolerating common formatting.
+
+        For strings, currency signs, thousands separators and markdown emphasis
+        markers (``*``) are removed, and a value containing parentheses is
+        treated as negative, e.g. ``"(1,234)"`` -> ``-1234.0``. Non-string
+        values are passed to ``float`` as they are.
+
+        Args:
+            value: Raw cell content (usually ``str``), a number, or ``None``.
+
+        Returns:
+            The parsed number, or ``0.0`` when the value is empty, ``None`` or
+            cannot be interpreted as a number.
+        """
         try:
+            if isinstance(value, str):
+                # Bold rows such as "**1,234**" would otherwise fail to parse
+                # and silently become 0.0.
+                cleaned = value.replace('$', '').replace(',', '').replace('*', '').strip()
+                if '(' in cleaned and ')' in cleaned:
+                    # Accounting notation for negatives; strip again so that
+                    # "( 15 )" does not turn into the unparsable "- 15 ".
+                    cleaned = '-' + cleaned.replace('(', '').replace(')', '').strip()
+                value = cleaned
+            return float(value or 0)
+        except (TypeError, ValueError, OverflowError):
+            # Only conversion failures are expected here; anything else
+            # (KeyboardInterrupt, SystemExit, ...) must propagate.
             return 0.0
+    def parse_financial_data(self, content):
+        """Parse markdown headings and pipe tables into nested dictionaries.
+
+        A line starting with ``#`` opens a new section named after the heading
+        text. Within a section, the first table row is taken as the header row
+        and every later row as data; delimiter rows (``|---|---|``,
+        ``| --- | ---: |``, ...) and rows with only empty cells are ignored.
+        Rows may be written with or without the outer pipes.
+
+        Args:
+            content: Markdown text.
+
+        Returns:
+            ``{section_name: table}`` where ``table`` is the result of
+            ``process_table`` for that section. Sections without data rows are
+            omitted. ``{}`` is returned (after printing the error) if parsing
+            raises.
+        """
         try:
+            data = {}
+            current_section = ""
+            current_table = []
+            headers = None
+            for line in content.split('\n'):
+                if line.startswith('#'):
+                    # A heading closes the table collected so far.
+                    if current_table and headers:
+                        data[current_section] = self.process_table(headers, current_table)
+                    current_section = line.strip('# ')
+                    current_table = []
+                    headers = None
+                    continue
+                if '|' not in line:
+                    continue
+                # Remove at most one outer pipe per side so that rows written
+                # without outer pipes keep their first and last cells.
+                body = line.strip()
+                if body.startswith('|'):
+                    body = body[1:]
+                if body.endswith('|'):
+                    body = body[:-1]
+                row = [cell.strip() for cell in body.split('|')]
+                if not any(row):
+                    continue
+                # Delimiter row: every cell is dashes with optional alignment
+                # colons. Checking cells (not the substring "-|-") also
+                # catches the spaced form "| --- | --- |".
+                if all('-' in cell and not cell.strip('-:') for cell in row):
+                    continue
+                if not headers:
+                    headers = row
+                else:
+                    current_table.append(row)
+            # Process last table
+            if current_table and headers:
+                data[current_section] = self.process_table(headers, current_table)
+            return data
         except Exception as e:
+            print(f"Error parsing financial data: {str(e)}")
+            return {}
+    def process_table(self, headers, rows):
+        """Turn parsed table rows into ``{item: {column: number}}``.
+
+        The first cell of each row, with surrounding ``*`` and whitespace
+        removed, names the item; the remaining cells are converted with
+        ``clean_number`` and keyed by the matching header. Rows whose length
+        differs from the header row are skipped. On any error the message is
+        printed and ``{}`` is returned.
+        """
         try:
+            table = {}
+            for cells in rows:
+                if len(cells) != len(headers):
+                    continue
+                label = cells[0].strip('*').strip()
+                table[label] = {
+                    column: self.clean_number(cell)
+                    for column, cell in zip(headers[1:], cells[1:])
+                }
+            return table
+        except Exception as exc:
+            print(f"Error processing table: {exc}")
+            return {}
+    def extract_metrics(self, income_data, balance_data):
+        """Collect headline figures from the parsed statements.
+
+        Values are looked up with ``get_nested_value``. Revenue is read for
+        2025 and 2021; profitability and balance-sheet figures for 2025 only.
+        ``Gross_Margin`` and ``Net_Margin`` (percent of 2025 revenue) are added
+        only when 2025 revenue is non-zero. On any error the message is printed
+        and ``{}`` is returned.
+        """
         try:
+            def income(section, item, year="2025"):
+                return self.get_nested_value(income_data, section, item, year)
+            def balance(item):
+                return self.get_nested_value(balance_data, "Key Totals", item, "2025")
+            revenue = {
+                "2025": income("Revenue", "Total Net Revenue"),
+                "2021": income("Revenue", "Total Net Revenue", "2021"),
+            }
+            profitability = {
+                "Gross_Profit_2025": income("Cost and Gross Profit", "Gross Profit"),
+                "Net_Earnings_2025": income("Profit Summary", "Net Earnings"),
+                "Operating_Expenses_2025": income("Operating Expenses", "Total Operating Expenses"),
+            }
+            balance_sheet = {
+                "Total_Assets_2025": balance("Total_Assets"),
+                "Total_Liabilities_2025": balance("Total_Liabilities"),
+                "Equity_2025": balance("Total_Shareholders_Equity"),
+            }
+            # Margins are undefined without revenue, so the keys are left out.
+            current_revenue = revenue["2025"]
+            if current_revenue != 0:
+                for margin_key, source_key in (
+                    ("Gross_Margin", "Gross_Profit_2025"),
+                    ("Net_Margin", "Net_Earnings_2025"),
+                ):
+                    profitability[margin_key] = (profitability[source_key] / current_revenue) * 100
+            return {
+                "Revenue": revenue,
+                "Profitability": profitability,
+                "Balance_Sheet": balance_sheet,
+            }
+        except Exception as exc:
+            print(f"Error extracting metrics: {exc}")
+            return {}
+    def get_nested_value(self, data, section, key, year):
+        """Return ``data[section][key][year]``, or ``0`` if any level is missing.
+
+        Args:
+            data: Mapping of section name -> item name -> year -> value.
+            section: Section name to look up.
+            key: Item name inside the section.
+            year: Column name (e.g. ``"2025"``) inside the item.
+
+        Returns:
+            The stored value, or ``0`` when a level is absent or is not a
+            mapping (including ``data`` being ``None``).
+        """
+        try:
+            return data.get(section, {}).get(key, {}).get(year, 0)
+        except (AttributeError, TypeError):
+            # AttributeError: a level has no .get(); TypeError: unhashable key.
+            # Anything else is unexpected and must not be swallowed.
+            return 0
     def generate_analysis_prompt(self, metrics):
+        """Create analysis prompt from metrics.
+
+        Formats the 2025 revenue, profit, margin and balance-sheet figures in
+        ``metrics`` into a ``<human>...</human>`` prompt. The two margin
+        entries are read with ``.get(..., 0)`` and may therefore be absent;
+        every other key is indexed directly.
+
+        Returns:
+            The prompt text, or an empty string (after printing the error) if
+            formatting raises, e.g. because a key is missing.
+        """
+        try:
+            # Continuation lines of the literal are flush-left; any indentation
+            # added to them would become part of the prompt text.
+            return f"""<human>Analyze these financial metrics for 2025:
+Revenue and Profitability:
+- Total Revenue: ${metrics['Revenue']['2025']:,.1f}M
+- Gross Profit: ${metrics['Profitability']['Gross_Profit_2025']:,.1f}M
+- Net Earnings: ${metrics['Profitability']['Net_Earnings_2025']:,.1f}M
+- Gross Margin: {metrics['Profitability'].get('Gross_Margin', 0):,.1f}%
+- Net Margin: {metrics['Profitability'].get('Net_Margin', 0):,.1f}%
+Balance Sheet Strength:
+- Total Assets: ${metrics['Balance_Sheet']['Total_Assets_2025']:,.1f}M
+- Total Liabilities: ${metrics['Balance_Sheet']['Total_Liabilities_2025']:,.1f}M
+- Shareholders' Equity: ${metrics['Balance_Sheet']['Equity_2025']:,.1f}M
+Provide a concise analysis of:
+1. Financial Health
 2. Key Strengths and Weaknesses
 3. Strategic Recommendations</human>"""
+        except Exception as e:
+            print(f"Error generating prompt: {str(e)}")
+            return ""
     def generate_analysis(self, prompt):
+        """Generate analysis text for ``prompt`` with the loaded model.
+
+        The prompt is tokenized (truncated to 1500 tokens) and up to 500 new
+        tokens are sampled. Only the newly generated tokens are decoded, so the
+        returned text does not repeat the prompt.
+
+        Args:
+            prompt: Prompt text, as built by ``generate_analysis_prompt``.
+
+        Returns:
+            The generated text with surrounding whitespace removed, or an
+            ``"Error generating analysis: ..."`` message if anything raises.
+        """
         try:
             inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
             outputs = self.model.generate(
                 inputs["input_ids"],
+                # pad_token_id equals eos_token_id below, so the mask cannot be
+                # inferred reliably from the ids; pass the tokenizer's own.
+                attention_mask=inputs.get("attention_mask"),
+                max_new_tokens=500,  # Generate up to 500 new tokens
                 temperature=0.7,
                 top_p=0.9,
                 do_sample=True,
                 pad_token_id=self.tokenizer.eos_token_id,
                 no_repeat_ngram_size=3
             )
+            # For a causal LM, generate() returns prompt + continuation. The
+            # prompt itself starts with "<human>", so splitting on that marker
+            # never removed it; slice the prompt tokens off instead.
+            prompt_length = inputs["input_ids"].shape[-1]
+            analysis = self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
+            # Clean up the response
+            analysis = analysis.split("<human>")[-1].strip()
+            return analysis
         except Exception as e:
             return f"Error generating analysis: {str(e)}"
             with open(income_stmt_file, 'r') as f:
                 income_stmt = f.read()
+            # Parse financial data
+            income_data = self.parse_financial_data(income_stmt)
+            balance_data = self.parse_financial_data(balance_sheet)
+            # Extract key metrics
+            metrics = self.extract_metrics(income_data, balance_data)
+            # Generate and get analysis
+            prompt = self.generate_analysis_prompt(metrics)
+            analysis = self.generate_analysis(prompt)
             # Prepare results
             results = {
                 "Financial Analysis": {
                     "Key Metrics": metrics,
+                    "AI Insights": analysis,
                     "Analysis Period": "2021-2025",
                     "Note": "All monetary values in millions ($M)"
                 }
             return json.dumps(results, indent=2)
         except Exception as e:
+            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"
 def create_interface():
+    analyzer = FinancialAnalyzer()
     iface = gr.Interface(
         fn=analyzer.analyze_financials,
             gr.File(label="Income Statement (Markdown)", type="filepath")
         ],
         outputs=gr.Textbox(label="Analysis Results", lines=25),
+        title="Financial Statement Analyzer",
+        description="Upload financial statements in Markdown format for AI-powered analysis"
     )
     return iface