Spaces:

walaa2022
/

financial_analysis

Sleeping

App Files Files Community

walaa2022 committed on Dec 1, 2024

Commit

5bc4f16

verified ·

1 Parent(s): 8dae603

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -145

app.py CHANGED Viewed

@@ -1,174 +1,156 @@
 import gradio as gr
 import pandas as pd
-import numpy as np
 import json
-import re
-from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
 import torch
class FinancialAnalyzer:
    """Analyze markdown financial statements with TinyLlama and FinBERT.

    TinyLlama writes a free-text analysis of the statements and FinBERT
    classifies the sentiment of the flattened statement text.
    """

    # Label order published for ProsusAI/finbert; used only if the loaded
    # model config does not expose ``id2label``.
    _FINBERT_FALLBACK_LABELS = ("positive", "negative", "neutral")

    def __init__(self):
        """Create the analyzer and load both models."""
        print("Initializing Financial Analyzer...")
        self.initialize_models()

    def initialize_models(self):
        """Load the TinyLlama and FinBERT tokenizers and models onto ``self``."""
        print("Loading models...")
        self.tiny_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
        self.tiny_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
        self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
        self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
        print("Models loaded successfully!")

    @staticmethod
    def _is_separator_row(cells):
        """Return True when every cell is a markdown delimiter such as ``---`` or ``:--:``."""
        return bool(cells) and all(
            "-" in cell and set(cell) <= {"-", ":"} for cell in cells
        )

    def parse_markdown_table(self, markdown_content):
        """Parse markdown table into pandas DataFrame.

        Args:
            markdown_content: Text whose table rows are delimited by outer
                ``|`` characters. Lines without a pipe are ignored.

        Returns:
            A DataFrame whose columns come from the first table row and whose
            cells are the stripped cell strings. Delimiter rows are dropped.
            Rows shorter or longer than the header are padded with empty
            strings or truncated so one malformed row cannot abort parsing.
        """
        headers = None
        rows = []
        for line in markdown_content.strip().split('\n'):
            if '|' not in line:
                continue
            # The pieces outside the outer pipes are not cells.
            cells = [cell.strip() for cell in line.split('|')[1:-1]]
            if self._is_separator_row(cells):
                continue
            if headers is None:
                headers = cells
            else:
                rows.append(cells)

        if headers is not None:
            width = len(headers)
            rows = [(row + [''] * width)[:width] for row in rows]
        return pd.DataFrame(rows, columns=headers)

    def extract_financial_data(self, markdown_content):
        """Convert markdown content to a structured text format.

        Every non-empty line that is not a table row becomes the name of the
        current section; table rows are collected under that section and
        rendered with ``DataFrame.to_string``.

        Args:
            markdown_content: Raw markdown text of one statement.

        Returns:
            A string with one ``"\\n<section>:"`` header per section followed
            by the rendered table.
        """
        # Drop heading and emphasis markers so they do not leak into names or cells.
        clean_text = markdown_content.replace('#', '').replace('*', '')

        tables = {}
        current_section = "General"
        for line in clean_text.split('\n'):
            stripped = line.strip()
            if not stripped:
                continue
            # Classify on the stripped line so indented table rows are still rows.
            if stripped.startswith('|'):
                tables.setdefault(current_section, []).append(stripped)
            else:
                current_section = stripped

        structured_text = []
        for section, content in tables.items():
            structured_text.append(f"\n{section}:")
            df = self.parse_markdown_table('\n'.join(content))
            structured_text.append(df.to_string())
        return '\n'.join(structured_text)

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Main analysis function.

        Args:
            balance_sheet_file: Path to the balance sheet markdown file.
            income_stmt_file: Path to the income statement markdown file.

        Returns:
            A JSON string with the generated analysis and the sentiment, or a
            string starting with ``"Error in analysis:"`` if anything fails.
        """
        try:
            # Read markdown files; be explicit about the encoding so the result
            # does not depend on the platform default.
            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
                balance_sheet_content = f.read()
            with open(income_stmt_file, 'r', encoding='utf-8') as f:
                income_stmt_content = f.read()

            # Convert to structured text
            structured_balance = self.extract_financial_data(balance_sheet_content)
            structured_income = self.extract_financial_data(income_stmt_content)

            # Create analysis prompt
            prompt = f"""<human>Please analyze these financial statements and provide detailed insights:
Financial Statements Analysis (2021-2025)
Balance Sheet Summary:
{structured_balance}
Income Statement Summary:
{structured_income}
Please provide a detailed analysis including:
1. Financial Health Assessment
   - Liquidity position
   - Capital structure
   - Asset efficiency
2. Profitability Analysis
   - Revenue trends
   - Cost management
   - Profit margins
3. Growth Analysis
   - Year-over-year growth rates
   - Market position
   - Future growth potential
4. Risk Assessment
   - Operating risks
   - Financial risks
   - Strategic risks
5. Recommendations
   - Short-term actions
   - Medium-term strategy
   - Long-term planning
6. Future Outlook
   - Market conditions
   - Company positioning
   - Growth opportunities</human>"""

            # Generate AI analysis
            inputs = self.tiny_tokenizer(prompt, return_tensors="pt", truncation=True)
            outputs = self.tiny_model.generate(
                inputs["input_ids"],
                # pad and eos share an id, so the mask must be passed explicitly.
                attention_mask=inputs.get("attention_mask"),
                max_new_tokens=1024,  # Generate up to 1024 new tokens
                temperature=0.7,
                top_p=0.95,
                do_sample=True,
                pad_token_id=self.tiny_tokenizer.eos_token_id,
                repetition_penalty=1.2,
            )
            # The output starts with the prompt tokens; decode only what was
            # generated so the report does not repeat the whole prompt.
            prompt_length = inputs["input_ids"].shape[1]
            analysis = self.tiny_tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            ).strip()

            # Generate sentiment
            sentiment = self.analyze_sentiment(structured_balance + structured_income)

            # Compile results
            results = {
                "Financial Analysis": analysis,
                "Sentiment Analysis": sentiment,
                "Analysis Period": "2021-2025",
                "Note": "All values in millions ($M)"
            }
            return json.dumps(results, indent=2)
        except Exception as e:
            # Top-level UI boundary: report the failure as text instead of raising.
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

    def analyze_sentiment(self, text):
        """Classify the sentiment of ``text`` with FinBERT.

        Args:
            text: Text to classify; it is truncated to 512 tokens.

        Returns:
            A dict with ``'sentiment'`` (the label of the most probable class)
            and ``'confidence'`` (that probability, formatted to two decimals).
        """
        inputs = self.finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            outputs = self.finbert_model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=1)
        best = int(probs.argmax().item())

        # Take the class names from the model config instead of a hard-coded
        # list, so the label always matches the checkpoint's own index order.
        id2label = getattr(getattr(self.finbert_model, "config", None), "id2label", None) or {}
        label = id2label.get(best, id2label.get(str(best)))
        if label is None:
            label = self._FINBERT_FALLBACK_LABELS[best]
        return {
            'sentiment': str(label).lower(),
            'confidence': f"{probs.max().item():.2f}"
        }
 def create_interface():
-    analyzer = FinancialAnalyzer()
     iface = gr.Interface(
         fn=analyzer.analyze_financials,
@@ -177,8 +159,8 @@ def create_interface():
             gr.File(label="Income Statement (Markdown)", type="filepath")
         ],
         outputs=gr.Textbox(label="Analysis Results", lines=25),
-        title="Financial Statement Analyzer",
-        description="Upload financial statements in Markdown format for comprehensive AI-powered analysis."
     )
     return iface

 import gradio as gr
 import pandas as pd
 import json
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
class FastFinancialAnalyzer:
    """Extract key metrics from markdown statements and have TinyLlama comment on them.

    The analyzer reads a balance sheet and an income statement written as
    markdown tables, pulls the 2021 and 2025 values of revenue, net earnings
    and total assets, and builds a short prompt from them for the model.
    """

    MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    YEARS = ("2025", "2021")

    def __init__(self):
        """Create the analyzer and load the model."""
        print("Initializing Analyzer...")
        self.initialize_model()
        print("Initialization complete!")

    def initialize_model(self):
        """Initialize TinyLlama model and tokenizer and put the model in eval mode."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
        self.model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME)
        self.model.eval()  # Set to evaluation mode

    @staticmethod
    def _is_separator_row(cells):
        """Return True when every cell is a markdown delimiter such as ``---`` or ``:--:``."""
        return bool(cells) and all(
            "-" in cell and set(cell) <= {"-", ":"} for cell in cells
        )

    @staticmethod
    def _merge_sections(parsed):
        """Flatten ``{section: {row: cells}}`` into one ``{row: cells}`` mapping."""
        merged = {}
        for rows in parsed.values():
            merged.update(rows)
        return merged

    @staticmethod
    def _percent(numerator, denominator):
        """Format ``numerator / denominator`` as a percentage, or ``"N/A"`` for a zero base."""
        if not denominator:
            return "N/A"
        return f"{numerator / denominator * 100:.1f}%"

    def parse_markdown_table(self, content, section_name=""):
        """Extract data from markdown tables, grouped by section.

        Args:
            content: Markdown text. Lines starting with ``##`` open a new
                section; lines containing ``|`` are treated as table rows.
            section_name: Section used for rows that appear before any
                ``##`` heading.

        Returns:
            ``{section: {row_label: {header: cell}}}`` where ``row_label`` is
            the row's first cell. Without headings the result has the single
            key ``section_name``. Rows whose cell count differs from the
            header, and rows with an empty first cell, are skipped.
        """
        sections = {}
        current_section = section_name
        headers = []

        for line in content.split('\n'):
            if line.startswith('##'):
                current_section = line.strip('#').strip()
                # A heading ends any table, so the next table row is a header row.
                headers = []
                continue
            if '|' not in line:
                continue

            # The pieces outside the outer pipes are not cells.
            cells = [cell.strip() for cell in line.split('|')[1:-1]]
            if self._is_separator_row(cells):
                continue
            if not headers:
                headers = cells
                continue
            if len(cells) != len(headers):
                continue

            row_data = dict(zip(headers, cells))
            key = row_data.get(headers[0], "").strip()
            if key:
                sections.setdefault(current_section, {})[key] = row_data

        # Always report the section that was current at the end, even when empty.
        sections.setdefault(current_section, {})
        return sections

    def clean_number(self, value):
        """Clean numerical values.

        Strips thousands separators and ``$``, and reads ``(123)`` as ``-123``.

        Args:
            value: A string cell or a number.

        Returns:
            The parsed float, or ``0.0`` when the value cannot be parsed or is
            not finite (for example ``"n/a"``, ``None`` or ``"nan"``).
        """
        import math

        if isinstance(value, str):
            value = (
                value.replace(',', '')
                .replace('$', '')
                .replace('(', '-')
                .replace(')', '')
                .strip()
            )
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return 0.0
        # NaN/inf would poison the ratios and produce invalid JSON.
        return number if math.isfinite(number) else 0.0

    def extract_key_metrics(self, income_data, balance_data):
        """Extract key financial metrics.

        Args:
            income_data: ``{row_label: {header: cell}}`` for the income statement.
            balance_data: The same mapping for the balance sheet.

        Returns:
            ``{"Revenue"|"Profit"|"Assets": {"2025": float, "2021": float}}``;
            missing rows or columns yield ``0.0``.
        """
        sources = (
            ("Revenue", income_data or {}, "Total Net Revenue"),
            ("Profit", income_data or {}, "Net Earnings"),
            ("Assets", balance_data or {}, "Total Assets"),
        )
        return {
            metric: {
                year: self.clean_number(table.get(row_label, {}).get(year, "0"))
                for year in self.YEARS
            }
            for metric, table, row_label in sources
        }

    def generate_analysis_prompt(self, metrics):
        """Create focused analysis prompt.

        Args:
            metrics: Mapping shaped like the return of ``extract_key_metrics``.

        Returns:
            The prompt text. Growth and margin lines show ``N/A`` when their
            base value is zero instead of raising ``ZeroDivisionError``.
        """
        revenue = metrics['Revenue']
        profit = metrics['Profit']
        assets = metrics['Assets']

        revenue_growth = self._percent(revenue['2025'] - revenue['2021'], revenue['2021'])
        margin = self._percent(profit['2025'], revenue['2025'])
        asset_growth = self._percent(assets['2025'] - assets['2021'], assets['2021'])

        return f"""<human>Analyze these financial metrics and provide insights:
Key Performance Indicators (in millions):
1. Revenue:
   - 2025: ${revenue['2025']:.1f}M
   - 2021: ${revenue['2021']:.1f}M
   - Growth: {revenue_growth}
2. Net Profit:
   - 2025: ${profit['2025']:.1f}M
   - 2021: ${profit['2021']:.1f}M
   - Margin 2025: {margin}
3. Asset Utilization:
   - 2025: ${assets['2025']:.1f}M
   - 2021: ${assets['2021']:.1f}M
   - Growth: {asset_growth}
Provide:
1. Performance Assessment
2. Key Strengths and Concerns
3. Strategic Recommendations</human>"""

    def generate_analysis(self, prompt):
        """Generate analysis using TinyLlama.

        Args:
            prompt: Prompt text; it is truncated to 1500 tokens.

        Returns:
            The newly generated text only (the prompt is not echoed back), or
            a string starting with ``"Error generating analysis:"`` on failure.
        """
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
            outputs = self.model.generate(
                inputs["input_ids"],
                # pad and eos share an id, so the mask must be passed explicitly.
                attention_mask=inputs.get("attention_mask"),
                max_new_tokens=500,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                no_repeat_ngram_size=3,
            )
            # The output starts with the prompt tokens; keep only the continuation.
            prompt_length = inputs["input_ids"].shape[1]
            return self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        except Exception as e:
            return f"Error generating analysis: {str(e)}"

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Main analysis function.

        Args:
            balance_sheet_file: Path to the balance sheet markdown file.
            income_stmt_file: Path to the income statement markdown file.

        Returns:
            A JSON string with the key metrics and the model's commentary, or
            a string starting with ``"Error in analysis:"`` if anything fails.
        """
        try:
            if not balance_sheet_file or not income_stmt_file:
                return "Error in analysis: both a balance sheet and an income statement file are required"

            # Read files with an explicit encoding so the result does not
            # depend on the platform default.
            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
                balance_sheet = f.read()
            with open(income_stmt_file, 'r', encoding='utf-8') as f:
                income_stmt = f.read()

            # Parse data. Rows are merged across sections because a "##"
            # heading in the file renames the section the rows are stored under.
            income_data = self._merge_sections(
                self.parse_markdown_table(income_stmt, "Income Statement"))
            balance_data = self._merge_sections(
                self.parse_markdown_table(balance_sheet, "Balance Sheet"))

            # Extract metrics
            metrics = self.extract_key_metrics(income_data, balance_data)

            # Generate analysis
            analysis_prompt = self.generate_analysis_prompt(metrics)
            analysis = self.generate_analysis(analysis_prompt)

            # Prepare results
            results = {
                "Financial Analysis": {
                    "Key Metrics": metrics,
                    "AI Analysis": analysis.strip(),
                    "Analysis Period": "2021-2025",
                    "Note": "All monetary values in millions ($M)"
                }
            }
            return json.dumps(results, indent=2)
        except Exception as e:
            # Top-level UI boundary: report the failure as text instead of raising.
            return f"Error in analysis: {str(e)}"
 def create_interface():
+    analyzer = FastFinancialAnalyzer()
     iface = gr.Interface(
         fn=analyzer.analyze_financials,
             gr.File(label="Income Statement (Markdown)", type="filepath")
         ],
         outputs=gr.Textbox(label="Analysis Results", lines=25),
+        title="Fast Financial Statement Analyzer",
+        description="Upload financial statements in Markdown format for quick AI-powered analysis"
     )
     return iface