Spaces:

walaa2022
/

financial_analysis

Sleeping

App Files Community

walaa2022 committed on Nov 30, 2024

Commit

b2501de

verified ·

1 Parent(s): 0972f64

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -21

app.py CHANGED Viewed

@@ -1,4 +1,7 @@
 import gradio as gr
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
@@ -6,9 +9,6 @@ from transformers import (
     T5ForConditionalGeneration,
     T5Tokenizer
 )
-import torch
-import pandas as pd
-import json
 class FinancialAnalyzer:
     def __init__(self):
@@ -22,10 +22,6 @@ class FinancialAnalyzer:
             self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
             self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
-            # Initialize T5
-            self.t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
-            self.t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
             self.device = "cpu"
             self._move_models_to_device()
             print("Models loaded successfully!")
@@ -36,10 +32,8 @@ class FinancialAnalyzer:
     def _move_models_to_device(self):
         self.tiny_model.to(self.device)
         self.finbert_model.to(self.device)
-        self.t5_model.to(self.device)
     def read_file_content(self, file_path):
-        """Read and process uploaded file content"""
         if file_path is None:
             return "No file uploaded"
@@ -59,7 +53,6 @@ class FinancialAnalyzer:
             return f"Error processing file: {str(e)}"
     def analyze_financial_data(self, balance_sheet_path, income_statement_path):
-        """Analyze uploaded financial statements"""
         try:
             # Read file contents
             balance_sheet = self.read_file_content(balance_sheet_path)
@@ -72,10 +65,10 @@ class FinancialAnalyzer:
             prompt = f"""<human>Analyze these financial statements:
             Balance Sheet:
-            {balance_sheet[:1000]}
             Income Statement:
-            {income_statement[:1000]}
             Provide:
             1. Key financial metrics
@@ -85,16 +78,25 @@ class FinancialAnalyzer:
             <assistant>Here's my analysis:"""
-            # Generate analysis using TinyLlama
-            inputs = self.tiny_tokenizer(prompt, return_tensors="pt", max_new_tokens=1024, truncation=True)
             outputs = self.tiny_model.generate(
                 inputs["input_ids"],
-                max_length=1024,
                 temperature=0.7,
                 top_p=0.95,
-                do_sample=True,
-                pad_token_id=self.tiny_tokenizer.eos_token_id
             )
             analysis = self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
             # Generate sentiment
@@ -103,7 +105,8 @@ class FinancialAnalyzer:
             # Format results
             results = {
                 "Analysis": analysis,
-                "Sentiment": sentiment
             }
             return json.dumps(results, indent=2)
@@ -114,10 +117,17 @@ class FinancialAnalyzer:
     def analyze_sentiment(self, balance_sheet, income_statement):
         try:
             text = f"{balance_sheet[:500]}\n{income_statement[:500]}"
-            inputs = self.finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
             outputs = self.finbert_model(**inputs)
             probs = torch.nn.functional.softmax(outputs.logits, dim=1)
             labels = ['negative', 'neutral', 'positive']
             return {
                 'sentiment': labels[probs.argmax().item()],
                 'confidence': f"{probs.max().item():.2f}"
@@ -133,11 +143,11 @@ def create_interface():
         inputs=[
             gr.File(
                 label="Upload Balance Sheet (CSV, Excel, or Markdown)",
-                type="filepath"  # Changed from 'file' to 'filepath'
             ),
             gr.File(
                 label="Upload Income Statement (CSV, Excel, or Markdown)",
-                type="filepath"  # Changed from 'file' to 'filepath'
             )
         ],
         outputs=gr.Textbox(

 import gradio as gr
+import torch
+import pandas as pd
+import json
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
     T5ForConditionalGeneration,
     T5Tokenizer
 )
 class FinancialAnalyzer:
     def __init__(self):
             self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
             self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
             self.device = "cpu"
             self._move_models_to_device()
             print("Models loaded successfully!")
     def _move_models_to_device(self):
         self.tiny_model.to(self.device)
         self.finbert_model.to(self.device)
     def read_file_content(self, file_path):
         if file_path is None:
             return "No file uploaded"
             return f"Error processing file: {str(e)}"
     def analyze_financial_data(self, balance_sheet_path, income_statement_path):
         try:
             # Read file contents
             balance_sheet = self.read_file_content(balance_sheet_path)
             prompt = f"""<human>Analyze these financial statements:
             Balance Sheet:
+            {balance_sheet[:800]}
             Income Statement:
+            {income_statement[:800]}
             Provide:
             1. Key financial metrics
             <assistant>Here's my analysis:"""
+            # Tokenize input
+            inputs = self.tiny_tokenizer(
+                prompt,
+                return_tensors="pt",
+                truncation=True,
+                max_length=2048
+            ).to(self.device)
+            # Generate response
             outputs = self.tiny_model.generate(
                 inputs["input_ids"],
+                do_sample=True,
                 temperature=0.7,
                 top_p=0.95,
+                repetition_penalty=1.2,
+                max_length=2048  # Total length including input
             )
+            # Decode response
             analysis = self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
             # Generate sentiment
             # Format results
             results = {
                 "Analysis": analysis,
+                "Sentiment": sentiment,
+                "Note": "Analysis based on financial statements from 2021-2025"
             }
             return json.dumps(results, indent=2)
     def analyze_sentiment(self, balance_sheet, income_statement):
         try:
             text = f"{balance_sheet[:500]}\n{income_statement[:500]}"
+            inputs = self.finbert_tokenizer(
+                text,
+                return_tensors="pt",
+                truncation=True,
+                max_length=512
+            ).to(self.device)
             outputs = self.finbert_model(**inputs)
             probs = torch.nn.functional.softmax(outputs.logits, dim=1)
             labels = ['negative', 'neutral', 'positive']
             return {
                 'sentiment': labels[probs.argmax().item()],
                 'confidence': f"{probs.max().item():.2f}"
         inputs=[
             gr.File(
                 label="Upload Balance Sheet (CSV, Excel, or Markdown)",
+                type="filepath"
             ),
             gr.File(
                 label="Upload Income Statement (CSV, Excel, or Markdown)",
+                type="filepath"
             )
         ],
         outputs=gr.Textbox(