Spaces:

mgmtprofessor
/

risk_factors_scoring

Sleeping

mgmtprofessor committed on Oct 17, 2024

Commit

fa4a747

verified ·

1 Parent(s): d428815

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
 # Set up Streamlit app
 st.title("An App to Score Firm-Generated Text on Eight Risk Factors")
 st.write("Note: You can either upload a CSV file or a single TXT file for scoring.")
-st.write("If uploading a CSV file, ensure that it contains the following columns: cik, fyear, Item 1A. Item 1A should contain the respective risk factors section for each firm-year observation.")
 st.write("If uploading a txt file, ensure it contains the respective risk factors section for each firm-year observation.")
 # Hugging Face model directories
 model_directories = {
@@ -63,7 +63,7 @@ def score_document(model, tokenizer, text_data):
 # Function to find the relevant text column
 def get_text_column(df):
-    possible_columns = ['Item 1A', 'Item 1A.', 'Item 1A. Risk Factors']
     for col in possible_columns:
         if col in df.columns:
             return col
@@ -87,7 +87,7 @@ if file_type == "CSV":
         text_column = get_text_column(df)
         if text_column is None:
-            st.error("No valid text column found. Please ensure your CSV contains 'Item 1A', 'Item 1A.', or 'Item 1A. Risk Factors'.")
         else:
             # Extract text data from the identified column
             text_data = df[text_column].dropna().tolist()  # Extracts all non-empty rows

 # Set up Streamlit app
 st.title("An App to Score Firm-Generated Text on Eight Risk Factors")
 st.write("Note: You can either upload a CSV file or a single TXT file for scoring.")
+st.write("If uploading a CSV file, ensure that it contains the following columns: cik, fyear, Item 1A (or Text). Item 1A should contain the respective risk factors section for each firm-year observation.")
 st.write("If uploading a txt file, ensure it contains the respective risk factors section for each firm-year observation.")
 # Hugging Face model directories
 model_directories = {
 # Function to find the relevant text column
 def get_text_column(df):
+    possible_columns = ['Item 1A', 'Item 1A.', 'Item 1A. Risk Factors', 'text', 'Text']
     for col in possible_columns:
         if col in df.columns:
             return col
         text_column = get_text_column(df)
         if text_column is None:
+            st.error("No valid text column found. Please ensure your CSV contains 'Item 1A', 'Item 1A.', 'Item 1A. Risk Factors', 'Text', or 'text'.")
         else:
             # Extract text data from the identified column
             text_data = df[text_column].dropna().tolist()  # Extracts all non-empty rows