# cpv_test / appStore/classifier.py
# Repository file-viewer header captured with the source (not part of the
# program): last commit "Update appStore/classifier.py" (2163302) by leavoigt;
# file size 5.19 kB.
# set path
import glob, os, sys;
sys.path.append('../utils')
from setfit import SetFitModel
#import needed libraries
#import seaborn as sns
#import matplotlib.pyplot as plt
#import numpy as np
#import pandas as pd
#import streamlit as st
from utils.groups_classifier import load_groupsClassifier, groups_classification
#import logging
#logger = logging.getLogger(__name__)
from utils.config import get_classifier_params
#from utils.preprocessing import paraLengthCheck
#from io import BytesIO
#import xlsxwriter
#import plotly.express as px
# Load the SetFit model identified by "leavoigt/vulnerable_groups".
# This statement runs at import time, so importing this module triggers the load.
# NOTE(review): vg_model is not referenced anywhere else in the visible part of
# this file -- confirm whether another module reads it before changing this.
vg_model = SetFitModel.from_pretrained("leavoigt/vulnerable_groups")
# Retrieve the necessary parameters
# classifier_identifier is the key handed to get_classifier_params; app() reads
# params['threshold'] from the returned mapping.
classifier_identifier = 'group_classification'
params = get_classifier_params(classifier_identifier)
def app():
    """Run the vulnerable-groups classifier on the DataFrame held in the session.

    Looks up the DataFrame stored in ``st.session_state``, passes it to
    ``groups_classification`` together with the configured ``threshold`` and
    stores the returned DataFrame back in the session. When no DataFrame is
    present in the session the function does nothing.

    Fixes relative to the previous version, which could not run:
      * ``st`` was never imported (the module-level import is commented out);
      * the call used ``group_classification`` although the imported helper
        is ``groups_classification``;
      * ``df`` was read before it had been assigned.

    Returns:
        None
    """
    # Imported here because the module-level `import streamlit as st` is
    # commented out in this file.
    import streamlit as st

    ### Main app code ###
    with st.container():
        # NOTE(review): 'key1' is the session key the disabled display code in
        # this file reads its results from. Confirm that the upstream step
        # also stores the input DataFrame under this key.
        if 'key1' in st.session_state:
            df = st.session_state.key1
            # NOTE(review): load_groupsClassifier is imported by this module
            # but never called -- confirm whether groups_classification needs
            # the classifier to be loaded/registered first.
            # Classify groups
            df = groups_classification(haystack_doc=df,
                                       threshold=params['threshold'])
            # Keep the classified DataFrame so later steps can display it.
            st.session_state.key1 = df
def groups_display():
    """Placeholder for the vulnerable-groups results view; currently a no-op.

    All rendering logic below is commented out, so calling this function has
    no effect and returns ``None``. This docstring also serves as the function
    body: a ``def`` followed only by comments is rejected by Python with an
    ``IndentationError``, which made the whole module unimportable.

    NOTE(review): the disabled draft reads the columns 'Policy-Action Label',
    'Sector Label', 'Indicator Label' and 'Adapt-Mitig Label' and writes
    'cpu_analysis.xlsx' -- confirm these match the output of the groups
    classifier before re-enabling it. It also calls ``to_excel``, which is not
    defined or imported in the visible part of this file.

    Returns:
        None
    """
    # --- disabled draft, kept for reference ---
    # if 'key1' in st.session_state:
    #     df = st.session_state.key1
    #     df['Action_check'] = df['Policy-Action Label'].apply(lambda x: True if 'Action' in x else False)
    #     hits = df[df['Action_check'] == True]
    #     # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
    #     range_val = min(5,len(hits))
    #     if range_val !=0:
    #         count_action = len(hits)
    #         st.write("")
    #         st.markdown("###### Top few Action Classified paragraph/text results from list of {} classified paragraphs ######".format(count_action))
    #         st.markdown("""<hr style="height:10px;border:none;color:#097969;background-color:#097969;" /> """, unsafe_allow_html=True)
    #         range_val = min(5,len(hits))
    #         for i in range(range_val):
    #             # the page number reflects the page that contains the main paragraph
    #             # according to split limit, the overlapping part can be on a separate page
    #             st.write('**Result {}** : `page {}`, `Sector: {}`,\
    #                         `Indicators: {}`, `Adapt-Mitig :{}`'\
    #                     .format(i+1,
    #                     hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
    #                     hits.iloc[i]['Indicator Label'],hits.iloc[i]['Adapt-Mitig Label']))
    #             st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
    #         hits = hits.reset_index(drop =True)
    #         st.write('----------------')
    #         st.write('Explore the data')
    #         st.write(hits)
    #         df.drop(columns = ['Action_check'],inplace=True)
    #         df_xlsx = to_excel(df)
    #         with st.sidebar:
    #             st.write('-------------')
    #             st.download_button(label='📥 Download Result',
    #                         data=df_xlsx ,
    #                         file_name= 'cpu_analysis.xlsx')
    #     else:
    #         st.info("🤔 No Actions found")
# def groups_display():
# if 'key1' in st.session_state:
# df = st.session_state.key1
# df['Policy_check'] = df['Policy-Action Label'].apply(lambda x: True if 'Policies & Plans' in x else False)
# hits = df[df['Policy_check'] == True]
# # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
# range_val = min(5,len(hits))
# if range_val !=0:
# count_policy = len(hits)
# st.write("")
# st.markdown("###### Top few Policy/Plans Classified paragraph/text results from list of {} classified paragraphs ######".format(count_policy))
# st.markdown("""<hr style="height:10px;border:none;color:#097969;background-color:#097969;" /> """, unsafe_allow_html=True)
# range_val = min(5,len(hits))
# for i in range(range_val):
# # the page number reflects the page that contains the main paragraph
# # according to split limit, the overlapping part can be on a separate page
# st.write('**Result {}** : `page {}`, `Sector: {}`,\
# `Indicators: {}`, `Adapt-Mitig :{}`'\
# .format(i+1,
# hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
# hits.iloc[i]['Indicator Label'],hits.iloc[i]['Adapt-Mitig Label']))
# st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
# hits = hits.reset_index(drop =True)
# st.write('----------------')
# st.write('Explore the data')
# st.write(hits)
# df.drop(columns = ['Policy_check'],inplace=True)
# df_xlsx = to_excel(df)
# with st.sidebar:
# st.write('-------------')
# st.download_button(label='📥 Download Result',
# data=df_xlsx ,
# file_name= 'vulnerable_groups.xlsx')
# else:
# st.info("🤔 No Groups found")