# cfpb-assistant / app.py
# david-oplatka's picture
# Add Population Data Tables
# b1222b7
import os
import streamlit as st
from st_app import launch_bot
import nest_asyncio
import asyncio
import uuid
import sqlite3
from datasets import load_dataset
# Setup for HTTP API Calls to Amplitude Analytics.
# Each key is seeded only when absent, so values already stored in the
# session state are left untouched.
if 'device_id' not in st.session_state:
    # Random UUID4 string stored under 'device_id' for this session.
    st.session_state['device_id'] = str(uuid.uuid4())

if "feedback_key" not in st.session_state:
    # Integer counter, starting at zero.
    st.session_state['feedback_key'] = 0
def setup_db():
    """Create and populate the local SQLite database if it is not ready yet.

    Opens ``cfpb_database.db`` and checks whether the three tables this
    function loads (``cfpb_complaints``, ``cfpb_county_populations`` and
    ``cfpb_zip_to_county``) all exist and each hold at least one row. If so,
    setup is skipped. Otherwise the CSV files are downloaded from the
    ``vectara/cfpb-complaints`` dataset (authenticated with the ``HF_TOKEN``
    environment variable) and written to the tables, replacing any existing
    contents.

    The connection is always closed, including when loading fails, and a
    failed or partial load is retried on the next call instead of being
    mistaken for a populated database.

    Raises:
        Whatever ``open``, ``sqlite3``, ``load_dataset`` or ``DataFrame.to_sql``
        raise; errors are not swallowed here.
    """
    db_path = 'cfpb_database.db'
    # Destination table -> CSV file inside the dataset repository.
    data_files_by_table = {
        'cfpb_complaints': "cfpb_complaints.csv",
        'cfpb_county_populations': "cfpb_county_populations.csv",
        'cfpb_zip_to_county': "cfpb_zip_to_county.csv",
    }

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        def table_exists(table: str) -> bool:
            """Return True if *table* is listed in sqlite_master."""
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table,),
            )
            return cursor.fetchone() is not None

        def table_populated(table: str) -> bool:
            """Return True if *table* exists and contains at least one row."""
            if not table_exists(table):
                return False
            # Table names come from the fixed mapping above, never from user
            # input, so interpolating the identifier is safe here.
            cursor.execute(f'SELECT 1 FROM "{table}" LIMIT 1')
            return cursor.fetchone() is not None

        with st.spinner("Loading data... Please wait..."):
            if all(table_populated(table) for table in data_files_by_table):
                print("Database table already populated, skipping setup")
                return

            print("Populating database table")

            # Execute the SQL commands to create the database table. Only run
            # the script when the complaints table is absent (the original
            # condition), so a CREATE TABLE without IF NOT EXISTS is not
            # re-executed against an existing table on a retry.
            if not table_exists('cfpb_complaints'):
                with open('create_table.sql', 'r') as sql_file:
                    cursor.executescript(sql_file.read())

            hf_token = os.getenv('HF_TOKEN')

            # Load each CSV into its table, replacing previous contents.
            for table, data_file in data_files_by_table.items():
                df = load_dataset(
                    "vectara/cfpb-complaints",
                    data_files=data_file,
                    token=hf_token,
                )['train'].to_pandas()
                df.to_sql(table, conn, if_exists='replace', index=False)

            conn.commit()
    finally:
        # Release the connection on every path: skip, success, or error.
        conn.close()
def main():
    """Configure the page, prepare the database, then run the bot.

    The statement order matches the original script: page config first,
    database setup second, then ``nest_asyncio.apply()`` before the
    ``launch_bot()`` coroutine is handed to ``asyncio.run``.
    """
    st.set_page_config(page_title="CFPB Complaints Assistant", layout="wide")
    setup_db()
    # NOTE(review): presumably applied so asyncio.run works even if an event
    # loop is already running in this process — confirm.
    nest_asyncio.apply()
    asyncio.run(launch_bot())


if __name__ == "__main__":
    main()