"""Streamlit demo: data analysis driven by a Qwen large language model.

Working notes:

1. Start-up:
    1. Activate the streamlit virtual environment:
       `cd "/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter"`
       `source streamlit/bin/activate`
    1. Change into the directory that holds the code:
       `cd /Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/gradio/中交建/产品演示DEMO/交付_Qwen数据分析`
       `python3.11 -m streamlit run app.py`
1. Working! A hand-rolled implementation of data analysis code on top of Qwen.
1. Working! Run `python add_fonts.py` before `streamlit run`, so that the
   Chinese fonts are loaded into the current Python environment.
1. Working! Verified that the app can be reached from the local network.
1. Working! The Regenerate button re-generates the answer. Pay attention to
   how the regenerate function is set up (print the content of `messages` to
   confirm) and to where main() is started.
1. The feature is implemented in several stages:
    1. Generate code with Qwen from the user's prompt.
    1. Execute the generated code locally with Python's exec and similar tools.
    1. Put everything into hist_message, then let Qwen generate the conclusion.
1. Wrap variable names in '' quotes where possible to avoid errors.
1. plt.rcParams['font.sans-serif'] = ['Microsoft YaHei UI']
   ## in the Qwen data analysis module it is best to use plt here.
1. The dataframe's information (e.g. column names, data types) needs to be
   added to the prompt; this helps the user pick the data to analyse.
1. When deploying, confirm the save paths for data files and images.
"""
## TODO: 1. re-submit button.
# NOTE: PEP 263 only honours an encoding declaration on line 1 or 2 of a file;
# Python 3 source defaults to UTF-8 anyway.
# -*- coding: utf-8 -*-
import requests
import random
# from keras.utils.np_utils import to_categorical
# from keras.optimizers import SGD
# from keras.layers import Dense, Activation, LSTM, Dropout, SimpleRNN, SimpleRNNCell
# from keras.models import Sequential
# import keras
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dateutil.relativedelta import relativedelta
from scipy.optimize import minimize
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import scipy.stats as scs
from itertools import product
from tqdm import tqdm_notebook, tqdm, trange
import time
import pretty_errors
import seaborn as sns
import sklearn
from matplotlib.pyplot import style
from sklearn.metrics import r2_score, median_absolute_error, mean_absolute_error
from sklearn.metrics import median_absolute_error, mean_squared_error, mean_squared_log_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import TimeSeriesSplit
# NOTE: this rebinds `plt` (bound to matplotlib.pyplot above) to matplotlib.pylab.
import matplotlib.pylab as plt
import warnings
import re
from re import sub
import smtplib
import io
import os
import contextlib
import streamlit as st
import streamlit_authenticator as stauth
import random
from http import HTTPStatus
import dashscope
from io import StringIO
from PIL import Image
import add_fonts
import tempfile
from tempfile import NamedTemporaryFile

warnings.filterwarnings('ignore')

## get the current time
from datetime import datetime
from pytz import timezone


def get_current_time() -> str:
    """Return the current time in the ``Asia/Shanghai`` zone as ``HH:MM:SS``."""
    beijing_tz = timezone('Asia/Shanghai')
    return datetime.now(beijing_tz).strftime('%H:%M:%S')


### Reference:
from dotenv import load_dotenv
load_dotenv()

### Set the API keys (the original note said "openai"; the code configures
### dashscope and keeps a Bing key). Both are read from the environment and a
### missing variable raises KeyError here.
dashscope.api_key = os.environ['dashscope_api_key']
bing_search_api_key = os.environ['bing_api_key']
# print('dashscope api key:', dashscope.api_key)
# SECURITY: literal API keys used to be written out in the two comments below.
# They have been redacted; keep secrets in the environment / .env file only.
## export DASHSCOPE_API_KEY="<redacted>"
# dashscope.api_key = "<redacted>"

### make it look nice from the start
st.set_page_config(layout='wide', initial_sidebar_state='auto', page_icon="🤖", page_title="本地化大模型智能数据分析演示")

## layout settings.
st.title("本地化国产大模型智能数据分析演示")
st.subheader("Artificial Intelligence Data Analysis Center for Professionals")
# st.markdown("_声明:内容由人工智能生成,仅供参考。如果您本人使用或对外传播本服务生成的输出,您应当主动核查输出内容的真实性、准确性,避免传播虚假信息。_")
st.warning('本站点无法准确输出中文变量的图表,本地化大模型中图表可以正常显示中文。')

file_path = "."
# file_path = f"/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/gradio/中交建/产品演示DEMO/交付_Qwen数据分析"

### Environment / session defaults
my_orginal_question = ""
hist_message = []
# messages = [{'role': 'system', 'content': 'You are a helpful assistant.'},
#             {'role': 'user', 'content': ""}]
current_time = get_current_time()
# Current time string with a random integer in [1, 10000] appended.
rand_num = str(current_time) + str(random.randint(1, 10000))
# data_show = st.empty()

### authentication with a local yaml file.
import yaml
from yaml.loader import SafeLoader

# Load the authenticator settings from the local YAML file. The file is read
# explicitly as UTF-8 so that decoding does not depend on the platform's
# default locale encoding (which matters as soon as the file holds non-ASCII
# text such as user display names).
with open('./config.yaml', encoding='utf-8') as file:
    config = yaml.load(file, Loader=SafeLoader)

authenticator = stauth.Authenticate(
    config['credentials'],
    config['cookie']['name'],
    config['cookie']['key'],
    config['cookie']['expiry_days'],
    config['preauthorized']
)

# authentication with a remote cloud-based database.
# Import the cloud-hosted user database.
# SECURITY: a literal Deta key used to be written out on the next line; redacted.
# DETA_KEY = "<redacted>"
# load_dotenv(".env")
# DETA_KEY = os.getenv("DETA_KEY")
# print(DETA_KEY)
# deta = Deta(DETA_KEY)
# mybase is the name of the database in Deta. You can change it to any name you want.
# credentials = {"usernames":{}}
# # credentials = {"users": {}}
# # db = db()
# users = []
# email = []
# passwords = []
# names = []
# for row in db.fetch_all_users():
#     users.append(row["username"])
#     email.append(row["email"])
#     names.append(row["key"])
#     passwords.append(row["password"])
# hashed_passwords = stauth.Hasher(passwords).generate()
## The following fields must be defined strictly in the format of the yaml file.
# for un, name, pw in zip(users, names, hashed_passwords):
#     # user_dict = {"name":name,"password":pw}
#     user_dict = {"name": un, "password": pw}
#     # credentials["usernames"].update({un:user_dict})
#     credentials["usernames"].update({un: user_dict})
# ## sign-up module, unfinished.
# database_table = []
# # print(pd.DataFrame(credentials))
# for i in credentials['usernames'].keys():
#     # print("i:",i)
#     # print("name",credentials['usernames'][i]['name'])
#     # print("password",credentials['usernames'][i]['password'])
#     database_table.append([i,credentials['usernames'][i]['name'],credentials['usernames'][i]['password']])
# print("database_table:",database_table)
# authenticator = stauth.Authenticate(
#     credentials=credentials, cookie_name="joeshi_gpt", key='abcedefg', cookie_expiry_days=30)

# Render the login form in the main area and unpack its three results.
user, authentication_status, username = authenticator.login('用户登录', 'main')
# print("name", name, "username", username)

# ## sign-up widget, unfinished.
# try:
#     if authenticator.register_user('新用户注册', preauthorization=False):
#         # for
list in database_table: # # db.update_user(username=list[0], name=list[1], password=list[2]) # db.update_user(username=list[-1][0], name=list[-1][1], password=list[-1][2]) # # st.success('User registered successfully') # st.success('注册成功!') # except Exception as e: # st.error(e) # ## clear conversion. def reset_all(): # st.session_state.conversation = None # st.session_state.chat_history = None # st.session_state.messages = [] # # st.session_state.messages # message_placeholder = st.empty() # data_show = st.empty() # upload_file() # uploaded_file = None st.rerun() return None ##TODO:在带有聊天历史的情况下,重新整理这个模块。 def regenerate(): ## 因为可能没有历史,第一次的时候,所以需要处理异常。 try: print('----'*25) print('st.session_state.messages: ', st.session_state.messages) print('----'*25) # st.session_state.messages = st.session_state.messages[0] # st.session_state.messages # message_placeholder = st.empty() main(st.session_state.messages[0]['content']) except Exception as e: print('Error:', e) pass return None ### 对长传数据进行描述性统计,获得列表名 def dataframe_describe(df): df = pd.DataFrame(df) return pd.DataFrame({'column_name': df.columns, 'data_type': df.dtypes.values}) if authentication_status: with st.sidebar: st.markdown( """