# shrut27's picture
# Update app.py
# 6462b67 verified
from jarvis.db.figshare import data
import pandas as pd
import streamlit as st
import os
from openai import OpenAI
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()

# Retrieve the API key from the environment. An unset variable and an empty
# value are both treated as missing: an empty key would pass an `is None`
# check but cannot authenticate any request.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # Surface the problem on the Streamlit page before aborting the script.
    st.error("API key not found. Make sure to set the OPENAI_API_KEY environment variable.")
    raise ValueError("API key not found")

# Module-level OpenAI client built from the validated key.
client = OpenAI(api_key=api_key)
@st.cache_data()
def load_data():
    """Load the "dft_3d" dataset and return it as a pandas DataFrame.

    The records come from ``jarvis.db.figshare.data``; the function is
    wrapped in ``st.cache_data`` so Streamlit can reuse the result.
    """
    return pd.DataFrame(data(dataset="dft_3d"))
def _request_completion(system_prompt, user_prompt, error_label, model="gpt-3.5-turbo"):
    """Send one system/user prompt pair to the chat model and return the reply.

    Args:
        system_prompt: Content of the "system" message.
        user_prompt: Content of the "user" message.
        error_label: Prefix of the text returned when the request fails.
        model: Name of the chat model passed to the API.

    Returns:
        The stripped content of the first choice, or
        ``"<error_label>: <exception>"`` if the request or the handling of
        its response raises.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Deliberate best-effort boundary: a failed request must not abort the
        # whole analysis, so the error text stands in for the missing section.
        return f"{error_label}: {e}"


def process_materials_data(query, df, target_property, threshold, optimization_goal):
    """Ask the chat model for insights, trends and a hypothesis about ``df``.

    Args:
        query: Free-text user question that is embedded in every prompt.
        df: DataFrame with the materials data to analyze.
        target_property: Name of the property the prompts focus on.
        threshold: Value quoted in the candidate-selection prompt.
        optimization_goal: Word placed between the property and the threshold
            in the prompt (the caller offers "above" and "below").

    Returns:
        A 4-tuple ``(insights, trends, hypothesis, report)`` of strings. When
        ``df`` is empty, all four entries are the same "no data" message so
        that callers can always unpack four values.
    """
    if df.empty:
        # Returning a bare string here would break callers that unpack the
        # result into four names, so keep the tuple shape.
        no_data = "No data available for analysis."
        return no_data, no_data, no_data, no_data

    # Data Interpretation and Recommendations
    summary = df.describe().to_dict()
    recommendations_prompt = f"Based on the following summary statistics of {target_property}:\n{summary}, provide a summary of the results, key findings, and actionable recommendations based on the data and answer the user query {query} based on the dataset {df}."
    insights = _request_completion(
        "You are a materials science assistant summarizing data insights and making recommendations.",
        recommendations_prompt,
        "Error generating recommendations",
    )

    # Trends Analysis and Candidates Suggestions
    comparison_prompt = f"Analyze trends in the {target_property} data for the following materials:\n{df.head(13).to_dict()}. Identify promising candidates with {target_property} {optimization_goal} {threshold} and provide a detailed comparison of materials. Include the user query {query} answer as well and dont ask any follow up questions"
    trends = _request_completion(
        "You are a materials scientist analyzing trends and suggesting promising materials.",
        comparison_prompt,
        "Error analyzing trends",
    )

    # Hypothesis Generation
    hypothesis_prompt = f"Generate a hypothesis based on trends in the {target_property} data:\n{df.head(5).to_dict()} based on the user query {query} and also suggest possible material combinations which could improve the performance of material. Don't ask any follow up questions"
    hypothesis = _request_completion(
        "You are a research assistant generating hypotheses based on material data.",
        hypothesis_prompt,
        "Error generating hypothesis",
    )

    # Research Report Based on Query and Findings.
    # Built line by line so the Markdown headings start at column 0 no matter
    # how this function is indented; the empty first and last entries keep the
    # leading and trailing newline of the report.
    report_lines = [
        "",
        f"# Research Report for Query: {query.capitalize()}",
        "## 1. Findings from the Query:",
        f"Based on the user query **'{query}'**, the following key findings and insights were obtained from the dataset:",
        "- **Insights and Recommendations**:",
        f"{insights}",
        "- **Trends and Comparisons**:",
        f"{trends}",
        "## 2. Hypothesis:",
        "Based on the analysis, the following hypothesis was generated:",
        f"{hypothesis}",
        "## 3. Recommendations for Further Research:",
        f"It is suggested to explore the materials with higher or lower values of {target_property}, considering the trends identified, to optimize the material properties for the query {query}.",
        "",
    ]
    report = "\n".join(report_lines)
    return insights, trends, hypothesis, report
# Streamlit page: collect the analysis inputs, run the analysis on demand and
# render its results.
st.title("Piezoelectric Material Discovery Assistant")

# load_data() already returns a DataFrame, so it is used directly instead of
# being wrapped in a second DataFrame.
dataset = load_data()
df = dataset

query = st.text_input("Enter your query (e.g., 'Find materials with high piezoelectric constants'):")
target_property = st.selectbox(
    "Select a property to analyze:",
    ["dfpt_piezo_max_dij", "optb88vdw_bandgap", "density", "min_ir_mode", "max_ir_mode"],
)
threshold = st.number_input("Threshold value:", min_value=0.0, value=1.0)
optimization_goal = st.selectbox("Optimization goal:", ["above", "below"])

if st.button("Analyze Data"):
    # Keep only the rows that have a value for the selected property.
    filtered_df = df[df[target_property].notnull()]
    if filtered_df.empty:
        # Nothing to analyze: report it instead of calling the analysis, whose
        # result is unpacked into four values below.
        st.warning("No data available for analysis.")
    else:
        insights, trends, hypothesis, report = process_materials_data(
            query, filtered_df, target_property, threshold, optimization_goal
        )
        st.write("### Insights and Recommendations")
        st.write(insights)
        st.write("### Trends and Promising Candidates")
        st.write(trends)
        st.write("### Generated Hypothesis")
        st.write(hypothesis)
        st.write("### Research Report")
        # The report is offered as a download rather than rendered inline.
        st.download_button("Download Research Report", report, file_name="research_report.txt")