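"""AInnotator: a Streamlit app for LLM-based text annotation.

Summary of what the code below does: the user picks an API (HuggingFace Hub or
OpenAI), a model, a temperature, and a prompt template, then uploads a
semicolon-separated CSV. In the "Test" setup the file needs 'text' and 'label'
columns and the app reports the overlap between gold and predicted labels; in
the "Zero-Shot" setup only a 'text' column is required and the model's labels
are added to the file.
"""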
import os
import re

import pandas as pd
import streamlit as st
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from langchain.llms import OpenAI
from transformers import AutoTokenizer, AutoModelForSequenceClassification

def extract_positive_negative(text):
    """Return all standalone 'positive'/'negative' tokens found in a model response."""
    pattern = r'\b(?:positive|negative)\b'
    return re.findall(pattern, text)

def classify_text(text, llm_chain, api):
    """Classify a single text with the configured LLMChain and return a normalized label."""
    # Both providers are invoked through the same LLMChain interface.
    classification = llm_chain.run(str(text))
    # Strip whitespace and lowercase so labels can be compared consistently.
    classification = re.sub(r'\s', '', classification)
    return classification.lower()

def classify_csv(df, llm_chain, api):
    """Classify a gold-labelled dataframe: keep the gold labels and add model predictions."""
    df["label_gold"] = df["label"]
    del df["label"]
    df["label_pred"] = df["text"].apply(classify_text, llm_chain=llm_chain, api=api)
    return df

def classify_csv_zero(zero_file, llm_chain, api):
    """Classify an unlabelled CSV (semicolon-separated, with a 'text' column)."""
    df = pd.read_csv(zero_file, sep=';')
    df["label"] = df["text"].apply(classify_text, llm_chain=llm_chain, api=api)
    return df

def evaluate_performance(df):
    """Return the percentage of rows where the predicted label matches the gold label."""
    # Predictions are lowercased in classify_text, so normalize the gold labels as well.
    gold = df["label_gold"].astype(str).str.strip().str.lower()
    correct_preds = sum(gold == df["label_pred"])
    total_preds = len(df)
    return correct_preds / total_preds * 100

def display_home():
    st.write("Please select an API and a model to classify the text. We currently support HuggingFace and OpenAI.")
    api = st.selectbox("Select an API", ["HuggingFace", "OpenAI"])
    if api == "HuggingFace":
        model = st.selectbox("Select a model", ["google/flan-t5-xl", "databricks/dolly-v1-6b"])
        api_key_hug = st.text_input("HuggingFace API Key")
    elif api == "OpenAI":
        model = None
        api_key_openai = st.text_input("OpenAI API Key")
    st.write("Please select a temperature for the model. The higher the temperature, the more creative the model will be.")
    temperature = st.slider("Set the temperature", min_value=0.0, max_value=1.0, value=0.0, step=0.01)
    st.write("We provide two different setups for the annotation task. In the first setup (**Test**), you can upload a CSV file with gold labels and evaluate the performance of the model. In the second setup (**Zero-Shot**), you can upload a CSV file without gold labels and use the model to classify the text.")
    setup = st.selectbox("Setup", ["Test", "Zero-Shot"])
    if setup == "Test":
        gold_file = st.file_uploader("Upload Gold Labels CSV file with a text and a label column", type=["csv"])
    elif setup == "Zero-Shot":
        gold_file = None
        zero_file = st.file_uploader("Upload CSV file with a text column", type=["csv"])
    st.write("Please enter the prompt template below. You can use the following variables: {text} (text to classify).")
    prompt_template = st.text_area("Enter your task description", """Instruction: Identify the sentiment of a text. Please read the text and provide one of these responses: "positive" or "negative".\nText to classify in "positive" or "negative": {text}\nAnswer:""", height=200)
    classify_button = st.button("Run Classification/Annotation")
    if classify_button:
        if prompt_template:
            prompt = PromptTemplate(
                template=prompt_template,
                input_variables=["text"]
            )
            if api == "HuggingFace":
                if api_key_hug:
                    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key_hug
                    llm_chain = LLMChain(prompt=prompt, llm=HuggingFaceHub(repo_id=model, model_kwargs={"temperature": temperature, "max_length": 128}))
                else:
                    st.warning("Please enter your HuggingFace API key to classify the text.")
                    st.stop()
            elif api == "OpenAI":
                if api_key_openai:
                    os.environ["OPENAI_API_KEY"] = api_key_openai
                    llm_chain = LLMChain(prompt=prompt, llm=OpenAI(temperature=temperature))
                else:
                    st.warning("Please enter your OpenAI API key to classify the text.")
                    st.stop()
            if setup == "Zero-Shot":
                if zero_file is not None:
                    df_predicted = classify_csv_zero(zero_file, llm_chain, api)
                    st.write(df_predicted)
                    st.download_button(
                        label="Download CSV",
                        data=df_predicted.to_csv(index=False),
                        file_name="classified_zero-shot_data.csv",
                        mime="text/csv"
                    )
            elif setup == "Test":
                if gold_file is not None:
                    df = pd.read_csv(gold_file, sep=';')
                    if "label" not in df.columns:
                        st.warning("Please make sure that the gold labels CSV file contains a column named 'label'.")
                    else:
                        df = classify_csv(df, llm_chain, api)
                        st.write(df)
                        st.download_button(
                            label="Download CSV",
                            data=df.to_csv(index=False),
                            file_name="classified_test_data.csv",
                            mime="text/csv"
                        )
                        percentage_overlap = evaluate_performance(df)
                        st.write("**Performance Evaluation**")
                        st.write(f"Percentage overlap between gold labels and predicted labels: {percentage_overlap:.2f}%")
                else:
                    st.warning("Please upload a gold labels CSV file to evaluate the performance of the model.")
        else:
            st.warning("Please enter a prompt template to classify the text.")

def main():
    st.set_page_config(page_title="PromptCards Playground", page_icon=":pencil2:")
    st.title("AInnotator")
    # Set up session state for the sidebar menu
    if "current_page" not in st.session_state:
        st.session_state.current_page = "homepage"
    # Initialize selected_prompt in session_state if not set
    if "selected_prompt" not in st.session_state:
        st.session_state.selected_prompt = ""
    # Add a menu
    menu = ["Homepage", "Playground", "Prompt Archive", "Annotator", "About"]
    st.sidebar.title("About")
    st.sidebar.write("AInnotator 🤖🏷️ is a tool for creating artificial labels/annotations. It is based on the concept of PromptCards, which are small, self-contained descriptions of a task that can be used to generate labels for a wide range of NLP tasks. Check out the GitHub repository and the PromptCards Archive for more information.")
    st.sidebar.write("---")
    st.sidebar.write("Check out the [PromptCards archive]() to find a wide range of prompts for different NLP tasks.")
    st.sidebar.write("---")
    st.sidebar.write("Made with ❤️ and 🤗.")
    display_home()


if __name__ == "__main__":
    main()
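
# Example input CSVs (illustrative values; the app reads semicolon-separated files):
#
# Test setup ('text' and 'label' columns):
#   text;label
#   I loved this movie;positive
#   What a waste of time;negative
#
# Zero-Shot setup ('text' column only):
#   text
#   I loved this movie
#   What a waste of time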