# Hugging Face Space: chkla/PromptCardsPlayground
# (Space status when captured: Sleeping; file size: 7,043 bytes)

import pandas as pd
import streamlit as st
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from langchain.llms import OpenAI
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os
import re


def extract_positive_negative(text):
    """Collect every whole-word ``positive`` / ``negative`` token in *text*.

    Matching is case-sensitive. The tokens are returned as a list of strings
    in the order they occur; the list is empty when nothing matches.
    """
    return [hit.group(0) for hit in re.finditer(r'\b(?:positive|negative)\b', text)]

def classify_text(text, llm_chain, api):
    """Run *llm_chain* on a single text and return the lower-cased answer.

    Args:
        text: The value to classify; it is converted with ``str()`` before
            being handed to the chain.
        llm_chain: Object exposing ``run(str) -> str``.
        api: Either ``"HuggingFace"`` or ``"OpenAI"``.

    Returns:
        The chain output in lower case. For ``"OpenAI"`` every whitespace
        character is removed first; for ``"HuggingFace"`` the output is
        otherwise left untouched.

    Raises:
        ValueError: If *api* is not one of the supported values. Previously
            this fell through to an ``UnboundLocalError`` at the return.
    """
    if api not in ("HuggingFace", "OpenAI"):
        raise ValueError(f"Unsupported API: {api!r}")

    classification = llm_chain.run(str(text))
    if api == "OpenAI":
        # Drop all whitespace (including newlines) from the completion.
        classification = re.sub(r'\s', '', classification)
    return classification.lower()

def classify_csv(df, llm_chain, api):
    """Annotate a gold-labelled frame with model predictions.

    The ``label`` column is moved to a new trailing ``label_gold`` column and
    a ``label_pred`` column is added holding the result of ``classify_text``
    for every entry of the ``text`` column. *df* is modified in place and
    also returned.
    """
    # pop() removes ``label``; assigning re-adds its values as ``label_gold``.
    df["label_gold"] = df.pop("label")
    texts = df["text"]
    df["label_pred"] = texts.apply(
        lambda entry: classify_text(entry, llm_chain=llm_chain, api=api)
    )
    return df

def classify_csv_zero(zero_file, llm_chain, api):
    """Load a semicolon-separated CSV and label its ``text`` column.

    *zero_file* is passed straight to ``pandas.read_csv``. Each value of the
    ``text`` column is classified with ``classify_text`` and the results are
    stored in a ``label`` column of the returned frame.
    """
    frame = pd.read_csv(zero_file, sep=';')
    predictions = frame["text"].apply(
        lambda entry: classify_text(entry, llm_chain=llm_chain, api=api)
    )
    frame["label"] = predictions
    return frame

def evaluate_performance(df):
    """Return the percentage of rows where ``label_pred`` equals ``label_gold``.

    Args:
        df: DataFrame with ``label_gold`` and ``label_pred`` columns.

    Returns:
        A value between 0 and 100. An empty frame yields ``0.0`` instead of
        raising ``ZeroDivisionError``.

    Raises:
        KeyError: If either label column is missing.
    """
    # Element-wise comparison first, so missing columns still raise KeyError
    # even when the frame has no rows.
    matches = df["label_gold"] == df["label_pred"]
    total_preds = len(df)
    if total_preds == 0:
        return 0.0

    correct_preds = int(matches.sum())
    return correct_preds / total_preds * 100

def _build_llm_chain(api, model, api_key, temperature, prompt):
    """Build the LLMChain for the selected provider using the given API key."""
    # The key is passed to the client directly rather than written to
    # os.environ: environment variables are process-wide and would be shared
    # by every session served by the same process.
    if api == "HuggingFace":
        llm = HuggingFaceHub(
            repo_id=model,
            huggingfacehub_api_token=api_key,
            model_kwargs={"temperature": temperature, "max_length": 128},
        )
    else:
        llm = OpenAI(temperature=temperature, openai_api_key=api_key)
    return LLMChain(prompt=prompt, llm=llm)


def display_home():
    """Render the annotation page and run a classification on request.

    The page collects the provider, model, API key, temperature, setup
    (``Test`` with gold labels or ``Zero-Shot`` without), a CSV upload and a
    prompt template. When the run button is pressed, the inputs are validated
    and a warning is shown for the first missing one; otherwise the CSV is
    classified, displayed and offered for download. In the ``Test`` setup the
    percentage of predictions matching the gold labels is reported as well.
    """
    st.write("Please select an API and a model to classify the text. We currently support HuggingFace and OpenAI.")
    api = st.selectbox("Select an API", ["HuggingFace", "OpenAI"])

    # Key inputs are masked so the secret is not shown on screen.
    if api == "HuggingFace":
        model = st.selectbox("Select a model", ["google/flan-t5-xl", "databricks/dolly-v1-6b"])
        api_key = st.text_input("HuggingFace API Key", type="password")
    else:
        model = None
        api_key = st.text_input("OpenAI API Key", type="password")

    st.write("Please select a temperature for the model. The higher the temperature, the more creative the model will be.")
    temperature = st.slider("Set the temperature", min_value=0.0, max_value=1.0, value=0.0, step=0.01)

    st.write("We provide two different setups for the annotation task. In the first setup (**Test**), you can upload a CSV file with gold labels and evaluate the performance of the model. In the second setup (**Zero-Shot**), you can upload a CSV file without gold labels and use the model to classify the text.")
    setup = st.selectbox("Setup", ["Test", "Zero-Shot"])

    if setup == "Test":
        uploaded_file = st.file_uploader("Upload Gold Labels CSV file with a text and a label column", type=["csv"])
    else:
        uploaded_file = st.file_uploader("Upload CSV file with a text column", type=["csv"])

    st.write("Please enter the prompt template below. You can use the following variables: {text} (text to classify).")
    prompt_template = st.text_area("Enter your task description", """Instruction: Identify the sentiment of a text. Please read the text and provide one of these responses: "positive" or "negative".\nText to classify in "positive" or "negative": {text}\nAnswer:""", height=200)

    if not st.button("Run Classification/ Annotation"):
        return

    # Validate every input before touching the model, stopping at the first
    # problem so no later step runs with a missing prerequisite.
    if not prompt_template.strip():
        st.warning("Please enter a prompt question to classify the text.")
        return

    if not api_key:
        st.warning(f"Please enter your {api} API key to classify the text.")
        return

    if uploaded_file is None:
        if setup == "Test":
            st.warning("Please upload a gold labels CSV file to evaluate the performance of the model.")
        else:
            st.warning("Please upload a CSV file with a text column to classify.")
        return

    prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
    llm_chain = _build_llm_chain(api, model, api_key, temperature, prompt)

    if setup == "Zero-Shot":
        df_predicted = classify_csv_zero(uploaded_file, llm_chain, api)
        st.write(df_predicted)
        st.download_button(
            label="Download CSV",
            data=df_predicted.to_csv(index=False),
            file_name="classified_zero-shot_data.csv",
            mime="text/csv",
        )
        return

    # "Test" setup: the upload must carry both the texts and the gold labels.
    df = pd.read_csv(uploaded_file, sep=';')
    if "label" not in df.columns:
        st.warning("Please make sure that the gold labels CSV file contains a column named 'label'.")
        return
    if "text" not in df.columns:
        st.warning("Please make sure that the gold labels CSV file contains a column named 'text'.")
        return

    df = classify_csv(df, llm_chain, api)
    st.write(df)
    st.download_button(
        label="Download CSV",
        data=df.to_csv(index=False),
        file_name="classified_test_data.csv",
        mime="text/csv",
    )
    percentage_overlap = evaluate_performance(df)
    st.write("**Performance Evaluation**")
    st.write(f"Percentage overlap between gold labels and predicted labels: {percentage_overlap:.2f}%")

def main():
    """Configure the page, seed session defaults, draw the sidebar and home view."""
    st.set_page_config(page_title="PromptCards Playground", page_icon=":pencil2:")
    st.title("AInnotator")

    # Seed session-state keys once per session; existing values are kept.
    for key, default in (("current_page", "homepage"), ("selected_prompt", "")):
        if key not in st.session_state:
            st.session_state[key] = default

    # Sidebar: static "About" text and a link to the prompt archive.
    st.sidebar.title("About")
    st.sidebar.write("AInnotator 🤖🏷️ is a tool for creating artificial labels/ annotations. It is based on the concept of PromptCards, which are small, self-contained descriptions of a task that can be used to generate labels for a wide range of NLP tasks. Check out the GitHub repository and the PromptCards Archive for more information.")
    st.sidebar.write("---")
    st.sidebar.write("Check out the [PromptCards archive](https://huggingface.co/spaces/chkla/AnnotationPromptCards) to find a wide range of prompts for different NLP tasks.")
    st.sidebar.write("---")
    st.sidebar.write("Made with ❤️ and 🤖.")

    display_home()

# Entry point: start the app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()