import numpy as np
import streamlit as st
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification, pipeline


def bias_evaluation_presentation():
    """Render the page header and explain what the toxicity and bias checks do."""
    st.title("⚖️ App: Test for Bias & Toxicity")
    st.subheader("Check for yourself whether the generated content is biased or toxic!")
    st.markdown("""
        ### Evaluate generated content

        As you're using our app to enhance your resume with the help of language models,
        it's important to keep in mind that while these tools can be incredibly powerful,
        they aren't perfect. Two key things we evaluate in the generated content are
        toxicity and bias.

        - **Toxicity**: This refers to any language that could be harmful or inappropriate,
          like offensive words or unprofessional phrasing. Our goal is to ensure that your
          resume remains positive and professional, free from any content that could be
          seen as disrespectful or inappropriate in a work setting.
        - **Bias**: Language models can sometimes unintentionally show preferences or
          stereotypes. This could result in outputs that lean towards certain genders,
          cultures, or other groups unfairly. We check for bias to make sure your resume
          reflects a balanced, fair tone and is inclusive for any workplace.

        By monitoring for these issues, we help you create a resume that's not only
        polished but also respectful and professional, ready to make the best impression!
    """)


def toxicity_classif(text):
    """Classify the input text with a multilingual toxicity model and display the result."""
    model_path = "citizenlab/distilbert-base-multilingual-cased-toxicity"
    toxicity_classifier = pipeline("text-classification", model=model_path, tokenizer=model_path)
    result = toxicity_classifier(text)
    # Convert the score to a percentage before rounding so the displayed value is exact.
    score_pct = np.round(result[0]['score'] * 100, 2)
    st.markdown(f"""
        ### Result

        **Toxicity evaluation**: this content is evaluated as **{result[0]['label']}**
        with a score of {score_pct}%.
    """)


def bias_classif(text):
    """Classify the input text with a bias-detection model and display the result."""
    tokenizer = AutoTokenizer.from_pretrained("d4data/bias-detection-model")
    model = TFAutoModelForSequenceClassification.from_pretrained("d4data/bias-detection-model")
    # Pass device=0 to the pipeline to run on a GPU if one is available.
    classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    result = classifier(text)
    score_pct = np.round(result[0]['score'] * 100, 2)
    st.markdown(f"""
        **Bias evaluation**: this content is evaluated as **{result[0]['label']}**
        with a score of {score_pct}%.
    """)


bias_evaluation_presentation()

txt = st.text_area("Input the generated content here")

# Run the (relatively slow) model inference only once some text has been entered.
if txt:
    toxicity_classif(txt)
    bias_classif(txt)
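

# Usage sketch: assuming this script is saved as, e.g., bias_toxicity_app.py
# (the filename is illustrative), the app can be launched locally with:
#
#   streamlit run bias_toxicity_app.py
#
# Both Hugging Face models are downloaded from the Hub on the first run and
# loaded from the local cache on subsequent runs.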