import functools
import gc
import os
import re

import streamlit as st
from streamlit_option_menu import option_menu
import streamlit.components.v1 as html
import pandas as pd
import numpy as np
from pathlib import Path
import altair as alt
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
# from torch import cuda
import gradio as gr

# Run on GPU when available, otherwise fall back to CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = AutoTokenizer.from_pretrained("devloverumar/chatgpt-content-detector")
model = AutoModelForSequenceClassification.from_pretrained("devloverumar/chatgpt-content-detector", num_labels=2)

# from PIL import Image
# gc.collect()
# torch.cuda.empty_cache()


def text_to_sentences(text):
    """Split text into sentences on end-of-sentence punctuation followed by a capital letter."""
    clean_text = text.replace('\n', ' ')
    return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)


def chunks_of_900(text, chunk_size=900):
    """Concatenate sentences into chunks of at most `chunk_size` (default 900) characters."""
    sentences = text_to_sentences(text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk + sentence) <= chunk_size:
            if len(current_chunk) != 0:
                current_chunk += " " + sentence
            else:
                current_chunk += sentence
        else:
            chunks.append(current_chunk)
            current_chunk = sentence
    chunks.append(current_chunk)
    return chunks


def predict(query):
    """Return the probability that `query` is human-written according to the detector model."""
    tokens = tokenizer.encode(query)
    all_tokens = len(tokens)
    # Truncate to the model's context window, leaving room for the BOS/EOS tokens
    tokens = tokens[:tokenizer.model_max_length - 2]
    used_tokens = len(tokens)
    tokens = torch.tensor([tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]).unsqueeze(0)
    mask = torch.ones_like(tokens)

    with torch.no_grad():
        logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
        probs = logits.softmax(dim=-1)

    real, fake = probs.detach().cpu().flatten().numpy().tolist()  # Hello-SimpleAI/chatgpt-detector-roberta label order
    # fake, real = probs.detach().cpu().flatten().numpy().tolist()  # PirateXX/AI-Content-Detector-V2 label order
    return real


def findRealProb(text):
    """Length-weighted average of the per-chunk 'real' probabilities over the whole text."""
    chunksOfText = chunks_of_900(text)
    results = []
    for chunk in chunksOfText:
        output = predict(chunk)
        results.append([output, len(chunk)])

    ans = 0
    cnt = 0
    for prob, length in results:
        cnt += length
        ans = ans + prob * length
    realProb = ans / cnt
    return {"Real": realProb, "Fake": 1 - realProb}, results


def inspect_content():
    # on_change callback for the text area below. A plain module-level variable does
    # not survive Streamlit's reruns, so the "inspect this text" flag is kept in
    # session state instead.
    st.session_state['inspect_requested'] = True


st.markdown(""" """, unsafe_allow_html=True)

# Add a logo (optional) in the sidebar
# logo = Image.open(r'C:\Users\13525\Desktop\Insights_Bees_logo.png')

with st.sidebar:
    choose = option_menu(
        "Forensic Examiner",
        ["Inspect Content", "Generate Content", "About", "Contact"],
        icons=['camera fill', 'kanban', 'book', 'person lines fill'],
        menu_icon="app-indicator",
        default_index=0,
        styles={
            "container": {"padding": "0 5 5 5 !important", "background-color": "#fafafa"},
            "icon": {"color": "orange", "font-size": "25px"},
            "nav-link": {"font-size": "16px", "text-align": "left", "margin": "0px", "--hover-color": "#eee"},
            "nav-link-selected": {"background-color": "#02ab21"},
        }
    )

if choose == "Inspect Content":
    # Add the cover image for the cover page. Used a little trick to center the image
    st.markdown(""" """, unsafe_allow_html=True)
    col1, col2 = st.columns([0.8, 0.2])
    with col1:  # To display the header text using css style
        st.markdown('<p class="font">Inspect Content</p>', unsafe_allow_html=True)
    with col2:  # To display brand logo
        st.image('./media/inspection-1.jpg', width=100)

    txt = st.text_area(
        'Add Text here',
        height=300,
        max_chars=2000,
        value='''Cristiano Ronaldo is a Portuguese professional soccer player who currently plays as a forward for Manchester United and the Portugal national team. He is widely considered one of the greatest soccer players of all time, having won numerous awards and accolades throughout his career.
Ronaldo began his professional career with Sporting CP in Portugal before moving to Manchester United in 2003. He spent six seasons with the club, winning three Premier League titles and one UEFA Champions League title. In 2009, he transferred to Real Madrid for a then-world record transfer fee of $131 million. He spent nine seasons with the club, winning four UEFA Champions League titles, two La Liga titles, and two Copa del Rey titles. In 2018, he transferred to Juventus, where he spent three seasons before returning to Manchester United in 2021.
He has also had a successful international career with the Portugal national team, having won the UEFA European Championship in 2016 and the UEFA Nations League in 2019.
''',
        on_change=inspect_content,
    )

    if st.session_state.get('inspect_requested'):
        with st.spinner('Loading the model..'):
            model.to(device)
            st.success('Model loaded!', icon="✅")
            # st.success(f'Reported EER for the selected model {reported_eer}%')

        with st.spinner("Getting prediction..."):
            predictions = findRealProb(txt)
            print('prediction_value', predictions)
            if predictions[0]['Fake'] > 0.5:
                # st.error(f"The Sample is spoof: \n Confidence {prediction_value}%", icon="🚨")
                st.error("This text is AI generated", icon="🚨")
            else:
                st.success("This text appears to be human-written", icon="✅")

# if choose == "Generate Content":
#     st.markdown(""" """, unsafe_allow_html=True)
#     st.markdown('<p class="font">Comparison of Models</p>', unsafe_allow_html=True)
#     data_frame = get_data()
#     tab1, tab2 = st.tabs(["EER", "min-TDCF"])
#     with tab1:
#         data_frame["EER ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64')
#         data_frame["EER ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64')
#         data_frame["Cross-dataset 19-21"] = data_frame["Cross-dataset 19-21"].astype('float64')
#         data = data_frame[["Model Name", "EER ASVS 2019", "EER ASVS 2021", "Cross-dataset 19-21"]].reset_index(drop=True).melt('Model Name')
#         chart = alt.Chart(data).mark_line().encode(
#             x='Model Name',
#             y='value',
#             color='variable'
#         )
#         st.altair_chart(chart, theme=None, use_container_width=True)
#     with tab2:
#         data_frame["min-TDCF ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64')
#         data_frame["min-TDCF ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64')
#         data_frame["min-TDCF Cross-dataset"] = data_frame["Cross-dataset 19-21"].astype('float64')
#         data = data_frame[["Model Name", "min-TDCF ASVS 2019", "min-TDCF ASVS 2021", "min-TDCF Cross-dataset"]].reset_index(drop=True).melt('Model Name')
#         chart = alt.Chart(data).mark_line().encode(
#             x='Model Name',
#             y='value',
#             color='variable'
#         )
#         st.altair_chart(chart, theme=None, use_container_width=True)
#     # Data table
#     st.markdown(""" """, unsafe_allow_html=True)
#     st.dataframe(data_frame, use_container_width=True)

if choose == "About":
    st.markdown(""" """, unsafe_allow_html=True)
    st.markdown('<p class="font">About</p>', unsafe_allow_html=True)
if choose == "Contact":
    st.markdown(""" """, unsafe_allow_html=True)
    st.markdown('<p class="font">Contact Us</p>', unsafe_allow_html=True)
    # clear_on_submit=True so that the form is reset/cleared once it is submitted
    with st.form(key='columns_in_form2', clear_on_submit=True):
        # st.write('Please help us improve!')
        Name = st.text_input(label='Please Enter Your Name')        # Collect user feedback
        Email = st.text_input(label='Please Enter Your Email')      # Collect user feedback
        Message = st.text_input(label='Please Enter Your Message')  # Collect user feedback
        submitted = st.form_submit_button('Submit')
        if submitted:
            st.write('Thanks for contacting us. We will respond to your questions or inquiries as soon as possible!')
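
# Optional smoke test for the detection helpers above (a minimal sketch, not part of
# the original app). Assumptions: the file is executed directly with plain Python
# (e.g. `python app.py`), the Streamlit calls above tolerate running outside
# `streamlit run` (bare mode), and the DETECTOR_SMOKE_TEST environment variable is
# a name chosen here purely for illustration. A normal `streamlit run` leaves that
# variable unset, so this block is skipped in the app itself.
if __name__ == "__main__" and os.environ.get("DETECTOR_SMOKE_TEST"):
    sample = (
        "Cristiano Ronaldo is a Portuguese professional soccer player. "
        "He is widely considered one of the greatest soccer players of all time."
    )
    aggregate, per_chunk = findRealProb(sample)
    print("Aggregate probabilities:", aggregate)                 # {'Real': ..., 'Fake': ...}
    print("Per-chunk [probability, chunk length]:", per_chunk)
    print("Verdict:", "AI generated" if aggregate["Fake"] > 0.5 else "Human written")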