import functools
import streamlit as st
from streamlit_option_menu import option_menu
import streamlit.components.v1 as html
import pandas as pd
import numpy as np
from pathlib import Path
import altair as alt
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import RobertaConfig
from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
import torch
# from torch import cuda
import gradio as gr
import os
import re
import torch, gc
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device
tokenizer = AutoTokenizer.from_pretrained("devloverumar/chatgpt-content-detector")
model = AutoModelForSequenceClassification.from_pretrained("devloverumar/chatgpt-content-detector", num_labels=2)
# from PIL import Image
# gc.collect()
# torch.cuda.empty_cache()
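
# Pipeline sketch: the Streamlit UI below collects a piece of text, splits it into
# sentence-aligned chunks of at most ~900 characters, scores each chunk with the
# classifier loaded above, and reports a length-weighted "Real" (human-written)
# vs "Fake" (AI-generated) probability.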

def text_to_sentences(text):
    clean_text = text.replace('\n', ' ')
    return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)
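
# The regex above is a lightweight sentence splitter: it breaks on runs of spaces
# that follow a '.' or '?' and precede a capital letter. Illustrative example:
#   text_to_sentences("He plays well. He scores often.")
#   -> ["He plays well.", "He scores often."]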

# function to concatenate sentences into chunks of size 900 or less
def chunks_of_900(text, chunk_size=900):
    sentences = text_to_sentences(text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk + sentence) <= chunk_size:
            if len(current_chunk) != 0:
                current_chunk += " " + sentence
            else:
                current_chunk += sentence
        else:
            chunks.append(current_chunk)
            current_chunk = sentence
    chunks.append(current_chunk)
    return chunks
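
# Illustrative usage (hypothetical variable name): the classifier has a bounded
# input length, so a long document is first packed, sentence by sentence, into
# character chunks of at most `chunk_size` (900 by default) and scored chunk by chunk:
#   chunks = chunks_of_900(long_article_text)   # each element is <= 900 characters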

def predict(query):
    tokens = tokenizer.encode(query)
    all_tokens = len(tokens)
    tokens = tokens[:tokenizer.model_max_length - 2]
    used_tokens = len(tokens)
    tokens = torch.tensor([tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]).unsqueeze(0)
    mask = torch.ones_like(tokens)

    with torch.no_grad():
        logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
        probs = logits.softmax(dim=-1)

    real, fake = probs.detach().cpu().flatten().numpy().tolist()  # Hello-SimpleAI/chatgpt-detector-roberta
    # fake, real = probs.detach().cpu().flatten().numpy().tolist()  # PirateXX/AI-Content-Detector-V2
    return real
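
# Note: the unpacking order `real, fake` follows the label layout named in the
# comment above (index 0 = human-written, index 1 = AI-generated); the commented
# line shows the swapped order needed for PirateXX/AI-Content-Detector-V2.
# `predict` therefore returns the estimated probability that a single chunk is
# human-written.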

def findRealProb(text):
    chunksOfText = chunks_of_900(text)
    results = []
    for chunk in chunksOfText:
        output = predict(chunk)
        results.append([output, len(chunk)])

    ans = 0
    cnt = 0
    for prob, length in results:
        cnt += length
        ans = ans + prob * length
    realProb = ans / cnt
    return {"Real": realProb, "Fake": 1 - realProb}, results

def inspect_content():
    # Streamlit calls on_change callbacks with no arguments and then re-runs the
    # script from the top, so a module-level global would be reset before it is
    # read; persist the "run the detector" flag in session_state instead.
    st.session_state['inspect_requested'] = True
st.markdown(""" <style> .appview-container .main .block-container {
max-width: 100%;
padding-top: 1rem;
padding-right: {1}rem;
padding-left: {1}rem;
padding-bottom: {1}rem;
}</style> """, unsafe_allow_html=True)
#Add a logo (optional) in the sidebar
# logo = Image.open(r'C:\Users\13525\Desktop\Insights_Bees_logo.png')
with st.sidebar:
    choose = option_menu("Forensic Examiner",
                         ["Inspect Content", "Generate Content", "About", "Contact"],
                         icons=['camera fill', 'kanban', 'book', 'person lines fill'],
                         menu_icon="app-indicator", default_index=0,
                         styles={
                             "container": {"padding": "0 5 5 5 !important", "background-color": "#fafafa"},
                             "icon": {"color": "orange", "font-size": "25px"},
                             "nav-link": {"font-size": "16px", "text-align": "left", "margin": "0px", "--hover-color": "#eee"},
                             "nav-link-selected": {"background-color": "#02ab21"},
                         })
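
# `choose` holds the label of the sidebar entry selected via option_menu; the
# top-level if-blocks below route between the "Inspect Content", "About" and
# "Contact" pages ("Generate Content" is currently commented out further down).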
if choose == "Inspect Content":
#Add the cover image for the cover page. Used a little trick to center the image
st.markdown(""" <style> .font {
font-size:25px ; font-family: 'Cooper Black'; color: #FF9633;}
</style> """, unsafe_allow_html=True)
col1, col2 = st.columns( [0.8, 0.2])
with col1: # To display the header text using css style
st.markdown('<p class="font">Inspect Content</p>', unsafe_allow_html=True)
with col2: # To display brand logo
st.image('./media/inspection-1.jpg', width=100 )
txt = st.text_area('Add Text here',height=300, max_chars=2000, value= '''
Cristiano Ronaldo is a Portuguese professional soccer player who currently plays
as a forward for Manchester United and the Portugal national team. He is widely
considered one of the greatest soccer players of all time, having won numerous
awards and accolades throughout his career. Ronaldo began his professional career
with Sporting CP in Portugal before moving to Manchester United in 2003.
He spent six seasons with the club, winning three Premier League titles
and one UEFA Champions League title. In 2009, he transferred to Real Madrid
for a then-world record transfer fee of $131 million. He spent nine seasons with
the club, winning four UEFA Champions League titles, two La Liga titles,
and two Copa del Rey titles. In 2018, he transferred to Juventus, where he spent
three seasons before returning to Manchester United in 2021. He has also had
a successful international career with the Portugal national team, having won
the UEFA European Championship in 2016 and the UEFA Nations League in 2019.
''', on_change=inspect_content)
    if st.session_state.get('inspect_requested'):
        with st.spinner('Loading the model..'):
            model.to(device)
            st.success('Model Loaded!', icon="✅")
        # st.success(f'Reported EER for the selected model {reported_eer}%')
        with st.spinner("Getting prediction..."):
            # print(audio.shape)
            predictions = findRealProb(txt)
            print('prediction_value', predictions)
            if predictions[0]['Fake'] > 0.5:
                # st.error(f"The Sample is spoof: \n Confidence {(prediction_value)}%", icon="🚨")
                st.error("This text is AI generated", icon="🚨")
            else:
                st.success("This text is real", icon="✅")
# if choose == "Generate Content":
# st.markdown(""" <style> .font {
# font-size:25px ; font-family: 'Cooper Black'; color: #FF9633;}
# </style> """, unsafe_allow_html=True)
# st.markdown('<p class="font">Comparison of Models</p>', unsafe_allow_html=True)
# data_frame = get_data()
# tab1, tab2 = st.tabs(["EER", "min-TDCF"])
# with tab1:
# data_frame["EER ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64')
# data_frame["EER ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64')
# data_frame["Cross-dataset 19-21"] = data_frame["Cross-dataset 19-21"].astype('float64')
# data = data_frame[["Model Name","EER ASVS 2019","EER ASVS 2021","Cross-dataset 19-21"]].reset_index(drop=True).melt('Model Name')
# chart=alt.Chart(data).mark_line().encode(
# x='Model Name',
# y='value',
# color='variable'
# )
# st.altair_chart(chart, theme=None, use_container_width=True)
# with tab2:
# data_frame["min-TDCF ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64')
# data_frame["min-TDCF ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64')
# data_frame["min-TDCF Cross-dataset"] = data_frame["Cross-dataset 19-21"].astype('float64')
# data = data_frame[["Model Name","min-TDCF ASVS 2019","min-TDCF ASVS 2021","min-TDCF Cross-dataset"]].reset_index(drop=True).melt('Model Name')
# chart=alt.Chart(data).mark_line().encode(
# x='Model Name',
# y='value',
# color='variable'
# )
# st.altair_chart(chart, theme=None, use_container_width=True)
# # Data table
# st.markdown(""" <style> .appview-container .main .block-container {
# max-width: 100%;
# padding-top: {1}rem;
# padding-right: {1}rem;
# padding-left: {1}rem;
# padding-bottom: {1}rem;
# }</style> """, unsafe_allow_html=True)
# st.dataframe(data_frame, use_container_width=True)
if choose == "About":
st.markdown(""" <style> .font {
font-size:35px ; font-family: 'Cooper Black'; color: #FF9633;}
</style> """, unsafe_allow_html=True)
st.markdown('<p class="font">About</p>', unsafe_allow_html=True)
if choose == "Contact":
st.markdown(""" <style> .font {
font-size:35px ; font-family: 'Cooper Black'; color: #FF9633;}
</style> """, unsafe_allow_html=True)
st.markdown('<p class="font">Contact Us</p>', unsafe_allow_html=True)
with st.form(key='columns_in_form2',clear_on_submit=True): #set clear_on_submit=True so that the form will be reset/cleared once it's submitted
#st.write('Please help us improve!')
Name=st.text_input(label='Please Enter Your Name') #Collect user feedback
Email=st.text_input(label='Please Enter Your Email') #Collect user feedback
Message=st.text_input(label='Please Enter Your Message') #Collect user feedback
submitted = st.form_submit_button('Submit')
if submitted:
st.write('Thanks for your contacting us. We will respond to your questions or inquiries as soon as possible!')