import re

import streamlit as st
from streamlit_option_menu import option_menu
import altair as alt  # only used by the commented-out "Generate Content" comparison charts below
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification



device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device

tokenizer = AutoTokenizer.from_pretrained("devloverumar/chatgpt-content-detector")
model = AutoModelForSequenceClassification.from_pretrained("devloverumar/chatgpt-content-detector", num_labels=2)
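# The detector is a binary sequence classifier; which of its two output
# probabilities corresponds to "human-written" vs. "AI-generated" depends on
# the checkpoint's label order (see the commented alternatives in predict() below).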
# from  PIL import Image
# gc.collect()
# torch.cuda.empty_cache()



def text_to_sentences(text):
    clean_text = text.replace('\n', ' ')
    return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)
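# Illustrative example (hypothetical input): the regex splits on whitespace that
# follows sentence-ending punctuation and precedes a capital letter, so
#   text_to_sentences("First sentence. Second one? Third.")
# should yield ['First sentence.', 'Second one?', 'Third.']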

# Concatenate sentences into chunks of at most `chunk_size` characters, so each
# chunk stays comfortably within the detector's maximum input length.
def chunks_of_900(text, chunk_size=900):
    sentences = text_to_sentences(text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        # the +1 accounts for the joining space when the chunk is non-empty
        extra = 1 if current_chunk else 0
        if len(current_chunk) + len(sentence) + extra <= chunk_size:
            current_chunk = (current_chunk + " " + sentence) if current_chunk else sentence
        else:
            chunks.append(current_chunk)
            current_chunk = sentence
    chunks.append(current_chunk)
    return chunks
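# Illustrative usage (hypothetical variable name): each chunk stays within
# chunk_size unless a single sentence is itself longer than chunk_size.
#   chunks = chunks_of_900(article_text)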
    
def predict(query):
    # Encode without special tokens; BOS/EOS are added manually below so the
    # sequence is not wrapped in them twice.
    tokens = tokenizer.encode(query, add_special_tokens=False)
    all_tokens = len(tokens)
    tokens = tokens[:tokenizer.model_max_length - 2]
    used_tokens = len(tokens)
    tokens = torch.tensor([tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]).unsqueeze(0)
    mask = torch.ones_like(tokens)

    with torch.no_grad():
        logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
        probs = logits.softmax(dim=-1)

    # Label order depends on the checkpoint:
    real, fake = probs.detach().cpu().flatten().numpy().tolist()  # Hello-SimpleAI/chatgpt-detector-roberta
    # fake, real = probs.detach().cpu().flatten().numpy().tolist()  # PirateXX/AI-Content-Detector-V2

    return real
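# Illustrative call (hypothetical input): predict() returns the probability
# that the chunk is human-written, e.g.
#   predict("Some paragraph of text ...")  # -> a float in [0, 1]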

def findRealProb(text):
    # Score each chunk independently, then weight each chunk's "real"
    # probability by its character length so longer chunks count proportionally.
    chunksOfText = chunks_of_900(text)
    results = []
    for chunk in chunksOfText:
        output = predict(chunk)
        results.append([output, len(chunk)])

    ans = 0
    cnt = 0
    for prob, length in results:
        cnt += length
        ans = ans + prob * length
    realProb = ans / cnt if cnt else 0.0  # guard against empty input
    return {"Real": realProb, "Fake": 1 - realProb}, results

def inspect_content():
    # Streamlit on_change callbacks receive no arguments, so read the current
    # text_area value from session_state via the widget's key and stash it for
    # the prediction step below.
    st.session_state["txt_to_inspect"] = st.session_state.get("inspect_text", "")

st.markdown(""" <style> .appview-container .main .block-container {
    max-width: 100%;
    padding-top: 1rem;
    padding-right: {1}rem;
    padding-left: {1}rem;
    padding-bottom: {1}rem;
}</style> """, unsafe_allow_html=True)
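# The block above widens Streamlit's default content column via injected CSS;
# unsafe_allow_html=True is required for the raw <style> tag to take effect.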
#Add a logo (optional) in the sidebar
# logo = Image.open(r'C:\Users\13525\Desktop\Insights_Bees_logo.png')
with st.sidebar:
    choose = option_menu("Forensic Examiner", ["Inspect Content","Generate Content","About", "Contact"],
                        icons=['camera-fill', 'kanban', 'book', 'person-lines-fill'],
                        menu_icon="app-indicator", default_index=0,
                        styles={
        "container": {"padding": "0 5 5 5 !important", "background-color": "#fafafa"},
        "icon": {"color": "orange", "font-size": "25px"}, 
        "nav-link": {"font-size": "16px", "text-align": "left", "margin":"0px", "--hover-color": "#eee"},
        "nav-link-selected": {"background-color": "#02ab21"},
    }
    )


if choose == "Inspect Content":
    # Page header for the Inspect Content view
    st.markdown(""" <style> .font {
        font-size:25px ; font-family: 'Cooper Black'; color: #FF9633;} 
        </style> """, unsafe_allow_html=True)
    col1, col2 = st.columns( [0.8, 0.2])
    with col1:               # To display the header text using css style
        st.markdown('<p class="font">Inspect Content</p>', unsafe_allow_html=True)
        
    with col2:               # To display brand logo                
        st.image('./media/inspection-1.jpg', width=100 )

    txt = st.text_area('Add Text here', height=300, max_chars=2000, key='inspect_text', value='''
    Cristiano Ronaldo is a Portuguese professional soccer player who currently plays 
    as a forward for Manchester United and the Portugal national team. He is widely 
    considered one of the greatest soccer players of all time, having won numerous 
    awards and accolades throughout his career. Ronaldo began his professional career 
    with Sporting CP in Portugal before moving to Manchester United in 2003. 
    He spent six seasons with the club, winning three Premier League titles 
    and one UEFA Champions League title. In 2009, he transferred to Real Madrid 
    for a then-world record transfer fee of $131 million. He spent nine seasons with 
    the club, winning four UEFA Champions League titles, two La Liga titles, 
    and two Copa del Rey titles. In 2018, he transferred to Juventus, where he spent 
    three seasons before returning to Manchester United in 2021. He has also had 
    a successful international career with the Portugal national team, having won 
    the UEFA European Championship in 2016 and the UEFA Nations League in 2019.
    ''', on_change=inspect_content)

    if st.session_state.get("txt_to_inspect"):
        with st.spinner('Loading the model..'):
            model.to(device)

            st.success('Model loaded!', icon="✅")
        with st.spinner("Getting prediction..."):
            predictions = findRealProb(txt)
            print('prediction_value', predictions)
            if predictions[0]['Fake'] > 0.5:
                st.error("This text is AI-generated", icon="🚨")
            else:
                st.success("This text is real", icon="✅")
        

# if choose == "Generate Content":
#     st.markdown(""" <style> .font {
#         font-size:25px ; font-family: 'Cooper Black'; color: #FF9633;} 
#         </style> """, unsafe_allow_html=True)
#     st.markdown('<p class="font">Comparison of Models</p>', unsafe_allow_html=True)
#     data_frame = get_data()
#     tab1, tab2 = st.tabs(["EER", "min-TDCF"])
#     with tab1:
#         data_frame["EER ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64') 
#         data_frame["EER ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64') 
#         data_frame["Cross-dataset 19-21"] = data_frame["Cross-dataset 19-21"].astype('float64') 

#         data = data_frame[["Model Name","EER ASVS 2019","EER ASVS 2021","Cross-dataset 19-21"]].reset_index(drop=True).melt('Model Name')
#         chart=alt.Chart(data).mark_line().encode(
#             x='Model Name',
#             y='value',
#             color='variable'
#         )
#         st.altair_chart(chart, theme=None, use_container_width=True)
#     with tab2:
#         data_frame["min-TDCF ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64') 
#         data_frame["min-TDCF ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64') 
#         data_frame["min-TDCF Cross-dataset"] = data_frame["Cross-dataset 19-21"].astype('float64')

#         data = data_frame[["Model Name","min-TDCF ASVS 2019","min-TDCF ASVS 2021","min-TDCF Cross-dataset"]].reset_index(drop=True).melt('Model Name')
#         chart=alt.Chart(data).mark_line().encode(
#             x='Model Name',
#             y='value',
#             color='variable'
#         )
#         st.altair_chart(chart, theme=None, use_container_width=True)
#     # Data table
#     st.markdown(""" <style> .appview-container .main .block-container {
#         max-width: 100%;
#         padding-top: {1}rem;
#         padding-right: {1}rem;
#         padding-left: {1}rem;
#         padding-bottom: {1}rem;
#         }</style> """, unsafe_allow_html=True)
#     st.dataframe(data_frame, use_container_width=True)



if choose == "About":
    st.markdown(""" <style> .font {
        font-size:35px ; font-family: 'Cooper Black'; color: #FF9633;} 
        </style> """, unsafe_allow_html=True)
    st.markdown('<p class="font">About</p>', unsafe_allow_html=True)
if choose == "Contact":
    st.markdown(""" <style> .font {
        font-size:35px ; font-family: 'Cooper Black'; color: #FF9633;} 
        </style> """, unsafe_allow_html=True)
    st.markdown('<p class="font">Contact Us</p>', unsafe_allow_html=True)
    with st.form(key='columns_in_form2',clear_on_submit=True): #set clear_on_submit=True so that the form will be reset/cleared once it's submitted
        #st.write('Please help us improve!')
        Name=st.text_input(label='Please Enter Your Name') #Collect user feedback
        Email=st.text_input(label='Please Enter Your Email') #Collect user feedback
        Message=st.text_input(label='Please Enter Your Message') #Collect user feedback
        submitted = st.form_submit_button('Submit')
        if submitted:
            st.write('Thanks for contacting us. We will respond to your questions or inquiries as soon as possible!')
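
# To run the app locally (standard Streamlit invocation; the file name is an assumption):
#   streamlit run app.py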