Spaces:
Runtime error
has12zen committed
Commit · 1c13527
1 Parent(s): 8343c12
Test
Browse files
- .gitignore +2 -0
- app.py +16 -0
- example.txt +1 -0
- gist_stopwords.txt +1 -0
- requirements.txt +4 -0
- utils.py +104 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
A5/
__pycache__/
app.py
ADDED
@@ -0,0 +1,16 @@
import gradio as gr
from utils import *

with gr.Blocks() as demo:
    gr.Markdown("# Enter the 2 Docs.")
    with gr.Tab("Encrypt"):
        with gr.Row():
            with gr.Column():
                encrypt_msg = gr.Textbox(lines=2, label="Doc1")
                encrypt_key = gr.Textbox(lines=2, label="Doc2")
            encrypt_output = gr.Textbox()
        encrypt_button = gr.Button("Encrypt")

    encrypt_button.click(final_main, inputs=[encrypt_msg, encrypt_key], outputs=[encrypt_output])

demo.launch(share=False)
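Despite the "Encrypt" labels in the layout, the click handler simply forwards the two textboxes to `final_main` from utils.py and writes the similarity report into `encrypt_output`; running app.py starts a local Gradio server, and `share=False` keeps it from creating a public share link. A minimal sketch of exercising the same entry point without the UI (it assumes utils.py, example.txt and gist_stopwords.txt are in the working directory, since utils.py loads gist_stopwords.txt at import time and reads example.txt inside `main`):

# Hypothetical smoke test: call the Space's handler directly, bypassing Gradio.
from utils import final_main

doc1 = "Swimming and cycling are great ways to stay fit."
doc2 = "Regular running improves heart health and mood."

# Returns a string with the cosine similarity and Euclidean distance
# between the two documents' TF-IDF vectors.
print(final_main(doc1, doc2))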
example.txt
ADDED
@@ -0,0 +1 @@
The benefits of exercise for physical and mental health are well-established. Regular exercise can help reduce the risk of chronic diseases, such as diabetes, heart disease, and cancer, as well as improve mood and cognitive function. However, it's important to choose activities that you enjoy and that are safe for your current level of fitness. Some popular options include walking, running, cycling, swimming, and yoga. Finding a workout buddy or joining a group fitness class can also help keep you motivated and accountable.
gist_stopwords.txt
ADDED
@@ -0,0 +1 @@
0o,0s,3a,3b,3d,6b,6o,a,a1,a2,a3,a4,ab,able,about,above,abst,ac,accordance,according,accordingly,across,act,actually,ad,added,adj,ae,af,affected,affecting,affects,after,afterwards,ag,again,against,ah,ain,ain't,aj,al,all,allow,allows,almost,alone,along,already,also,although,always,am,among,amongst,amoungst,amount,an,and,announce,another,any,anybody,anyhow,anymore,anyone,anything,anyway,anyways,anywhere,ao,ap,apart,apparently,appear,appreciate,appropriate,approximately,ar,are,aren,arent,aren't,arise,around,as,a's,aside,ask,asking,associated,at,au,auth,av,available,aw,away,awfully,ax,ay,az,b,b1,b2,b3,ba,back,bc,bd,be,became,because,become,becomes,becoming,been,before,beforehand,begin,beginning,beginnings,begins,behind,being,believe,below,beside,besides,best,better,between,beyond,bi,bill,biol,bj,bk,bl,bn,both,bottom,bp,br,brief,briefly,bs,bt,bu,but,bx,by,c,c1,c2,c3,ca,call,came,can,cannot,cant,can't,cause,causes,cc,cd,ce,certain,certainly,cf,cg,ch,changes,ci,cit,cj,cl,clearly,cm,c'mon,cn,co,com,come,comes,con,concerning,consequently,consider,considering,contain,containing,contains,corresponding,could,couldn,couldnt,couldn't,course,cp,cq,cr,cry,cs,c's,ct,cu,currently,cv,cx,cy,cz,d,d2,da,date,dc,dd,de,definitely,describe,described,despite,detail,df,di,did,didn,didn't,different,dj,dk,dl,do,does,doesn,doesn't,doing,don,done,don't,down,downwards,dp,dr,ds,dt,du,due,during,dx,dy,e,e2,e3,ea,each,ec,ed,edu,ee,ef,effect,eg,ei,eight,eighty,either,ej,el,eleven,else,elsewhere,em,empty,en,end,ending,enough,entirely,eo,ep,eq,er,es,especially,est,et,et-al,etc,eu,ev,even,ever,every,everybody,everyone,everything,everywhere,ex,exactly,example,except,ey,f,f2,fa,far,fc,few,ff,fi,fifteen,fifth,fify,fill,find,fire,first,five,fix,fj,fl,fn,fo,followed,following,follows,for,former,formerly,forth,forty,found,four,fr,from,front,fs,ft,fu,full,further,furthermore,fy,g,ga,gave,ge,get,gets,getting,gi,give,given,gives,giving,gj,gl,go,goes,going,gone,got,gotten,gr,greetings,gs,gy,h,h2,h3,had,hadn,hadn't,happens,hardly,has,hasn,hasnt,hasn't,have,haven,haven't,having,he,hed,he'd,he'll,hello,help,hence,her,here,hereafter,hereby,herein,heres,here's,hereupon,hers,herself,hes,he's,hh,hi,hid,him,himself,his,hither,hj,ho,home,hopefully,how,howbeit,however,how's,hr,hs,http,hu,hundred,hy,i,i2,i3,i4,i6,i7,i8,ia,ib,ibid,ic,id,i'd,ie,if,ig,ignored,ih,ii,ij,il,i'll,im,i'm,immediate,immediately,importance,important,in,inasmuch,inc,indeed,index,indicate,indicated,indicates,information,inner,insofar,instead,interest,into,invention,inward,io,ip,iq,ir,is,isn,isn't,it,itd,it'd,it'll,its,it's,itself,iv,i've,ix,iy,iz,j,jj,jr,js,jt,ju,just,k,ke,keep,keeps,kept,kg,kj,km,know,known,knows,ko,l,l2,la,largely,last,lately,later,latter,latterly,lb,lc,le,least,les,less,lest,let,lets,let's,lf,like,liked,likely,line,little,lj,ll,ll,ln,lo,look,looking,looks,los,lr,ls,lt,ltd,m,m2,ma,made,mainly,make,makes,many,may,maybe,me,mean,means,meantime,meanwhile,merely,mg,might,mightn,mightn't,mill,million,mine,miss,ml,mn,mo,more,moreover,most,mostly,move,mr,mrs,ms,mt,mu,much,mug,must,mustn,mustn't,my,myself,n,n2,na,name,namely,nay,nc,nd,ne,near,nearly,necessarily,necessary,need,needn,needn't,needs,neither,never,nevertheless,new,next,ng,ni,nine,ninety,nj,nl,nn,no,nobody,non,none,nonetheless,noone,nor,normally,nos,not,noted,nothing,novel,now,nowhere,nr,ns,nt,ny,o,oa,ob,obtain,obtained,obviously,oc,od,of,off,often,og,oh,oi,oj,ok,okay,ol,old,om,omitted,on,once,one,ones,only,onto,oo,op,oq,or,ord,os,ot,other,others,otherwise,ou,ought,our,ours,ourselves,out,outside,over,overall,
ow,owing,own,ox,oz,p,p1,p2,p3,page,pagecount,pages,par,part,particular,particularly,pas,past,pc,pd,pe,per,perhaps,pf,ph,pi,pj,pk,pl,placed,please,plus,pm,pn,po,poorly,possible,possibly,potentially,pp,pq,pr,predominantly,present,presumably,previously,primarily,probably,promptly,proud,provides,ps,pt,pu,put,py,q,qj,qu,que,quickly,quite,qv,r,r2,ra,ran,rather,rc,rd,re,readily,really,reasonably,recent,recently,ref,refs,regarding,regardless,regards,related,relatively,research,research-articl,respectively,resulted,resulting,results,rf,rh,ri,right,rj,rl,rm,rn,ro,rq,rr,rs,rt,ru,run,rv,ry,s,s2,sa,said,same,saw,say,saying,says,sc,sd,se,sec,second,secondly,section,see,seeing,seem,seemed,seeming,seems,seen,self,selves,sensible,sent,serious,seriously,seven,several,sf,shall,shan,shan't,she,shed,she'd,she'll,shes,she's,should,shouldn,shouldn't,should've,show,showed,shown,showns,shows,si,side,significant,significantly,similar,similarly,since,sincere,six,sixty,sj,sl,slightly,sm,sn,so,some,somebody,somehow,someone,somethan,something,sometime,sometimes,somewhat,somewhere,soon,sorry,sp,specifically,specified,specify,specifying,sq,sr,ss,st,still,stop,strongly,sub,substantially,successfully,such,sufficiently,suggest,sup,sure,sy,system,sz,t,t1,t2,t3,take,taken,taking,tb,tc,td,te,tell,ten,tends,tf,th,than,thank,thanks,thanx,that,that'll,thats,that's,that've,the,their,theirs,them,themselves,then,thence,there,thereafter,thereby,thered,therefore,therein,there'll,thereof,therere,theres,there's,thereto,thereupon,there've,these,they,theyd,they'd,they'll,theyre,they're,they've,thickv,thin,think,third,this,thorough,thoroughly,those,thou,though,thoughh,thousand,three,throug,through,throughout,thru,thus,ti,til,tip,tj,tl,tm,tn,to,together,too,took,top,toward,towards,tp,tq,tr,tried,tries,truly,try,trying,ts,t's,tt,tv,twelve,twenty,twice,two,tx,u,u201d,ue,ui,uj,uk,um,un,under,unfortunately,unless,unlike,unlikely,until,unto,uo,up,upon,ups,ur,us,use,used,useful,usefully,usefulness,uses,using,usually,ut,v,va,value,various,vd,ve,ve,very,via,viz,vj,vo,vol,vols,volumtype,vq,vs,vt,vu,w,wa,want,wants,was,wasn,wasnt,wasn't,way,we,wed,we'd,welcome,well,we'll,well-b,went,were,we're,weren,werent,weren't,we've,what,whatever,what'll,whats,what's,when,whence,whenever,when's,where,whereafter,whereas,whereby,wherein,wheres,where's,whereupon,wherever,whether,which,while,whim,whither,who,whod,whoever,whole,who'll,whom,whomever,whos,who's,whose,why,why's,wi,widely,will,willing,wish,with,within,without,wo,won,wonder,wont,won't,words,world,would,wouldn,wouldnt,wouldn't,www,x,x1,x2,x3,xf,xi,xj,xk,xl,xn,xo,xs,xt,xv,xx,y,y2,yes,yet,yj,yl,you,youd,you'd,you'll,your,youre,you're,yours,yourself,yourselves,you've,yr,ys,yt,z,zero,zi,zz
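The stopword file is a single comma-separated line rather than one word per line, which is why utils.py splits the raw file contents on commas instead of reading it line by line. A small sketch of the same loading step using a context manager (same filename as committed above, otherwise just a restatement of the try/finally block in utils.py):

# Load the comma-separated stopword list; the with-block closes the file
# automatically, mirroring the try/finally in utils.py.
with open("gist_stopwords.txt", "r") as gist_file:
    stopwords = gist_file.read().split(",")

# The committed list runs from "0o" to "zz".
print(len(stopwords), stopwords[0], stopwords[-1])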
requirements.txt
ADDED
@@ -0,0 +1,4 @@
numpy
pandas
nltk
gradio
utils.py
ADDED
@@ -0,0 +1,104 @@
from nltk.stem import WordNetLemmatizer
import pandas as pd
import numpy as np
import math
import nltk
import re

nltk.download("wordnet")
# nltk.download("omw-1.4")

# Initialize wordnet lemmatizer
wnl = WordNetLemmatizer()

file3 = './example.txt'

files = [file3]

gist_file = open("gist_stopwords.txt", "r")
try:
    content = gist_file.read()
    stopwords = content.split(",")
finally:
    gist_file.close()

def read_file(name):
    with open(name, 'r') as file:
        contents = file.read()
    return contents

def process_string(name):
    text = ''.join(c.lower() for c in name)
    # remove punctuation using regex that matches only words or digits or underscore of length 1 or more
    tokens = re.findall(r'\w+', text)
    # remove commonly used words like 'is', 'the', 'a', etc.
    filtered_tokens = [token for token in tokens if token not in stopwords]
    # convert words to their root form ie 'running' to 'run'
    root_tokens = [wnl.lemmatize(token, pos='n') for token in filtered_tokens]
    return root_tokens

def process_tokens(tokens, st_global_words):
    # global st_global_words
    freq_dict = {}
    tf_dict = {}
    for word in st_global_words:
        freq_dict[word] = tokens.count(word)
        tf_dict[word] = freq_dict[word] / len(tokens)
    return freq_dict, tf_dict

def main(input1, input2):
    processed_files = [read_file(file) for file in files]
    processed_files.append(input1)
    processed_files.append(input2)
    processed_strings = [process_string(file) for file in processed_files]
    st_global_words = set()
    for tokens in processed_strings:
        st_global_words.update(tokens)
    processed_tokens = []
    for tokens in processed_strings:
        freq_dict, tf_dict = process_tokens(tokens, st_global_words)
        processed_tokens.append((freq_dict, tf_dict))
    idf_dict = {}
    for word in st_global_words:
        cnt = 0
        for freq_dict, tf_dict in processed_tokens:
            if freq_dict[word] > 0:
                cnt += 1
        idf_dict[word] = math.log(len(processed_tokens) / cnt)

    df = pd.DataFrame({'word': list(st_global_words)})
    df['idf_col'] = [idf_dict[word] for word in st_global_words]
    for i, (freq_dict, tf_dict) in enumerate(processed_tokens):
        freq_col = [freq_dict[word] for word in st_global_words]
        tf_col = [tf_dict[word] for word in st_global_words]
        df['freq_{}'.format(i+1)] = freq_col
        df['tf_{}'.format(i+1)] = tf_col
        df[f'tfidf_{i+1}'] = df[f'tf_{i+1}'] * df['idf_col']

    tf_idf_cols = [col for col in df.columns if 'tfidf' in col]
    tf_idf_vals = []
    for i in range(len(tf_idf_cols)):
        tf_idf_vals.append(df[tf_idf_cols[i]].values)
    tf_idf_vals = np.array(tf_idf_vals)
    return tf_idf_vals

def cosine_diff(A, B):
    dot_product = sum(A[i] * B[i] for i in range(len(A)))
    norm_A = math.sqrt(sum([A[i]**2 for i in range(len(A))]))
    norm_B = math.sqrt(sum([B[i]**2 for i in range(len(B))]))
    similarity = dot_product / (norm_A * norm_B)
    return similarity

def euclidean(A, B):
    su = 0
    for i in range(len(A)):
        su += (A[i] - B[i])**2

    return math.sqrt(su)

def final_main(input1, input2):
    tf_idf_vals = main(input1, input2)
    outputString = ""
    outputString += f"Cosine sim: {cosine_diff(tf_idf_vals[1], tf_idf_vals[2])}\n"
    outputString += f"Euclidean difference: {euclidean(tf_idf_vals[1], tf_idf_vals[2])}\n"
    return outputString
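utils.py implements the whole pipeline: each document is lowercased, tokenized with re.findall(r'\w+', ...), filtered against the stopword list, and lemmatized; main then builds one TF-IDF vector per document over the shared vocabulary, with tf = count / document length and idf = log(number of documents / number of documents containing the word), and final_main compares indices 1 and 2, i.e. the two user inputs (index 0 is the bundled example.txt, which only influences the result through the shared vocabulary and the IDF weights). A self-contained toy walk-through of the same arithmetic on pre-tokenized documents, independent of the helpers above:

import math

# Three tiny pre-tokenized "documents": a stand-in for example.txt plus two inputs.
docs = [
    ["exercise", "health", "running"],   # reference document
    ["running", "cycling", "health"],    # Doc1
    ["swimming", "running", "health"],   # Doc2
]

vocab = sorted(set(word for doc in docs for word in doc))

# idf(w) = log(N / number of documents containing w), as in main()
idf = {w: math.log(len(docs) / sum(w in doc for doc in docs)) for w in vocab}

# tf(w, d) = count(w in d) / len(d); one tf-idf vector per document
vectors = [[doc.count(w) / len(doc) * idf[w] for w in vocab] for doc in docs]

def cosine(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    return dot / (math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b)))

def euclid(a, b):
    return math.sqrt(sum((x - y) ** 2 for x, y in zip(a, b)))

# final_main compares the two user documents (indices 1 and 2).
print("Cosine sim:", cosine(vectors[1], vectors[2]))
print("Euclidean difference:", euclid(vectors[1], vectors[2]))

With only three documents, a word that appears in all of them gets idf = log(3/3) = 0 and drops out of both measures, so the two inputs are effectively compared on the words that distinguish them.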