AhmedSSabir committed
Commit: 9be3938
Parent(s): 02397eb

Update app.py

app.py CHANGED

@@ -48,20 +48,22 @@ def get_sim(x):
 # Load pre-trained model
 
 #model = GPT2LMHeadModel.from_pretrained('distilgpt2', output_hidden_states = True, output_attentions = True)
-model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states = True, output_attentions = True)
+#model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states = True, output_attentions = True)
 #model = gr.Interface.load('huggingface/distilgpt2', output_hidden_states = True, output_attentions = True)
 
 #model.eval()
 #tokenizer = gr.Interface.load('huggingface/distilgpt2')
 
 #tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
-tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
 #tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
+#tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
+
+
 
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+model = GPT2LMHeadModel.from_pretrained('gpt2')
 
 
-tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
-model = GPT2LMHeadModel.from_pretrained('distilgpt2')
 
 def sentence_prob_mean(text):
     # Tokenize the input text and add special tokens
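
Note on the hunk above: before this commit the file loaded the 'gpt2' model (line 51) but then reassigned both tokenizer and model to 'distilgpt2' (lines 58, 63-64), so the app effectively ran DistilGPT-2. The commit comments out the stray assignments and loads a single matching pair from 'gpt2'. A hypothetical sanity check (not part of the commit) that the checkpoints now agree:

# Hypothetical check, not in app.py: confirm the tokenizer and model
# both come from the 'gpt2' checkpoint after this change.
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

print(tokenizer.name_or_path)   # gpt2
print(model.config.model_type)  # gpt2
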
@@ -85,37 +87,38 @@ def sentence_prob_mean(text):
     # Compute the mean probability across the tokens
     mean_prob = torch.mean(gathered_probs).item()
 
+    return mean_prob
 
 
-def cloze_prob(text):
+# def cloze_prob(text):
 
-    whole_text_encoding = tokenizer.encode(text)
-    text_list = text.split()
-    stem = ' '.join(text_list[:-1])
-    stem_encoding = tokenizer.encode(stem)
-    cw_encoding = whole_text_encoding[len(stem_encoding):]
-    tokens_tensor = torch.tensor([whole_text_encoding])
+#     whole_text_encoding = tokenizer.encode(text)
+#     text_list = text.split()
+#     stem = ' '.join(text_list[:-1])
+#     stem_encoding = tokenizer.encode(stem)
+#     cw_encoding = whole_text_encoding[len(stem_encoding):]
+#     tokens_tensor = torch.tensor([whole_text_encoding])
 
-    with torch.no_grad():
-        outputs = model(tokens_tensor)
-        predictions = outputs[0]
-
-    logprobs = []
-    start = -1-len(cw_encoding)
-    for j in range(start,-1,1):
-        raw_output = []
-        for i in predictions[-1][j]:
-            raw_output.append(i.item())
+#     with torch.no_grad():
+#         outputs = model(tokens_tensor)
+#         predictions = outputs[0]
+
+#     logprobs = []
+#     start = -1-len(cw_encoding)
+#     for j in range(start,-1,1):
+#         raw_output = []
+#         for i in predictions[-1][j]:
+#             raw_output.append(i.item())
 
-        logprobs.append(np.log(softmax(raw_output)))
+#         logprobs.append(np.log(softmax(raw_output)))
 
 
-    conditional_probs = []
-    for cw,prob in zip(cw_encoding,logprobs):
-        conditional_probs.append(prob[cw])
+#     conditional_probs = []
+#     for cw,prob in zip(cw_encoding,logprobs):
+#         conditional_probs.append(prob[cw])
 
 
-    return np.exp(np.sum(conditional_probs))
+#     return np.exp(np.sum(conditional_probs))
 
 
 
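
Note on the hunk above: the commit adds the previously missing return statement to sentence_prob_mean (the function computed mean_prob but never returned it) and comments out the unused cloze_prob. The diff shows only the tail of sentence_prob_mean; the sketch below is a hypothetical reconstruction of the whole function, consistent with the visible names (gathered_probs, mean_prob) and comments, not the committed code itself:

# Hypothetical reconstruction of the full sentence_prob_mean:
# the mean probability GPT-2 assigns to each actual next token.
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()

def sentence_prob_mean(text):
    # Tokenize the input text and add special tokens
    input_ids = tokenizer.encode(text, return_tensors='pt')

    with torch.no_grad():
        logits = model(input_ids).logits

    # Position i predicts token i+1: align logits with the actual tokens
    shift_logits = logits[:, :-1, :]
    shift_labels = input_ids[:, 1:]

    # Probability the model assigned to each observed token
    probs = torch.softmax(shift_logits, dim=-1)
    gathered_probs = torch.gather(probs, 2, shift_labels.unsqueeze(-1)).squeeze(-1)

    # Compute the mean probability across the tokens
    mean_prob = torch.mean(gathered_probs).item()
    return mean_prob

print(sentence_prob_mean("The quick brown fox jumps over the lazy dog."))
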
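
For reference, the cloze_prob function this commit comments out computed the probability of a sentence's final word given the preceding words, chaining GPT-2's conditional probabilities over the word's subtokens. A cleaned-up, runnable sketch of the same logic (the scipy softmax import is an assumption; the original imports sit outside this diff):

import numpy as np
import torch
from scipy.special import softmax
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()

def cloze_prob(text):
    # Encode the full sentence and its stem (all words but the last)
    whole_text_encoding = tokenizer.encode(text)
    stem = ' '.join(text.split()[:-1])
    stem_encoding = tokenizer.encode(stem)
    # Subtoken ids that make up the final (critical) word
    cw_encoding = whole_text_encoding[len(stem_encoding):]

    tokens_tensor = torch.tensor([whole_text_encoding])
    with torch.no_grad():
        predictions = model(tokens_tensor)[0]

    # Log-probability distribution at each position that predicts
    # a subtoken of the critical word
    logprobs = []
    start = -1 - len(cw_encoding)
    for j in range(start, -1, 1):
        raw_output = [i.item() for i in predictions[-1][j]]
        logprobs.append(np.log(softmax(raw_output)))

    # Chain rule: sum the subtokens' conditional log-probabilities,
    # then exponentiate to recover the joint probability
    conditional_probs = [prob[cw] for cw, prob in zip(cw_encoding, logprobs)]
    return np.exp(np.sum(conditional_probs))

print(cloze_prob("The children went outside to play."))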