File size: 4,302 Bytes
f38b41e b196c77 f38b41e e844435 f38b41e 6d56bc4 f38b41e 18cd94a 9be3938 f38b41e 18cd94a f38b41e 9be3938 f38b41e 9be3938 f38b41e 78464e7 9be3938 78464e7 f38b41e 0aa8ed6 3582b44 f38b41e 3582b44 0aa8ed6 f38b41e 3582b44 f38b41e 0aa8ed6 f38b41e 0aa8ed6 f38b41e 02397eb 78464e7 0d826b8 6770b3a 78464e7 db30631 78464e7 f38b41e 0aa8ed6 f38b41e 0aa8ed6 f38b41e c505acd 959c369 e844435 a571a7b f38b41e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
#!/usr/bin/env python3
from doctest import OutputChecker
import sys
import torch
import re
import os
import gradio as gr
import requests
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from torch.nn.functional import softmax
import numpy as np
# just for the sake of this demo, we use cloze prob to initialize the hypothesis
#url = "https://github.com/simonepri/lm-scorer/tree/master/lm_scorer/models"
#resp = requests.get(url)
from sentence_transformers import SentenceTransformer, util
# Sentence-embedding model used by Visual_re_ranker to embed the captions and
# the visual context label before comparing them.
model_sts = SentenceTransformer('stsb-distilbert-base')
#model_sts = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')
#batch_size = 1
#scorer = LMScorer.from_pretrained('gpt2' , device=device, batch_size=batch_size)
#import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import numpy as np
import re
def get_sim(x):
    """Return the text form of ``x`` without its two outermost characters per side.

    Visual_re_ranker passes in a NumPy array of similarity scores (presumably
    1x1, printed like ``[[0.53]]``), so removing the surrounding brackets
    leaves only the number, still as a string.

    Args:
        x: Any object; it is converted with ``str`` before trimming.

    Returns:
        ``str(x)`` with the first two and last two characters removed.
    """
    text = str(x)
    # A single slice of width two is equivalent to trimming one character
    # from each end twice in a row.
    return text[2:-2]
# Load pre-trained model
#model = GPT2LMHeadModel.from_pretrained('distilgpt2', output_hidden_states = True, output_attentions = True)
#model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states = True, output_attentions = True)
#model = gr.Interface.load('huggingface/distilgpt2', output_hidden_states = True, output_attentions = True)
#model.eval()
#tokenizer = gr.Interface.load('huggingface/distilgpt2')
#tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
#tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
#tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
# GPT-2 tokenizer and language model; sentence_prob_mean uses this pair to
# score how probable a caption is.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
def sentence_prob_mean(text):
    """Return the mean next-token probability the language model assigns to ``text``.

    Every token from the second one onwards is scored with the probability
    the model gives it conditioned on the tokens before it. The first token
    has no preceding context and is therefore not scored. The result is the
    arithmetic mean of those per-token probabilities.

    Args:
        text: Sentence to score.

    Returns:
        The mean per-token probability as a Python float.

    Raises:
        ValueError: If ``text`` encodes to fewer than two tokens, because
            there is then no next-token prediction to average (the mean over
            an empty tensor would otherwise come back as NaN).
    """
    # Shape (1, seq_len): a batch containing the single encoded sentence.
    input_ids = tokenizer.encode(text, return_tensors='pt')
    if input_ids.shape[-1] < 2:
        raise ValueError(
            "text must encode to at least two tokens to compute a mean token probability"
        )

    # Only the logits are needed, so no labels are passed; this avoids
    # computing a loss that would be thrown away.
    with torch.no_grad():
        logits = model(input_ids).logits

    # Align predictions with targets: the logits at position i predict the
    # token at position i + 1.
    shift_logits = logits[..., :-1, :].contiguous()
    shift_labels = input_ids[..., 1:].contiguous()

    # Turn logits into a distribution over the vocabulary, then pick out the
    # probability of the token that actually follows at each position.
    probs = softmax(shift_logits, dim=-1)
    token_probs = torch.gather(probs, 2, shift_labels.unsqueeze(-1)).squeeze(-1)

    return torch.mean(token_probs).item()
def cos_sim(a, b):
    """Return the cosine similarity of ``a`` and ``b``.

    Args:
        a: First vector (array-like).
        b: Second vector (array-like).

    Returns:
        The inner product of ``a`` and ``b`` divided by the product of their
        norms.
    """
    inner_product = np.inner(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return inner_product / norm_product
def _visual_rerank_score(caption, context_label_emb, context_prob):
    """Score one caption against an already-embedded visual context label."""
    caption_emb = model_sts.encode(caption, convert_to_tensor=True)

    # Read the single similarity value directly from the tensor instead of
    # parsing it back out of the array's printed text.
    sim = float(util.pytorch_cos_sim(caption_emb, context_label_emb).item())

    # Floating-point error can push a cosine slightly outside [-1, 1]. A value
    # just above 1 would make the ratio below negative, and raising a negative
    # number to a fractional power yields a complex result that cannot be
    # converted to float.
    sim = max(-1.0, min(1.0, sim))

    if sim == -1.0:
        # (1 - sim) / (1 + sim) grows without bound as sim approaches -1;
        # use infinity rather than dividing by zero.
        ratio = float("inf")
    else:
        ratio = (1.0 - sim) / (1.0 + sim)

    lm_prob = float(sentence_prob_mean(caption))
    return pow(lm_prob, pow(ratio, 1.0 - context_prob))


def Visual_re_ranker(caption_man, caption_woman, visual_context_label, context_prob):
    """Score a "man" caption and a "woman" caption against a visual context.

    For each caption the score is::

        LM ** (((1 - sim) / (1 + sim)) ** (1 - context_prob))

    where ``LM`` is the caption's mean token probability from
    ``sentence_prob_mean`` and ``sim`` is the cosine similarity between the
    caption embedding and the visual-context-label embedding.

    Args:
        caption_man: Caption text for the "Man" entry.
        caption_woman: Caption text for the "Woman" entry.
        visual_context_label: Text label of the visual context.
        context_prob: Confidence of the visual context label; anything
            ``float()`` accepts (the demo supplies it as a string).

    Returns:
        A dict with the keys ``"Man"`` and ``"Woman"`` mapping to the float
        score of the corresponding caption.

    Raises:
        ValueError: If ``context_prob`` cannot be converted to float.
    """
    prob = float(context_prob)
    # The context label is the same for both captions, so embed it once.
    context_label_emb = model_sts.encode(visual_context_label, convert_to_tensor=True)
    return {
        "Man": _visual_rerank_score(caption_man, context_label_emb, prob),
        "Woman": _visual_rerank_score(caption_woman, context_label_emb, prob),
    }
# Default text shown in the four input boxes, listed in the same order as the
# parameters of Visual_re_ranker.
_EXAMPLE_INPUTS = (
    "a man riding a motorcycle on a road",
    "a woman riding a motorcycle on a road",
    "motor scooter",
    "0.2183",
)

# Web UI: four text inputs feed Visual_re_ranker and the returned scores are
# rendered by the "label" output component.
demo = gr.Interface(
    fn=Visual_re_ranker,
    description="Demo for Women Wearing Lipstick: Measuring the Bias Between Object and Its Related Gender (distilbert)",
    inputs=[gr.Textbox(value=example) for example in _EXAMPLE_INPUTS],
    outputs="label",
)
demo.launch()
|