File size: 6,597 Bytes
d336c8b 0b155d7 d336c8b 0b155d7 d336c8b 63c44d1 17871e5 d336c8b a814c2f 63c44d1 8b93855 09ffd9d b9ab5e0 63c44d1 d336c8b 17871e5 0cdc59c 8b93855 a814c2f 17871e5 612a9ab 17871e5 63c44d1 17871e5 09ffd9d 3264246 612a9ab 3264246 612a9ab 17871e5 3264246 17871e5 3264246 17871e5 3264246 17871e5 6c9df61 17871e5 d336c8b 63c44d1 09ffd9d 63c44d1 612a9ab 63c44d1 7eb62d4 8589cca 7eb62d4 8589cca 7eb62d4 8589cca 7eb62d4 63c44d1 8589cca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
# import numpy as np
import functools

import requests
import torch
from PIL import Image
from torch.nn.functional import cosine_similarity
from transformers import AutoImageProcessor, AutoModel
from transformers import ViTImageProcessor, ViTModel
from transformers import pipeline
# import transformers
#
# print(transformers.__version__)
#
# img_urls = ["https://img0.baidu.com/it/u=3704428154,2884159591&fm=253&fmt=auto&app=138&f=JPEG?w=889&h=500",
# "https://img0.baidu.com/it/u=3704428154,2884159591&fm=253&fmt=auto&app=138&f=JPEG?w=889&h=500"]
#
# image_real = Image.open(requests.get(img_urls[0], stream=True).raw).convert("RGB")
# image_gen = Image.open(requests.get(img_urls[1], stream=True).raw).convert("RGB")
#
# Device used by the inference functions in this module. It is pinned to the
# CPU; the CUDA-aware alternative is kept (disabled) on the next line.
# # DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DEVICE = torch.device('cpu')
# pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-384", device=DEVICE, pool=True)
#
# # 1. Extract the image feature vectors
# outputs = pipe([image_real, image_gen])
#
# # get the length of a single output
# print(len(outputs[0][0]))
# # show outputs
# print(outputs)
#
# # 768
# # [[[-0.03909236937761307, 0.43381670117378235, -0.06913255900144577,
#
# # 2. Compute the image similarity
# similarity_score = cosine_similarity(torch.Tensor(outputs[0]),
# torch.Tensor(outputs[1]), dim=1)
#
# print(similarity_score)
# tensor([0.6043])
# pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-224", device=DEVICE)
# output = pipe(image_real)
#
# # The first dimension is the batch size; the last two dimensions are the embedding shape.
# print(np.array(outputs).shape)
# # (1, 197, 768)
# Second approach to inferring image similarity
# processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
# model = AutoModel.from_pretrained("google/vit-base-patch16-224").to(DEVICE)
# processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
# model = AutoModel.from_pretrained("google/vit-base-patch16-224-in21k").to(DEVICE)
# processor = AutoImageProcessor.from_pretrained("chanhua/autotrain-izefx-v3qh0")
# model = AutoModel.from_pretrained("chanhua/autotrain-izefx-v3qh0").to(DEVICE)
# processor = ViTImageProcessor.from_pretrained('google/vit-large-patch16-224-in21k')
# model = ViTModel.from_pretrained('google/vit-large-patch16-224-in21k')
# processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
# model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
# tensor([0.6061], device='cuda:0', grad_fn=<SumBackward1>)
# pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-384", device=DEVICE, pool=True)
# pipe = pipeline(task="image-feature-extraction", model_name="chanhua/autotrain-izefx-v3qh0", device=DEVICE, pool=True)
# pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-224", device=DEVICE, pool=True, revision="29e7a1e183")
@functools.lru_cache(maxsize=1)
def _get_feature_pipeline():
    """Build the pooled image-feature-extraction pipeline once and reuse it."""
    # `pipeline()` selects the checkpoint through `model=`. The previous
    # `model_name=` keyword is not a `pipeline()` parameter, so the checkpoint
    # named here was not the one actually being selected.
    return pipeline(
        task="image-feature-extraction",
        model="google/vit-base-patch16-224-in21k",
        device=DEVICE,
        pool=True,
    )


def infer4(url1, url2):
    """Return the cosine similarity of two images using the feature pipeline.

    Args:
        url1: First image, passed straight to ``Image.open`` (despite the
            name, this is opened locally, not fetched over HTTP).
        url2: Second image, handled the same way as ``url1``.

    Returns:
        The similarity as a Python float, or ``0.0`` if anything fails
        (the error is printed, not raised).
    """
    try:
        print("进入推理")
        print("打开图片1")
        # Open the images before touching the model so a bad path fails fast,
        # and close the underlying file as soon as the RGB copy exists.
        with Image.open(url1) as img:
            image_real = img.convert('RGB')
        print("打开图片2")
        with Image.open(url2) as img:
            image_gen = img.convert('RGB')
        print("利用模型获取图片特征向量")
        outputs = _get_feature_pipeline()([image_real, image_gen])
        print(f"得到图片特征向量计算相似度: {outputs}")
        similarity_score = cosine_similarity(
            torch.tensor(outputs[0]), torch.tensor(outputs[1]), dim=1
        )
        print(f"得到图片相似度: {similarity_score}")
        # Extract the single similarity value as a plain float.
        return similarity_score.cpu().item()
    except Exception as e:
        print(f"发生了一个错误: {e}")
        return 0.0
    finally:
        # Runs whether or not an exception occurred.
        print("这是finally块")
@functools.lru_cache(maxsize=1)
def _load_vit_large():
    """Load the ViT-large processor and model once and reuse them across calls."""
    processor = AutoImageProcessor.from_pretrained('google/vit-large-patch16-224-in21k')
    # Keep the model on the same device the inputs are moved to.
    model = ViTModel.from_pretrained('google/vit-large-patch16-224-in21k').to(DEVICE)
    model.eval()
    return processor, model


def infer2(url):
    """Return the pooled ViT embedding of one image.

    Args:
        url: Image source passed straight to ``Image.open`` (despite the
            name, this is opened locally, not fetched over HTTP).

    Returns:
        The model's ``pooler_output`` tensor for the image, computed without
        gradient tracking.
    """
    # Close the underlying file as soon as the RGB copy exists.
    with Image.open(url) as img:
        image = img.convert('RGB')
    processor, model = _load_vit_large()
    inputs = processor(images=image, return_tensors="pt").to(DEVICE)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.pooler_output
def infer1(image1, image2):
    """Return the cosine similarity between the embeddings of two images.

    Each argument is forwarded unchanged to ``infer2`` to obtain its
    embedding.

    Returns:
        The similarity as a Python float, or ``0.0`` if anything fails
        (the error is printed, not raised).
    """
    try:
        # Inference only: disable autograd for the embedding passes and the
        # similarity so no gradient graph is built or kept alive.
        with torch.no_grad():
            embed_real = infer2(image1)
            embed_gen = infer2(image2)
            similarity_score = cosine_similarity(embed_real, embed_gen, dim=1)
        print(similarity_score)
        # Move to the CPU before extracting the scalar value.
        return similarity_score.cpu().item()
    except Exception as e:
        print(f"发生了一个错误: {e}")
        return 0.0
    finally:
        # Runs whether or not an exception occurred.
        print("这是finally块")
def similarity_cpu(image1, image2):
    """Return the cosine similarity between the embeddings of two images.

    Each argument is forwarded unchanged to ``xl_infer`` to obtain its
    embedding.

    Returns:
        The similarity as a Python float on success. On failure the error is
        printed and a string starting with ``'异常'`` followed by the error
        message is returned instead of raising.
    """
    try:
        # Inference only: disable autograd for the embedding passes and the
        # similarity so no gradient graph is built or kept alive.
        with torch.no_grad():
            embed_real = xl_infer(image1)
            embed_gen = xl_infer(image2)
            similarity_score = cosine_similarity(embed_real, embed_gen, dim=1)
        print(similarity_score)
        # Move to the CPU before extracting the scalar value.
        return similarity_score.cpu().item()
    except Exception as e:
        print(f"发生了一个错误: {e}")
        return '异常' + str(e)
    finally:
        # Runs whether or not an exception occurred.
        print("这是finally块")
@functools.lru_cache(maxsize=1)
def _load_xl_model():
    """Load the ViT-large processor and model once and reuse them across calls."""
    processor = AutoImageProcessor.from_pretrained('google/vit-large-patch16-224-in21k')
    # Keep the model on the same device the inputs are moved to.
    model = AutoModel.from_pretrained('google/vit-large-patch16-224-in21k').to(DEVICE)
    model.eval()
    return processor, model


def xl_infer(url):
    """Return the pooled ViT embedding of an already-opened image.

    Args:
        url: Despite the name, an image object exposing ``convert('RGB')``
            (it is not opened or downloaded here).

    Returns:
        The model's ``pooler_output`` tensor for the image, computed without
        gradient tracking.
    """
    image = url.convert('RGB')
    processor, model = _load_xl_model()
    inputs = processor(images=image, return_tensors="pt").to(DEVICE)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.pooler_output