介绍
2023_07_17更新:增加了pixiv数据集进行训练
使用danburoo2021数据集对clip(ViT-L/14)模型进行微调。
0-3 epoch学习率为4e-6,权重衰减为1e-3
4-8 epoch学习率为1e-6,权重衰减为1e-3
标签预处理过程:
for i in range(length):
# 加载并且缩放图片
if not is_image(data_from_db.path[i]):
continue
try:
img = self.preprocess(
Image.open(data_from_db.path[i].replace("./", "/mnt/lvm/danbooru2021/danbooru2021/")))
except Exception as e:
#print(e)
continue
# 处理标签
tags = json.loads(data_from_db.tags[i])
# 优先选择人物和作品标签
category_group = {}
for tag in tags:
category_group.setdefault(tag["category"], []).append(tag)
# category_group=groupby(tags, key=lambda x: (x["category"]))
character_list = category_group[4] if 4 in category_group else []
# 作品需要过滤以bad开头的
work_list = list(filter(
lambda e:
e["name"] != "original"
, category_group[3])) if 3 in category_group else []
# work_list= category_group[5] if 5 in category_group else []
general_list = category_group[0] if 0 in category_group else []
caption = ""
caption_2 = None
for character in character_list:
if len(work_list) != 0:
# 去除括号内作品内容
character["name"] = re.sub(u"\\(.*?\\)", "", character["name"])
caption += character["name"].replace("_", " ")
caption += ","
caption = caption[:-1]
caption += " "
if len(work_list) != 0:
caption += "from "
for work in work_list:
caption += work["name"].replace("_", " ")
caption += " "
# 普通标签
if len(general_list) != 0:
caption += "with "
if len(general_list) > 20:
general_list_1 = general_list[:int(len(general_list) / 2)]
general_list_2 = general_list[int(len(general_list) / 2):]
caption_2 = caption
for general in general_list_1:
if general["name"].find("girl") == -1 and general["name"].find("boy") == -1 and len(
re.findall(is_contain, general["name"])) != 0:
caption_2 += general["name"].replace("_", " ")
caption_2 += ","
caption_2 = caption_2[:-1]
for general in general_list_2:
if general["name"].find("girl") == -1 and general["name"].find("boy") == -1 and len(
re.findall(is_contain, general["name"])) != 0:
caption += general["name"].replace("_", " ")
caption += ","
caption = caption[:-1]
else:
for general in general_list:
# 如果标签数据目大于20 则拆分成两个caption
if general["name"].find("girl") == -1 and general["name"].find("boy") == -1 and len(
re.findall(is_contain, general["name"])) != 0:
caption += general["name"].replace("_", " ")
caption += ","
caption = caption[:-1]
# 标签汇总成语句
# tokenize语句
# 返回
# 过长截断 不行的话用huggingface的
text_1 = clip.tokenize(texts=caption, truncate=True)
text_2= None
if caption_2 is not None:
text_2 = clip.tokenize(texts=caption_2, truncate=True)
# 处理逻辑
# print(img)
yield img, text_1[0]
if text_2 is not None:
yield img, text_2[0]
使用
from PIL import Image
import requests
from transformers import CLIPProcessor, CLIPModel
model = CLIPModel.from_pretrained("OysterQAQ/DanbooruCLIP")
processor = CLIPProcessor.from_pretrained("OysterQAQ/DanbooruCLIP")
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)
outputs = model(**inputs)
logits_per_image = outputs.logits_per_image # this is the image-text similarity score
probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
Feedback
Where to send questions or comments about the model
Please use this Google Form
- Downloads last month
- 341
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.