larry1129 committed
Commit 91633ba
1 Parent(s): 42bbfea

Update app.py

Files changed (1)
  1. app.py +80 -106
app.py CHANGED
@@ -1,112 +1,81 @@
- import spaces  # must be imported at the very top
  import gradio as gr
  import os

  # Get the Hugging Face access token
  hf_token = os.getenv("HF_API_TOKEN")

- # Base model name
- base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
-
- # Adapter model name
- adapter_model_name = "larry1129/WooWoof_AI"
-
- # Global variables used to cache the model and tokenizer
- model = None
- tokenizer = None

- # Prompt construction helper
- def generate_prompt(instruction, input_text=""):
-     if input_text:
-         prompt = f"""### Instruction:
  {instruction}
  ### Input:
- {input_text}
  ### Response:
  """
-     else:
-         prompt = f"""### Instruction:
- {instruction}
- ### Response:
- """
-     return prompt
-
- # Response generation function, decorated with @spaces.GPU
- @spaces.GPU(duration=40)  # consider increasing duration to 120
- def generate_response(instruction, input_text):
-     global model, tokenizer
-
-     if model is None:
-         print("Loading model...")
-         # Check whether bitsandbytes is installed
-         import importlib.util
-         if importlib.util.find_spec("bitsandbytes") is None:
-             import subprocess
-             subprocess.call(["pip", "install", "--upgrade", "bitsandbytes"])
-
-         try:
-             # Import GPU-dependent libraries inside the function
-             import torch
-             from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-
-             from peft import PeftModel
-
-             # Build the 4-bit quantization config
-             bnb_config = BitsAndBytesConfig(
-                 load_in_4bit=True,
-                 bnb_4bit_use_double_quant=True,
-                 bnb_4bit_quant_type="nf4",
-                 bnb_4bit_compute_dtype=torch.float16
-             )
-
-             # Load the tokenizer
-             tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=hf_token)
-             print("Tokenizer loaded.")
-
-             # Load the base model
-             base_model = AutoModelForCausalLM.from_pretrained(
-                 base_model_name,
-                 quantization_config=bnb_config,
-                 device_map="auto",
-                 use_auth_token=hf_token,
-                 trust_remote_code=True
-             )
-             print("Base model loaded.")
-
-             # Load the adapter model
-             model = PeftModel.from_pretrained(
-                 base_model,
-                 adapter_model_name,
-                 torch_dtype=torch.float16,
-                 use_auth_token=hf_token
-             )
-             print("Adapter model loaded.")
-
-             # Set the pad token
-             tokenizer.pad_token = tokenizer.eos_token
-             model.config.pad_token_id = tokenizer.pad_token_id
-
-             # Switch to evaluation mode
-             model.eval()
-             print("Model set to evaluation mode.")
-         except Exception as e:
-             print("Error while loading the model:", e)
-             raise e
-     else:
-         # Import the required libraries inside the function
-         import torch
-
-     # Make sure the model and tokenizer are loaded
-     if model is None or tokenizer is None:
-         print("Model or tokenizer failed to load.")
-         raise ValueError("Model or tokenizer failed to load.")
-
-     # Build the prompt
-     prompt = generate_prompt(instruction, input_text)
-     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
      with torch.no_grad():
-         outputs = model.generate(
              input_ids=inputs["input_ids"],
              attention_mask=inputs.get("attention_mask"),
              max_new_tokens=128,
@@ -114,23 +83,28 @@ def generate_response(instruction, input_text):
              top_p=0.95,
              do_sample=True,
          )
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
      response = response.split("### Response:")[-1].strip()
      return response

- # Create the Gradio interface
  iface = gr.Interface(
-     fn=generate_response,
      inputs=[
-         gr.Textbox(lines=2, placeholder="Instruction", label="Instruction"),
-         gr.Textbox(lines=2, placeholder="Input", label="Input (Option)")
      ],
      outputs="text",
-     title="WooWoof AI",
-     description="Based on LLAMA 3.1 for pet related",
      allow_flagging="never"
  )

  # Launch the Gradio interface
- iface.launch(share=True)
-
  import gradio as gr
  import os
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, LlavaNextProcessor, LlavaNextForConditionalGeneration
+ from PIL import Image

  # Get the Hugging Face access token
  hf_token = os.getenv("HF_API_TOKEN")

+ # Model names
+ vqa_model_name = "llava-hf/llava-v1.6-mistral-7b-hf"
+ language_model_name = "larry1129/WooWoof_AI_Vision_merged_16bit_3b"
+
+ # Global variables used to cache the models and tokenizers
+ vqa_processor = None
+ vqa_model = None
+ language_tokenizer = None
+ language_model = None
+
+ # Initialize the image-captioning (VQA) model
+ def load_vqa_model():
+     global vqa_processor, vqa_model
+     if vqa_processor is None or vqa_model is None:
+         vqa_processor = LlavaNextProcessor.from_pretrained(vqa_model_name, use_auth_token=hf_token)
+         vqa_model = LlavaNextForConditionalGeneration.from_pretrained(
+             vqa_model_name,
+             torch_dtype=torch.float16,
+             low_cpu_mem_usage=True
+         ).to("cuda:0")
+     return vqa_processor, vqa_model
+
+ # Initialize the text-only language model
+ def load_language_model():
+     global language_tokenizer, language_model
+     if language_tokenizer is None or language_model is None:
+         language_tokenizer = AutoTokenizer.from_pretrained(language_model_name, use_auth_token=hf_token)
+         language_model = AutoModelForCausalLM.from_pretrained(
+             language_model_name,
+             device_map="auto",
+             torch_dtype=torch.float16
+         )
+         language_tokenizer.pad_token = language_tokenizer.eos_token
+         language_model.config.pad_token_id = language_tokenizer.pad_token_id
+         language_model.eval()
+     return language_tokenizer, language_model
+
+ # Generate a description of the uploaded image
+ def generate_image_description(image):
+     vqa_processor, vqa_model = load_vqa_model()
+     conversation = [
+         {
+             "role": "user",
+             "content": [
+                 {"type": "text", "text": "What is shown in this image?"},
+                 {"type": "image"},
+             ],
+         },
+     ]
+     prompt = vqa_processor.apply_chat_template(conversation, add_generation_prompt=True)
+     inputs = vqa_processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

+     with torch.no_grad():
+         output = vqa_model.generate(**inputs, max_new_tokens=100)
+     image_description = vqa_processor.decode(output[0], skip_special_tokens=True)
+     return image_description
+
+ # Use the text-only model to generate the final answer
+ def generate_language_response(instruction, image_description):
+     language_tokenizer, language_model = load_language_model()
+     prompt = f"""### Instruction:
  {instruction}
  ### Input:
+ {image_description}
  ### Response:
  """
+     inputs = language_tokenizer(prompt, return_tensors="pt").to(language_model.device)
      with torch.no_grad():
+         outputs = language_model.generate(
              input_ids=inputs["input_ids"],
              attention_mask=inputs.get("attention_mask"),
              max_new_tokens=128,

              top_p=0.95,
              do_sample=True,
          )
+     response = language_tokenizer.decode(outputs[0], skip_special_tokens=True)
      response = response.split("### Response:")[-1].strip()
      return response

+ # Combined handler for the Gradio interface
+ def process_image_and_text(image, instruction):
+     image_description = generate_image_description(image)
+     final_response = generate_language_response(instruction, image_description)
+     return f"Image description: {image_description}\n\nFinal answer: {final_response}"
+
+ # Create the Gradio interface
  iface = gr.Interface(
+     fn=process_image_and_text,
      inputs=[
+         gr.Image(type="pil", label="Upload an image"),
+         gr.Textbox(lines=2, placeholder="Instruction", label="Instruction")
      ],
      outputs="text",
+     title="WooWoof AI - Image and Text Interaction",
+     description="Upload an image and add an instruction to generate an answer grounded in the image description.",
      allow_flagging="never"
  )

  # Launch the Gradio interface
+ iface.launch()
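
For a quick end-to-end check of the new image-plus-instruction flow once the Space is running, the updated interface can be called remotely with gradio_client. This is a minimal sketch, not part of the commit: the Space id and the local image path are assumptions, and it relies on the default /predict endpoint that gr.Interface exposes for a public (or token-authorized) Space.

# Remote smoke test for the two-stage pipeline (sketch; names below are assumptions).
from gradio_client import Client, handle_file

client = Client("larry1129/WooWoof_AI_Vision")  # hypothetical Space id

result = client.predict(
    handle_file("dog.jpg"),                       # maps to the gr.Image input (local path you supply)
    "What should I feed the pet in this photo?",  # maps to the Instruction textbox
    api_name="/predict",
)
print(result)  # combined "Image description / Final answer" string returned by process_image_and_text

Because both models are lazily cached in module-level globals, the first request is slow (it downloads and loads LLaVA-Next and the merged language model onto the GPU); later requests reuse the loaded weights.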