Ketengan-Diffusion-Lab committed on
Commit
5ee7893
1 Parent(s): cddd847

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -55
app.py CHANGED
@@ -1,63 +1,49 @@
1
  import gradio as gr
2
  import torch
3
- import transformers
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
  from PIL import Image
6
- import warnings
7
-
8
# Keep the console quiet: Python warnings are ignored, transformers logs
# errors only, and its progress bars are turned off.
warnings.filterwarnings('ignore')
transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()

# Tensors created without an explicit device go to the GPU (use 'cpu' to
# run without one).
torch.set_default_device('cuda')

model_name = 'cognitivecomputations/dolphin-vision-7b'

# Tokenizer and model come from the same checkpoint; both loaders are
# called with trust_remote_code=True.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map='auto',
    torch_dtype=torch.float16,
)
27
-
28
def inference(prompt, image):
    """Answer ``prompt`` about ``image`` with the module-level model.

    Args:
        prompt: User text; it is placed after an ``<image>`` placeholder in a
            single-turn chat message.
        image: The image handed to ``model.process_images``.

    Returns:
        The decoded text of the newly generated tokens, stripped of
        surrounding whitespace and special tokens.
    """
    conversation = [{"role": "user", "content": f'<image>\n{prompt}'}]
    templated = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize the text on each side of the <image> placeholder separately,
    # then join the first two pieces with the sentinel id -200 in between.
    # NOTE(review): -200 is presumably the model's image-token index -- confirm
    # against the checkpoint's remote code.
    pieces = [tokenizer(part).input_ids for part in templated.split('<image>')]
    before_image, after_image = pieces[0], pieces[1]
    input_ids = torch.tensor(
        before_image + [-200] + after_image, dtype=torch.long
    ).unsqueeze(0)

    pixels = model.process_images([image], model.config)
    pixels = pixels.to(dtype=model.dtype)

    generated = model.generate(
        input_ids,
        images=pixels,
        max_new_tokens=2048,
        use_cache=True,
    )[0]

    # Drop the echoed prompt tokens; only the continuation is decoded.
    prompt_length = input_ids.shape[1]
    answer = tokenizer.decode(generated[prompt_length:], skip_special_tokens=True)
    return answer.strip()
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Two-column layout: prompt, image and submit button on the left, the model's
# answer on the right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Describe this image in detail",
            )
            image_input = gr.Image(label="Image", type="pil")
            submit_button = gr.Button("Submit")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    # Clicking the button runs inference(prompt, image) and shows its result.
    submit_button.click(
        fn=inference,
        inputs=[prompt_input, image_input],
        outputs=output_text,
    )

demo.launch()
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModel, AutoTokenizer
 
4
  from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
# The app only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)

_CHECKPOINT = 'internlm/internlm-xcomposer2d5-7b'

# Load tokenizer and model from the same checkpoint; the model is loaded in
# bfloat16, moved to the GPU and put in eval mode.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT, trust_remote_code=True)
model = AutoModel.from_pretrained(
    _CHECKPOINT,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model = model.cuda().eval()

# The tokenizer is also attached to the model object.
model.tokenizer = tokenizer
16
+
17
def analyze_image(query, image):
    """Run the module-level model on one image and return its text response.

    Args:
        query: The user's question or instruction about the image.
        image: The uploaded image: a filesystem path or file object that
            ``Image.open`` can read, or an already-loaded ``PIL.Image.Image``.

    Returns:
        The response string returned by ``model.chat``.

    Raises:
        gr.Error: If no image was supplied.
    """
    import os
    import tempfile

    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    # model.chat is given a list of image paths, so the upload is re-saved as
    # a PNG. Each request gets its own temporary file: a fixed path would be
    # overwritten when two requests run at the same time.
    fd, image_path = tempfile.mkstemp(suffix='.png')
    os.close(fd)
    try:
        if isinstance(image, Image.Image):
            image.save(image_path)
        else:
            # The context manager closes the source file once it is copied.
            with Image.open(image) as uploaded:
                uploaded.save(image_path)

        with torch.autocast(device_type='cuda', dtype=torch.float16):
            response, _ = model.chat(
                tokenizer,
                query,
                [image_path],
                do_sample=False,
                num_beams=3,
                use_meta=True,
            )
    finally:
        # Remove the temporary PNG whether or not generation succeeded.
        os.remove(image_path)

    return response
29
+
30
# Build the Gradio interface: a heading, then one row each for the query box,
# the image upload, the read-only result box and the submit button.
with gr.Blocks() as demo:
    gr.Markdown("## Image Analysis Tool using Hugging Face's `internlm-xcomposer2d5-7b`")

    with gr.Row():
        query_input = gr.Textbox(
            label="Enter your query",
            placeholder="Analyze the given image in a detailed manner",
        )

    with gr.Row():
        # gr.Image accepts type "numpy", "pil" or "filepath"; "file" is not a
        # valid value. "filepath" passes analyze_image a path string, which
        # Image.open reads directly.
        image_input = gr.Image(label="Upload an Image", type="filepath")

    with gr.Row():
        result_output = gr.Textbox(
            label="Result",
            placeholder="Model response will appear here",
            interactive=False,
        )

    with gr.Row():
        submit_button = gr.Button("Submit")

    # Clicking the button calls analyze_image(query, image) and writes the
    # returned text into the result box.
    submit_button.click(
        fn=analyze_image,
        inputs=[query_input, image_input],
        outputs=result_output,
    )

# Launch the Gradio interface
demo.launch()