# Copyright (c) Alibaba, Inc. and its affiliates.
# Gradio demo for the QVQ-72B-Preview multimodal model, served through DashScope.
import os
import secrets
import tempfile
from http import HTTPStatus
from pathlib import Path

import gradio as gr
import modelscope_studio.components.antd as antd
import modelscope_studio.components.base as ms
from PIL import Image
from urllib3.exceptions import HTTPError

# os.environ['DASHSCOPE_HTTP_BASE_URL'] = 'https://dashscope.aliyuncs.com/api/v1'
import dashscope
from dashscope import MultiModalConversation

API_KEY = os.environ['API_KEY']
BASE_URL = os.environ['DASHSCOPE_HTTP_BASE_URL']
dashscope.api_key = API_KEY
dashscope.base_http_api_url = 'https://ga-lmsys-dashscope.aliyuncs.com/api/v1'

is_modelscope_studio = os.getenv('MODELSCOPE_ENVIRONMENT') == 'studio'


def get_text(text: str, cn_text: str):
    # Use the Chinese string when running inside ModelScope Studio.
    if is_modelscope_studio:
        return cn_text
    return text


def resolve_image(filename):
    return os.path.join(os.path.dirname(__file__), filename)


DEMO_LIST = [
    {
        "description": "Evaluate the integral of the functions graphed using the formula for circles: ",
        "image": resolve_image("./examples/1.webp")
    },
    {
        "description": "回答图中问题",
        "image": resolve_image("./examples/2.png")
    },
    {
        "description": "图片中的滤液E是什么化学物质?",
        "image": resolve_image("./examples/3.png")
    },
    {
        "description": "I want to know the volume of this sofa",
        "image": resolve_image("./examples/4.png")
    },
]


def process_image(image, shouldConvert=False):
    # Directory where uploaded files are stored.
    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
        Path(tempfile.gettempdir()) / "gradio")
    os.makedirs(uploaded_file_dir, exist_ok=True)
    # Create a unique temporary file path.
    name = f"tmp{secrets.token_hex(20)}.jpg"
    filename = os.path.join(uploaded_file_dir, name)
    # Save the uploaded image, optionally flattening transparency onto a white background.
    if shouldConvert:
        new_img = Image.new('RGB',
                            size=(image.width, image.height),
                            color=(255, 255, 255))
        new_img.paste(image, (0, 0), mask=image)
        image = new_img
    image.save(filename)
    return filename


def generate(image, query):
    imageFile = process_image(image)
    content = [
        {'image': f'file://{imageFile}'},
        {'text': query},
    ]
    messages = [
        {'role': 'user', 'content': content},
    ]
    print('messages:', messages)
    responses = MultiModalConversation.call(
        model='qvq-72b-preview',
        messages=messages,
        stream=True,
    )
    # Yield the text of each streamed chunk so the Markdown output updates progressively.
    for response in responses:
        if response.status_code != HTTPStatus.OK:
            raise HTTPError(f'response.code: {response.code}\n'
                            f'response.message: {response.message}')
        output_content = response.output.choices[0].message.content
        if len(output_content) > 0 and output_content[0]['text']:
            print(output_content[0]['text'])
            yield output_content[0]['text']


if __name__ == "__main__":

    def on_clear():
        # Reset both the uploaded image and the question textbox.
        return {
            input: gr.update(value=None),
            input_image: gr.update(value=None),
        }

    with gr.Blocks() as demo:
        with ms.Application() as app:
            with antd.ConfigProvider(
                    locale="zh_CN" if is_modelscope_studio else None,
                    theme=dict(token=dict(colorPrimary="#a855f7"))):
                # Header: logo and title.
                with antd.Card(elem_style=dict(marginBottom=12),
                               styles=dict(body=dict(padding=4))):
                    with antd.Flex(elem_style=dict(width="100%"),
                                   justify="center",
                                   align="center",
                                   gap=14):
                        with ms.Div(elem_style=dict(flexShrink=0)):
                            antd.Image(resolve_image("./cutelogo.jpg"),
                                       preview=False,
                                       height=60)
                        with ms.Div():
                            antd.Typography.Title(
                                "QVQ-72B-Preview",
                                elem_style=dict(margin=0, fontSize=24),
                                level=1)

                with ms.AutoLoading():
                    with antd.Row(gutter=[8, 8], align="stretch"):
                        # Left column: image upload, question input, actions, and examples.
                        with antd.Col(xs=24, md=8):
                            with antd.Space(direction="vertical",
                                            elem_style=dict(width="100%")):
                                with antd.Space(
                                        direction="vertical",
                                        elem_style=dict(width="100%"),
                                        elem_id="input-container"):
                                    with ms.Fragment():
                                        input_image = gr.Image(
                                            type="pil",
                                            label="Upload",
                                            sources=["upload"])
                                    input = antd.Input.Textarea(
                                        placeholder=get_text(
                                            "Ask a question", "输入一个问题"),
                                        auto_size=dict(maxRows=6, minRows=2),
                                        allow_clear=True)
                                    with antd.Flex(align="center",
                                                   justify="space-between"):
                                        antd.Typography.Text(get_text(
                                            "Warning: This model only supports single-turn dialogue.",
                                            "注:当前模型只支持单轮对话,如需中文回答,提示词加“用中文回答”"),
                                            type="warning")
                                        tour_btn = antd.Button(
                                            get_text("Tour", "使用指引"),
                                            variant="filled",
                                            color="default")
                                with antd.Row(gutter=8):
                                    with antd.Col(span=12):
                                        clear_btn = antd.Button(
                                            get_text("Clear", "清除"),
                                            block=True)
                                    with antd.Col(span=12):
                                        submit_btn = antd.Button(
                                            get_text("Submit", "提交"),
                                            type="primary",
                                            block=True,
                                            elem_id="submit-btn")
                                antd.Divider(get_text("Example", "示例"))
                                with antd.Flex(gap="small", wrap=True):
                                    for item in DEMO_LIST:

                                        def bind_on_example(_item):
                                            # Bind the current item so each card fills in its own example.
                                            def on_example():
                                                return gr.update(
                                                    value=_item['description']
                                                ), gr.update(
                                                    value=_item['image'])

                                            return on_example

                                        with antd.Card(
                                                hoverable=True,
                                                elem_style=dict(
                                                    width="100%")) as example:
                                            if "description" in item:
                                                antd.Typography.Text(
                                                    item["description"])
                                            if "image" in item:
                                                antd.Image(item["image"],
                                                           preview=False)
                                        example.click(
                                            fn=bind_on_example(item),
                                            outputs=[input, input_image])

                        # Right column: streamed answer rendered as Markdown with LaTeX support.
                        with antd.Col(xs=24, md=16):
                            with antd.Card(title=get_text("Answer", "答案"),
                                           elem_style=dict(height="100%"),
                                           elem_id="output-container"):
                                output = gr.Markdown(
                                    show_copy_button=True,
                                    latex_delimiters=[{
                                        "left": '$$',
                                        "right": '$$',
                                        "display": True
                                    }, {
                                        "left": '$',
                                        "right": '$',
                                        "display": False,
                                    }, {
                                        "left": '\\(',
                                        "right": '\\)',
                                        "display": False,
                                    }, {
                                        "left": '\\[',
                                        "right": '\\]',
                                        "display": True
                                    }])

                # Guided tour over the input area, submit button, and output area.
                with antd.Tour(props=dict(open=False)) as tour:
                    antd.Tour.Step(
                        title=get_text("Step 1", "步骤 1"),
                        description=get_text("Upload image and enter text",
                                             "传入图片和文本"),
                        get_target=
                        "() => document.querySelector('#input-container')")
                    antd.Tour.Step(
                        title=get_text("Step 2", "步骤 2"),
                        description=get_text("Click submit button",
                                             "点击提交按钮"),
                        get_target=
                        "() => document.querySelector('#submit-btn')")
                    antd.Tour.Step(
                        title=get_text("Step 3", "步骤 3"),
                        description=get_text("Wait for result", "等待结果返回"),
                        get_target=
                        "() => document.querySelector('#output-container')")

                tour_btn.click(fn=lambda: gr.update(props=dict(open=True)),
                               outputs=[tour])
                gr.on([tour.finish, tour.close],
                      fn=lambda: gr.update(props=dict(open=False)),
                      outputs=[tour])

                submit_btn.click(fn=generate,
                                 inputs=[input_image, input],
                                 outputs=[output])
                clear_btn.click(fn=on_clear, outputs=[input_image, input])

    demo.queue(default_concurrency_limit=50).launch()