add chatglm
- app.py +80 -5
- requirements.txt +2 -0
- utils/chatglm.py +192 -0
- utils/generator.py +18 -4
- utils/translate.py +40 -4
app.py
CHANGED
@@ -1,11 +1,13 @@
 import gradio as gr
 import torch
-
+import mdtex2html
 from utils.exif import get_image_info
 from utils.generator import generate_prompt
 from utils.image2text import git_image2text, w14_image2text, clip_image2text
 from utils.translate import en2zh as translate_en2zh
 from utils.translate import zh2en as translate_zh2en
+from utils.chatglm import chat2text
+from utils.chatglm import models as chatglm_models
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -51,18 +53,85 @@ def image_generate_prompter(
     return "\n".join(prompter_list), "\n".join(prompter_zh_list)
 
 
+def translate_input(text: str, chatglm_text: str) -> str:
+    if chatglm_text is not None and len(chatglm_text) > 0:
+        return translate_zh2en(chatglm_text)
+    return translate_zh2en(text)
+
+
 with gr.Blocks(title="Prompt生成器") as block:
     with gr.Column():
+        with gr.Tab('Chat'):
+            def revise(history, latest_message):
+                history[-1] = (history[-1][0], latest_message)
+                return history, ''
+
+
+            def revoke(history):
+                if len(history) >= 1:
+                    history.pop()
+                return history
+
+
+            def interrupt(allow_generate):
+                allow_generate[0] = False
+
+
+            def reset_state():
+                return [], []
+
+
+            with gr.Row():
+                with gr.Column(scale=4):
+                    chatbot = gr.Chatbot(elem_id="chat-box", show_label=False).style(height=800)
+                with gr.Column(scale=1):
+                    with gr.Row():
+                        max_length = gr.Slider(32, 4096, value=2048, step=1.0, label="Maximum length", interactive=True)
+                        top_p = gr.Slider(0.01, 1, value=0.7, step=0.01, label="Top P", interactive=True)
+                        temperature = gr.Slider(0.01, 5, value=0.95, step=0.01, label="Temperature", interactive=True)
+                    with gr.Row():
+                        query = gr.Textbox(show_label=False, placeholder="Prompts", lines=4).style(container=False)
+                        generate_button = gr.Button("生成")
+                    with gr.Row():
+                        continue_message = gr.Textbox(
+                            show_label=False, placeholder="Continue message", lines=2).style(container=False)
+                        continue_btn = gr.Button("续写")
+                        revise_message = gr.Textbox(
+                            show_label=False, placeholder="Revise message", lines=2).style(container=False)
+                        revise_btn = gr.Button("修订")
+                        revoke_btn = gr.Button("撤回")
+                        interrupt_btn = gr.Button("终止生成")
+                        reset_btn = gr.Button("清空")
+
+            history = gr.State([])
+            allow_generate = gr.State([True])
+            blank_input = gr.State("")
+            reset_btn.click(reset_state, outputs=[chatbot, history], show_progress=True)
+            generate_button.click(
+                chatglm_models.chatglm.predict_continue,
+                inputs=[query, blank_input, max_length, top_p, temperature, allow_generate, history],
+                outputs=[chatbot, query]
+            )
+            revise_btn.click(revise, inputs=[history, revise_message], outputs=[chatbot, revise_message])
+            revoke_btn.click(revoke, inputs=[history], outputs=[chatbot])
+            continue_btn.click(
+                chatglm_models.chatglm.predict_continue,
+                inputs=[query, continue_message, max_length, top_p, temperature, allow_generate, history],
+                outputs=[chatbot, query, continue_message]
+            )
+            interrupt_btn.click(interrupt, inputs=[allow_generate])
         with gr.Tab('文本生成'):
             with gr.Row():
                 input_text = gr.Textbox(lines=6, label='你的想法', placeholder='在此输入内容...')
+                chatglm_output = gr.Textbox(lines=6, label='ChatGLM', placeholder='在此输入内容...')
+
                 translate_output = gr.Textbox(lines=6, label='翻译结果(Prompt输入)')
 
                 output = gr.Textbox(lines=6, label='优化的 Prompt')
                 output_zh = gr.Textbox(lines=6, label='优化的 Prompt(zh)')
             with gr.Row():
+                chatglm_btn = gr.Button('召唤ChatGLM')
                 translate_btn = gr.Button('翻译')
-
                 generate_prompter_btn = gr.Button('优化Prompt')
 
         with gr.Tab('从图片中生成'):
@@ -94,13 +163,14 @@ with gr.Blocks(title="Prompt生成器") as block:
                     'microsoft',
                    'mj',
                    'gpt2_650k',
+                    'gpt_neo_125m',
                ],
                value='gpt2_650k',
                label='model_name'
            )
            prompt_min_length = gr.Slider(1, 512, 100, label='min_length', step=1)
            prompt_max_length = gr.Slider(1, 512, 200, label='max_length', step=1)
-            prompt_num_return_sequences = gr.Slider(1, 30,
+            prompt_num_return_sequences = gr.Slider(1, 30, 8, label='num_return_sequences', step=1)
 
            with gr.Accordion('BLIP参数', open=True):
                blip_max_length = gr.Slider(1, 512, 100, label='max_length', step=1)
@@ -145,9 +215,14 @@ with gr.Blocks(title="Prompt生成器") as block:
         ],
        outputs=[output_img_prompter, output_img_prompter_zh]
    )
-
-        fn=
+    chatglm_btn.click(
+        fn=chatglm_models.chatglm.generator_image_text,
         inputs=input_text,
+        outputs=chatglm_output,
+    )
+    translate_btn.click(
+        fn=translate_input,
+        inputs=[input_text, chatglm_output],
         outputs=translate_output
    )
 
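For context, the Chat tab's 生成 button is wired to chatglm_models.chatglm.predict_continue, a generator that streams partial answers by rewriting history[-1]. A rough sketch of driving it outside Gradio, assuming the ChatGLM weights can actually be downloaded and loaded on the host (the query string is only illustrative):

from utils.chatglm import models as chatglm_models

history = []
allow_generate = [True]
# predict_continue yields (history, '', '') after each decoding step,
# with the latest partial reply stored in history[-1][1].
for hist, _, _ in chatglm_models.chatglm.predict_continue(
        query="画面:雨中撑伞的柴犬",
        latest_message="",
        max_length=2048,
        top_p=0.7,
        temperature=0.95,
        allow_generate=allow_generate,
        history=history):
    print(hist[-1][1])  # streamed partial response
# Setting allow_generate[0] = False (what the 终止生成 button does) ends the loop
# after the next yielded step.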
requirements.txt
CHANGED
@@ -10,3 +10,5 @@ protobuf<=3.20.1,>=3.12.2
 opencv-python==4.7.0.72
 huggingface-hub==0.13.2
 clip-interrogator==0.6.0
+cpm_kernels==1.0.11
+mdtex2html==1.2.0
utils/chatglm.py
ADDED
@@ -0,0 +1,192 @@
+import time
+from abc import ABC, abstractmethod
+from typing import List, Tuple
+
+import torch
+from transformers import AutoModel, AutoTokenizer
+from transformers import LogitsProcessor, LogitsProcessorList
+
+from .singleton import Singleton
+
+
+def parse_codeblock(text):
+    lines = text.split("\n")
+    for i, line in enumerate(lines):
+        if "```" in line:
+            if line != "```":
+                lines[i] = f'<pre><code class="{lines[i][3:]}">'
+            else:
+                lines[i] = '</code></pre>'
+        else:
+            if i > 0:
+                lines[i] = "<br/>" + line.replace("<", "&lt;").replace(">", "&gt;")
+    return "".join(lines)
+
+
+class BasePredictor(ABC):
+
+    @abstractmethod
+    def __init__(self, model_name):
+        self.model = None
+        self.tokenizer = None
+
+    @abstractmethod
+    def stream_chat_continue(self, *args, **kwargs):
+        raise NotImplementedError
+
+    def predict_continue(self, query, latest_message, max_length, top_p,
+                         temperature, allow_generate, history, *args,
+                         **kwargs):
+        if history is None:
+            history = []
+        allow_generate[0] = True
+        history.append((query, latest_message))
+        for response in self.stream_chat_continue(
+                self.model,
+                self.tokenizer,
+                query=query,
+                history=history,
+                max_length=max_length,
+                top_p=top_p,
+                temperature=temperature):
+            history[-1] = (history[-1][0], response)
+            yield history, '', ''
+            if not allow_generate[0]:
+                break
+
+
+class InvalidScoreLogitsProcessor(LogitsProcessor):
+
+    def __init__(self, start_pos=20005):
+        self.start_pos = start_pos
+
+    def __call__(self, input_ids: torch.LongTensor,
+                 scores: torch.FloatTensor) -> torch.FloatTensor:
+        if torch.isnan(scores).any() or torch.isinf(scores).any():
+            scores.zero_()
+            scores[..., self.start_pos] = 5e4
+        return scores
+
+
+class ChatGLM(BasePredictor):
+
+    def __init__(self, model_name="THUDM/chatglm-6b-int4"):
+
+        print(f'Loading model {model_name}')
+        start = time.perf_counter()
+        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            trust_remote_code=True,
+            resume_download=True
+        )
+        model = AutoModel.from_pretrained(
+            model_name,
+            trust_remote_code=True,
+            resume_download=True
+        ).half().to(self.device)
+
+        model = model.eval()
+        self.model = model
+        self.model_name = model_name
+        end = time.perf_counter()
+        print(
+            f'Successfully loaded model {model_name}, time cost: {end - start:.2f}s'
+        )
+
+    @torch.no_grad()
+    def generator_image_text(self, text):
+        response, history = self.model.chat(self.tokenizer, "描述画面:{}".format(text), history=[])
+        return response
+
+    @torch.no_grad()
+    def stream_chat_continue(self,
+                             model,
+                             tokenizer,
+                             query: str,
+                             history: List[Tuple[str, str]] = None,
+                             max_length: int = 2048,
+                             do_sample=True,
+                             top_p=0.7,
+                             temperature=0.95,
+                             logits_processor=None,
+                             **kwargs):
+        if history is None:
+            history = []
+        if logits_processor is None:
+            logits_processor = LogitsProcessorList()
+        if len(history) > 0:
+            answer = history[-1][1]
+        else:
+            answer = ''
+        logits_processor.append(
+            InvalidScoreLogitsProcessor(
+                start_pos=20005 if 'slim' not in self.model_name else 5))
+        gen_kwargs = {
+            "max_length": max_length,
+            "do_sample": do_sample,
+            "top_p": top_p,
+            "temperature": temperature,
+            "logits_processor": logits_processor,
+            **kwargs
+        }
+        if not history:
+            prompt = query
+        else:
+            prompt = ""
+            for i, (old_query, response) in enumerate(history):
+                if i != len(history) - 1:
+                    prompt += "[Round {}]\n问:{}\n答:{}\n".format(
+                        i, old_query, response)
+                else:
+                    prompt += "[Round {}]\n问:{}\n答:".format(i, old_query)
+        batch_input = tokenizer([prompt], return_tensors="pt", padding=True)
+        batch_input = batch_input.to(model.device)
+
+        batch_answer = tokenizer(answer, return_tensors="pt")
+        batch_answer = batch_answer.to(model.device)
+
+        input_length = len(batch_input['input_ids'][0])
+        final_input_ids = torch.cat(
+            [batch_input['input_ids'], batch_answer['input_ids'][:, :-2]],
+            dim=-1).cuda()
+
+        attention_mask = model.get_masks(
+            final_input_ids, device=final_input_ids.device)
+
+        batch_input['input_ids'] = final_input_ids
+        batch_input['attention_mask'] = attention_mask
+
+        input_ids = final_input_ids
+        MASK, gMASK = self.model.config.bos_token_id - 4, self.model.config.bos_token_id - 3
+        mask_token = MASK if MASK in input_ids else gMASK
+        mask_positions = [seq.tolist().index(mask_token) for seq in input_ids]
+        batch_input['position_ids'] = self.model.get_position_ids(
+            input_ids, mask_positions, device=input_ids.device)
+
+        for outputs in model.stream_generate(**batch_input, **gen_kwargs):
+            outputs = outputs.tolist()[0][input_length:]
+            response = tokenizer.decode(outputs)
+            response = model.process_response(response)
+            yield parse_codeblock(response)
+
+
+@Singleton
+class Models(object):
+
+    def __getattr__(self, item):
+        if item in self.__dict__:
+            return getattr(self, item)
+
+        if item == 'chatglm':
+            self.chatglm = ChatGLM("THUDM/chatglm-6b-int4")
+
+        return getattr(self, item)
+
+
+models = Models.instance()
+
+
+def chat2text(text: str) -> str:
+    return models.chatglm.generator_image_text(text)
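The new module keeps a lazily built singleton (Models), so the int4 ChatGLM weights are only loaded the first time models.chatglm is touched; chat2text is the small wrapper the rest of the app imports. A minimal usage sketch, assuming the machine has enough memory for THUDM/chatglm-6b-int4 (the example text is illustrative):

from utils.chatglm import chat2text, models

# First access constructs ChatGLM("THUDM/chatglm-6b-int4") and caches it.
predictor = models.chatglm

# chat2text wraps generator_image_text: it asks the model to describe a
# scene ("描述画面:...") and returns the reply string.
print(chat2text("星空下的富士山"))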
utils/generator.py
CHANGED
@@ -24,11 +24,16 @@ class Models(object):
         if item in ('gpt2_650k_pipe',):
             self.gpt2_650k_pipe = self.load_gpt2_650k_pipe()
 
+        if item in ('gpt_neo_125m',):
+            self.gpt_neo_125m = self.load_gpt_neo_125m()
         return getattr(self, item)
 
     @classmethod
-    def load_gpt2_650k_pipe(cls):
+    def load_gpt_neo_125m(cls):
+        return pipeline('text-generation', model='DrishtiSharma/StableDiffusion-Prompt-Generator-GPT-Neo-125M')
 
+    @classmethod
+    def load_gpt2_650k_pipe(cls):
         return pipeline('text-generation', model='Ar4ikov/gpt2-650k-stable-diffusion-prompt-generator')
 
     @classmethod
@@ -62,7 +67,16 @@ def generate_prompt(
         model_name='microsoft',
 ):
     if model_name == 'gpt2_650k':
-        return
+        return generate_prompt_pipe(
+            models.gpt2_650k_pipe,
+            prompt=plain_text,
+            min_length=min_length,
+            max_length=max_length,
+            num_return_sequences=num_return_sequences,
+        )
+    elif model_name == 'gpt_neo_125m':
+        return generate_prompt_pipe(
+            models.gpt_neo_125m,
             prompt=plain_text,
             min_length=min_length,
             max_length=max_length,
@@ -114,7 +128,7 @@ def generate_prompt_microsoft(
     return "\n".join(result)
 
 
-def generate_prompt_gpt2_650k(prompt: str, min_length=60, max_length: int = 255, num_return_sequences: int = 8) -> str:
+def generate_prompt_pipe(pipe, prompt: str, min_length=60, max_length: int = 255, num_return_sequences: int = 8) -> str:
     def get_valid_prompt(text: str) -> str:
         dot_split = text.split('.')[0]
         n_split = text.split('\n')[0]
@@ -130,7 +144,7 @@ def generate_prompt_gpt2_650k(prompt: str, min_length=60, max_length: int = 255,
 
     output += [
         get_valid_prompt(result['generated_text']) for result in
-        models.gpt2_650k_pipe(
+        pipe(
             prompt,
             max_new_tokens=rand_length(min_length, max_length),
             num_return_sequences=num_return_sequences
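With the GPT-2 650k branch generalized into generate_prompt_pipe, the newly added GPT-Neo 125M generator reuses the same sampling and cleanup path. A small usage sketch (the prompt text and parameter values are illustrative, and it assumes both Hugging Face models can be downloaded):

from utils.generator import generate_prompt

# Same entry point as before, now dispatching through generate_prompt_pipe:
print(generate_prompt("a cat sitting on a windowsill",
                      min_length=60, max_length=200,
                      num_return_sequences=4, model_name='gpt2_650k'))

# The new DrishtiSharma GPT-Neo 125M prompt generator:
print(generate_prompt("a cat sitting on a windowsill",
                      min_length=60, max_length=200,
                      num_return_sequences=4, model_name='gpt_neo_125m'))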
utils/translate.py
CHANGED
@@ -1,6 +1,10 @@
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import torch
 from .singleton import Singleton
+from transformers import (
+    EncoderDecoderModel,
+    AutoTokenizer
+)
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -18,8 +22,18 @@ class Models(object):
         if item in ('en2zh_model', 'en2zh_tokenizer',):
             self.en2zh_model, self.en2zh_tokenizer = self.load_en2zh_model()
 
+        if item in ('wenyanwen2modern_tokenizer', 'wenyanwen2modern_model',):
+            self.wenyanwen2modern_tokenizer, self.wenyanwen2modern_model = self.load_wenyanwen2modern_model()
+
         return getattr(self, item)
 
+    @classmethod
+    def load_wenyanwen2modern_model(cls):
+        PRETRAINED = "raynardj/wenyanwen-ancient-translate-to-modern"
+        tokenizer = AutoTokenizer.from_pretrained(PRETRAINED)
+        model = EncoderDecoderModel.from_pretrained(PRETRAINED)
+        return tokenizer, model
+
     @classmethod
     def load_en2zh_model(cls):
         en2zh_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-zh").eval()
@@ -37,14 +51,35 @@ class Models(object):
 models = Models.instance()
 
 
-def zh2en(text):
+def wenyanwen2modern(text: str) -> str:
+    tk_kwargs = dict(
+        truncation=True,
+        max_length=128,
+        padding="max_length",
+        return_tensors='pt')
+
+    inputs = models.wenyanwen2modern_tokenizer([text, ], **tk_kwargs)
+    with torch.no_grad():
+        return models.wenyanwen2modern_tokenizer.batch_decode(
+            models.wenyanwen2modern_model.generate(
+                inputs.input_ids,
+                attention_mask=inputs.attention_mask,
+                num_beams=3,
+                max_length=256,
+                bos_token_id=101,
+                eos_token_id=models.wenyanwen2modern_tokenizer.sep_token_id,
+                pad_token_id=models.wenyanwen2modern_tokenizer.pad_token_id,
+            ), skip_special_tokens=True)[0].replace(" ", "")
+
+
+def zh2en(text: str) -> str:
     with torch.no_grad():
         encoded = models.zh2en_tokenizer([text], return_tensors="pt")
         sequences = models.zh2en_model.generate(**encoded)
         return models.zh2en_tokenizer.batch_decode(sequences, skip_special_tokens=True)[0]
 
 
-def en2zh(text):
+def en2zh(text: str) -> str:
     with torch.no_grad():
         encoded = models.en2zh_tokenizer([text], return_tensors="pt")
         sequences = models.en2zh_model.generate(**encoded)
@@ -52,8 +87,9 @@ def en2zh(text):
 
 
 if __name__ == "__main__":
-    input = "
-
+    input = "飞流直下三千尺,疑是银河落九天"
+    input_m = wenyanwen2modern(input)
+    en = zh2en(input_m)
     print(input, en)
     zh = en2zh(en)
     print(en, zh)
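The __main__ block above exercises the full chain: Classical Chinese is first normalized to Modern Chinese with wenyanwen2modern (beam search, spaces stripped from the decoded output), then round-tripped through zh2en and en2zh. A condensed sketch of using the new helper on its own (the outputs depend on the raynardj model, so they are indicative only):

from utils.translate import wenyanwen2modern, zh2en

modern = wenyanwen2modern("飞流直下三千尺,疑是银河落九天")  # Classical -> Modern Chinese
print(modern)
print(zh2en(modern))  # then into English, as the prompt pipeline expects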