File size: 6,462 Bytes
04e62f7 8ccf6e8 04e62f7 875084c af05828 04e62f7 026c760 a37af71 1078884 04e62f7 a37af71 04e62f7 2057c79 04e62f7 f67eaea 04e62f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
# Thanks: https://huggingface.co/spaces/stabilityai/stable-diffusion-3-medium
import spaces
import os
import gradio as gr
import numpy as np
import random
import torch
from diffusers import StableDiffusion3Pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# ---------------------------------------------------------------------------
# Runtime configuration and model loading (executed once at import time).
# ---------------------------------------------------------------------------
device = "cuda"
# NOTE(review): `dtype` is not referenced by the loaders below; they pass
# torch.bfloat16 explicitly.
dtype = torch.float16
repo = "aipicasso/emi-3"

# Text-to-image pipeline. The access token is read from the TOKEN
# environment variable (KeyError if it is not set).
t2i = StableDiffusion3Pipeline.from_pretrained(
    repo,
    torch_dtype=torch.bfloat16,
    token=os.environ["TOKEN"],
).to(device)

# Causal LM + tokenizer wrapped in a text-generation pipeline; `infer` uses
# it to turn the user's prompt into the prompt given to `t2i`.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    token=os.environ["TOKEN"],
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="cuda",
)
tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    token=os.environ["TOKEN"],
)
upsampler = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Keyword arguments forwarded to every `upsampler(...)` call.
generation_args = dict(
    max_new_tokens=226,
    return_full_text=False,
    temperature=0.7,
    do_sample=True,
    top_p=0.95,
)

# Upper bounds used by the seed and width/height sliders.
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1344
def _build_upsample_messages(prompt):
    """Return the few-shot chat asking the LLM to expand and translate *prompt*.

    The chat holds two worked user/assistant examples followed by the actual
    request, which embeds *prompt* between Japanese corner brackets.
    """
    return [
        {"role": "user", "content": "次のプロンプトを想像を膨らませて英語に翻訳してください。「目も髪もカラフルに染まっている少女がいて、虹のような背景に\"Emi 3\"と白い文字が書かれている」"},
        {"role": "assistant", "content": "anime style, 1girl, looking at viewer, serene expression, gentle smile, multicolored hair, rainbow gradient hair, wavy long hair, heterochromia, purple left eye, blue right eye, pastel color scheme, magical girl aesthetic, white text overlay \"Emi 3\", centered text, modern typography, ethereal lighting, soft glow, fantasy atmosphere, rainbow gradient background, dreamy atmosphere, sparkles, light particles, magical effects, depth of field, bokeh effect"},
        {"role": "user", "content": "次のプロンプトを想像を膨らませて英語に翻訳してください。「漫画風の富士山」"},
        {"role": "assistant", "content": "manga style, monochrome, no human, Illustration of snow-capped Mount Fuji. Clean, sharp line art with wispy clouds floating in the sky and 2-3 pine trees in the foreground. Dawn sky tinted pink, with the mountain casting deep blue shadows. Emphasize depth and perspective to capture the mountain's majesty. A glimpse of Lake Hakone visible at the bottom. "},
        {"role": "user", "content": f"次のプロンプトを想像を膨らませて英語に翻訳してください。「{prompt}」"},
    ]


@spaces.GPU
def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
    """Expand *prompt* with the LLM, then render one image from the result.

    Args:
        prompt: User text; inserted into the final message of the few-shot
            chat sent to ``upsampler``.
        negative_prompt: Forwarded unchanged to ``t2i``.
        seed: Seed for the torch generator; ignored when *randomize_seed*
            is truthy.
        randomize_seed: When truthy, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Forwarded to ``t2i`` as ``width`` (coerced to ``int``).
        height: Forwarded to ``t2i`` as ``height`` (coerced to ``int``).
        guidance_scale: Forwarded unchanged to ``t2i``.
        num_inference_steps: Forwarded to ``t2i`` (coerced to ``int``).
        progress: Not used in the body; presumably lets Gradio mirror tqdm
            progress in the UI -- confirm against the Gradio docs.

    Returns:
        A ``(image, seed, upsampled_prompt)`` tuple: the first image produced
        by ``t2i``, the seed actually used, and the LLM-generated prompt.
    """
    output = upsampler(_build_upsample_messages(prompt), **generation_args)
    upsampled_prompt = output[0]['generated_text']
    print(upsampled_prompt)

    # Slider values may arrive as floats (e.g. 42.0 from an API client);
    # torch.Generator.manual_seed only accepts ints, so coerce explicitly.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    generator = torch.Generator().manual_seed(seed)

    image = t2i(
        prompt=upsampled_prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        # Same float-vs-int concern as for the seed.
        num_inference_steps=int(num_inference_steps),
        width=int(width),
        height=int(height),
        generator=generator,
    ).images[0]

    return image, seed, upsampled_prompt
# Sample prompts offered below the input box via gr.Examples.
examples = [
    '目も髪もカラフルに染まっている少女がいて、虹のような背景に"Emi 3"と白い文字が書かれている',
    '炎の魔法使いの少女',
    '雷の魔法使いの少女',
    '漫画風の富士山',
]

# Page CSS: centres the main column and caps its width.
css = """
#col-container {
margin: 0 auto;
max-width: 580px;
}
"""
# Gradio UI: a prompt row, the result image, the prompt that was actually
# used for generation, an accordion of advanced settings, and example prompts.
# NOTE(review): the nesting of the `with` blocks was reconstructed from the
# statement order (the indentation was lost) -- confirm the layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("\n# 日本語が入力できる Emi 3\n")

        with gr.Row():
            prompt = gr.Text(
                label="プロンプト",
                show_label=False,
                max_lines=1,
                placeholder="作りたい画像の特徴を入力してください",
                container=False,
            )
            run_button = gr.Button("実行", scale=0)

        result = gr.Image(label="結果", show_label=False)
        # Read-only box that receives the LLM-generated prompt from `infer`.
        generated_prompt = gr.Textbox(label="生成に使ったプロンプト", show_label=False, interactive=False)

        with gr.Accordion("詳細設定", open=False):
            negative_prompt = gr.Text(
                label="ネガティブプロンプト",
                max_lines=1,
                placeholder="画像から排除したい要素を入力してください",
            )
            # Also an output of `infer`, so the seed actually used is shown.
            seed = gr.Slider(
                label="乱数のシード",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="ランダム生成", value=True)

            with gr.Row():
                width = gr.Slider(
                    label="横",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=1024,
                )
                height = gr.Slider(
                    label="縦",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=1024,
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="プロンプトの忠実さ",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=4.5,
                )
                num_inference_steps = gr.Slider(
                    label="推論回数",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=30,
                )

        gr.Examples(
            examples=examples,
            inputs=[prompt],
        )

    # Run `infer` on button click or when either prompt box is submitted.
    gr.on(
        triggers=[run_button.click, prompt.submit, negative_prompt.submit],
        fn=infer,
        inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
        outputs=[result, seed, generated_prompt],
    )

demo.launch()