Commit 8d49336: v2 support
Parent(s): e0e9e9a

Files changed:
- app.py (+157 -85)
- requirements-local.txt (+18 -0)
- requirements.txt (+1 -1)
- train_dreambooth.py (+6 -3)
app.py
CHANGED
@@ -28,32 +28,40 @@ css = '''
 '''
 maximum_concepts = 3

-#Pre download the files
+#Pre download the files
+model_v1 = snapshot_download(repo_id="multimodalart/sd-fine-tunable")
+#model_v2 = snapshot_download(repo_id="stabilityai/stable-diffusion-2")
+model_v2_512 = snapshot_download(repo_id="stabilityai/stable-diffusion-2-base")
 safety_checker = snapshot_download(repo_id="multimodalart/sd-sc")

-            ziph.write(os.path.join(root, file),
-                       os.path.relpath(os.path.join(root, file),
-                                       os.path.join(path, '..')))
+model_to_load = model_v1
+
+#with zipfile.ZipFile("mix.zip", 'r') as zip_ref:
+#    zip_ref.extractall(".")

 def swap_text(option):
     mandatory_liability = "You must have the right to do so and you are liable for the images you use, example:"
     if(option == "object"):
         instance_prompt_example = "cttoy"
         freeze_for = 50
-        return [f"You are going to train `object`(s), upload 5-10 images of each object you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/cat-toy.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for]
+        return [f"You are going to train `object`(s), upload 5-10 images of each object you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/cat-toy.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
     elif(option == "person"):
         instance_prompt_example = "julcto"
-        freeze_for =
-        return [f"You are going to train a `person`(s), upload 10-20 images of each person you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/person.png" />''', f"You should name the files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for]
+        freeze_for = 65
+        return [f"You are going to train a `person`(s), upload 10-20 images of each person you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/person.png" />''', f"You should name the files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
     elif(option == "style"):
         instance_prompt_example = "trsldamrl"
         freeze_for = 10
-        return [f"You are going to train a `style`, upload 10-20 images of the style you are planning on training on. Name the files with the words you would like {mandatory_liability}:", '''<img src="file/trsl_style.png" />''', f"You should name your files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for]
+        return [f"You are going to train a `style`, upload 10-20 images of the style you are planning on training on. Name the files with the words you would like {mandatory_liability}:", '''<img src="file/trsl_style.png" />''', f"You should name your files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
+
+def swap_base_model(selected_model):
+    global model_to_load
+    if(selected_model == "v1-5"):
+        model_to_load = model_v1
+    elif(selected_model == "v2-768"):
+        model_to_load = model_v2
+    else:
+        model_to_load = model_v2_512

 def count_files(*inputs):
     file_counter = 0
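Note on the base-model switch above: the new dropdown callback rebinds a module-level model_to_load variable rather than routing the choice through component state. A minimal standalone sketch of that pattern, with placeholder paths and names that are not the Space's actual values:

import gradio as gr

# Module-level selection mirrored by the dropdown, as in app.py.
model_paths = {
    "v1-5": "/path/to/sd-fine-tunable",          # placeholder, not the real snapshot path
    "v2-512": "/path/to/stable-diffusion-2-base",
}
model_to_load = model_paths["v1-5"]

def swap_base_model(selected_model):
    # Rebind the module-level variable so later training code picks it up.
    global model_to_load
    model_to_load = model_paths.get(selected_model, model_paths["v2-512"])

with gr.Blocks() as demo:
    base_model = gr.Dropdown(choices=list(model_paths.keys()), value="v1-5", label="Base model")
    # No outputs: the callback only mutates global state.
    base_model.change(fn=swap_base_model, inputs=base_model, outputs=[])

if __name__ == "__main__":
    demo.launch()

The train() callback still receives the dropdown value directly as one of its inputs, so the global is mainly a convenience for code that runs outside the click handler.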
@@ -69,10 +77,7 @@ def count_files(*inputs):
     if(uses_custom):
         Training_Steps = int(inputs[-3])
     else:
-        Training_Steps = file_counter*200*2
-    else:
-        Training_Steps = file_counter*200
+        Training_Steps = file_counter*200
     if(is_spaces):
         summary_sentence = f'''You are going to train {concept_counter} {type_of_thing}(s), with {file_counter} images for {Training_Steps} steps. The training should take around {round(Training_Steps/1.1, 2)} seconds, or {round((Training_Steps/1.1)/60, 2)} minutes.
 The setup, compression and uploading the model can take up to 20 minutes.<br>As the T4-Small GPU costs US$0.60 for 1h, <span style="font-size: 120%"><b>the estimated cost for this training is US${round((((Training_Steps/1.1)/3600)+0.3+0.1)*0.60, 2)}.</b></span><br><br>
@@ -82,6 +87,13 @@ def count_files(*inputs):

     return([gr.update(visible=True), gr.update(visible=True, value=summary_sentence)])

+def update_steps(*files_list):
+    file_counter = 0
+    for i, files in enumerate(files_list):
+        if(files):
+            file_counter+=len(files)
+    return(gr.update(value=file_counter*200))
+
 def pad_image(image):
     w, h = image.size
     if w == h:
@@ -101,7 +113,9 @@ def train(*inputs):

     torch.cuda.empty_cache()
     if 'pipe' in globals():
+        global pipe, pipe_is_set
         del pipe
+        pipe_is_set = False
     gc.collect()

     if os.path.exists("output_model"): shutil.rmtree('output_model')
@@ -130,9 +144,9 @@ def train(*inputs):
     os.makedirs('output_model',exist_ok=True)
     uses_custom = inputs[-1]
     type_of_thing = inputs[-4]
     remove_attribution_after = inputs[-6]
+    experimental_face_improvement = inputs[-9]
+    which_model = inputs[-10]
     if(uses_custom):
         Training_Steps = int(inputs[-3])
         Train_text_encoder_for = int(inputs[-2])
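The train() callback above receives every dynamically created concept widget plus the fixed controls in one flat *inputs tuple, so the fixed controls are unpacked by negative index from the end. A standalone sketch of that convention, with purely illustrative values:

def train(*inputs):
    # The last ten items are the fixed controls appended after the per-concept
    # components, so everything is addressed from the end of the tuple.
    which_model = inputs[-10]
    experimental_face_improvement = inputs[-9]
    where_to_upload = inputs[-8]
    model_name = inputs[-7]
    remove_attribution_after = inputs[-6]
    hf_token = inputs[-5]
    type_of_thing = inputs[-4]
    steps = inputs[-3]
    perc_txt_encoder = inputs[-2]
    uses_custom = inputs[-1]
    return which_model, type_of_thing, uses_custom

# Example: three concept slots (name + files) followed by the ten fixed controls.
example = ["cttoy", ["img1.jpg"], "", [], "", [],
           "v1-5", False, "My personal profile", "my-model",
           False, "hf_xxx", "object", 800, 30, False]
print(train(*example))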
@@ -140,51 +154,100 @@ def train(*inputs):
         Training_Steps = file_counter*200
         if(type_of_thing == "object"):
             Train_text_encoder_for=30
-        elif(type_of_thing == "person"):
-            Train_text_encoder_for=60
         elif(type_of_thing == "style"):
             Train_text_encoder_for=15
+        elif(type_of_thing == "person"):
+            Train_text_encoder_for=65

-    class_data_dir = None
     stptxt = int((Training_Steps*Train_text_encoder_for)/100)
+    if (type_of_thing == "object" or type_of_thing == "style" or (type_of_thing == "person" and not experimental_face_improvement)):
+        args_general = argparse.Namespace(
+            image_captions_filename = True,
+            train_text_encoder = True if stptxt > 0 else False,
+            stop_text_encoder_training = stptxt,
+            save_n_steps = 0,
+            pretrained_model_name_or_path = model_to_load,
+            instance_data_dir="instance_images",
+            class_data_dir=None,
+            output_dir="output_model",
+            instance_prompt="",
+            seed=42,
+            resolution=512,
+            mixed_precision="fp16",
+            train_batch_size=1,
+            gradient_accumulation_steps=1,
+            use_8bit_adam=True,
+            learning_rate=2e-6,
+            lr_scheduler="polynomial",
+            lr_warmup_steps = 0,
+            max_train_steps=Training_Steps,
+        )
+        print("Starting single training...")
+        lock_file = open("intraining.lock", "w")
+        lock_file.close()
+        run_training(args_general)
+    else:
+        args_txt_encoder = argparse.Namespace(
+            image_captions_filename=True,
+            train_text_encoder=True,
+            dump_only_text_encoder=True,
+            pretrained_model_name_or_path=model_to_load,
+            save_n_steps=0,
+            instance_data_dir="instance_images",
+            class_data_dir="Mix",
+            output_dir="output_model",
+            with_prior_preservation=True,
+            prior_loss_weight=1.0,
+            instance_prompt="",
+            seed=42,
+            resolution=512,
+            mixed_precision="fp16",
+            train_batch_size=1,
+            gradient_accumulation_steps=1,
+            gradient_checkpointing=True,
+            use_8bit_adam=True,
+            learning_rate=2e-6,
+            lr_scheduler="polynomial",
+            lr_warmup_steps = 0,
+            max_train_steps=stptxt,
+            num_class_images=200
+        )
+        args_unet = argparse.Namespace(
+            image_captions_filename=True,
+            train_only_unet=True,
+            save_n_steps=0,
+            pretrained_model_name_or_path=model_to_load,
+            instance_data_dir="instance_images",
+            output_dir="output_model",
+            instance_prompt="",
+            seed=42,
+            resolution=512,
+            mixed_precision="fp16",
+            train_batch_size=1,
+            gradient_accumulation_steps=1,
+            use_8bit_adam=True,
+            learning_rate=2e-6,
+            lr_scheduler="polynomial",
+            lr_warmup_steps = 0,
+            max_train_steps=Training_Steps,
+        )
+        print("Starting multi-training...")
+        lock_file = open("intraining.lock", "w")
+        lock_file.close()
+        run_training(args_txt_encoder)
+        run_training(args_unet)
     gc.collect()
     torch.cuda.empty_cache()
+    if(which_model == "v1-5"):
+        print("Adding Safety Checker to the model...")
+        shutil.copytree(f"{safety_checker}/feature_extractor", "output_model/feature_extractor")
+        shutil.copytree(f"{safety_checker}/safety_checker", "output_model/safety_checker")
+        shutil.copy(f"model_index.json", "output_model/model_index.json")

-    #with zipfile.ZipFile('diffusers_model.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
-    #    zipdir('output_model/', zipf)
     if(not remove_attribution_after):
         print("Archiving model file...")
         with tarfile.open("diffusers_model.tar", "w") as tar:
-            tar.add("
+            tar.add("output_model", arcname=os.path.basename("output_model"))
         if os.path.exists("intraining.lock"): os.remove("intraining.lock")
         trained_file = open("hastrained.success", "w")
         trained_file.close()
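The training run above is driven programmatically: instead of shelling out to train_dreambooth.py, the commit builds argparse.Namespace objects and hands them to run_training. A minimal sketch of that pattern with a stand-in runner; the runner body here is illustrative, not the script's:

import argparse

def run_training(args):
    # Stand-in for train_dreambooth.run_training: it reads attributes, not sys.argv.
    print(f"training {args.pretrained_model_name_or_path} "
          f"for {args.max_train_steps} steps "
          f"(text encoder stops at step {args.stop_text_encoder_training})")

# Namespace behaves like the object ArgumentParser.parse_args() returns, so the
# same run_training body works both for CLI use and for in-process calls like this.
args_general = argparse.Namespace(
    pretrained_model_name_or_path="/path/to/base-model",  # placeholder path
    max_train_steps=1600,
    stop_text_encoder_training=480,  # e.g. 30% of 1600 steps
    mixed_precision="fp16",
)
run_training(args_general)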
@@ -201,22 +264,27 @@ def train(*inputs):
         hf_token = inputs[-5]
         model_name = inputs[-7]
         where_to_upload = inputs[-8]
-        push(model_name, where_to_upload, hf_token, True)
+        push(model_name, where_to_upload, hf_token, which_model, True)
         hardware_url = f"https://huggingface.co/spaces/{os.environ['SPACE_ID']}/hardware"
         headers = { "authorization" : f"Bearer {hf_token}"}
         body = {'flavor': 'cpu-basic'}
         requests.post(hardware_url, json = body, headers=headers)

+pipe_is_set = False
+def generate(prompt, steps):
     torch.cuda.empty_cache()
     from diffusers import StableDiffusionPipeline
-    global
+    global pipe_is_set
+    if(not pipe_is_set):
+        global pipe
+        pipe = StableDiffusionPipeline.from_pretrained("./output_model", torch_dtype=torch.float16)
+        pipe = pipe.to("cuda")
+        pipe_is_set = True
+
+    image = pipe(prompt, num_inference_steps=steps).images[0]
     return(image)

-def push(model_name, where_to_upload, hf_token, comes_from_automated=False):
+def push(model_name, where_to_upload, hf_token, which_model, comes_from_automated=False):
     if(not os.path.exists("model.ckpt")):
         convert("output_model", "model.ckpt")
     from huggingface_hub import HfApi, HfFolder, CommitOperationAdd
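generate() above keeps the loaded StableDiffusionPipeline in a global guarded by pipe_is_set, so repeated prompts do not reload the fp16 weights. A simplified sketch of the same lazy-loading idea; the Space's version also clears the cached pipeline before retraining:

import torch
from diffusers import StableDiffusionPipeline

_pipe = None  # loaded once, reused across calls

def generate(prompt, steps=50, model_dir="./output_model"):
    # Lazily build the pipeline on the first call, then reuse the cached object;
    # reloading the weights on every click would dominate the latency.
    global _pipe
    if _pipe is None:
        _pipe = StableDiffusionPipeline.from_pretrained(model_dir, torch_dtype=torch.float16)
        _pipe = _pipe.to("cuda")
    return _pipe(prompt, num_inference_steps=int(steps)).images[0]

# Usage (requires a trained model directory and a CUDA GPU):
# image = generate("a photo of cttoy on the beach", steps=50)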
@@ -250,7 +318,7 @@ license: creativeml-openrail-m
 tags:
 - text-to-image
 ---
-### {model_name} Dreambooth model trained by {api.whoami(token=hf_token)["name"]} with [Hugging Face Dreambooth Training Space](https://huggingface.co/spaces/multimodalart/dreambooth-training)
+### {model_name} Dreambooth model trained by {api.whoami(token=hf_token)["name"]} with [Hugging Face Dreambooth Training Space](https://huggingface.co/spaces/multimodalart/dreambooth-training) with the {which_model} base model

 You run your new concept via `diffusers` [Colab Notebook for Inference](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_inference.ipynb). Don't forget to use the concept prompts!

@@ -371,21 +439,24 @@ with gr.Blocks(css=css) as demo:
         top_description = gr.HTML(f'''
             <div class="gr-prose" style="max-width: 80%">
             <h2>You have successfully cloned the Dreambooth Training Space locally 🎉</h2>
-            <p>
+            <p>Do a <code>pip install requirements-local.txt</code></p>
             </div>
         ''')
-    gr.Markdown("# Dreambooth Training UI")
-    gr.Markdown("Customize Stable Diffusion by training it on a few examples of concepts, up to 3 concepts on the same model. This Space is based on TheLastBen's [fast-DreamBooth Colab](https://colab.research.google.com/github/TheLastBen/fast-stable-diffusion/blob/main/fast-DreamBooth.ipynb) with [🧨 diffusers](https://github.com/huggingface/diffusers)")
+    gr.Markdown("# Dreambooth Training UI 💭")
+    gr.Markdown("Customize Stable Diffusion v1 or v2 (new!) by training it on a few examples of concepts, up to 3 concepts on the same model. This Space is based on TheLastBen's [fast-DreamBooth Colab](https://colab.research.google.com/github/TheLastBen/fast-stable-diffusion/blob/main/fast-DreamBooth.ipynb) with [🧨 diffusers](https://github.com/huggingface/diffusers)")

     with gr.Row() as what_are_you_training:
         type_of_thing = gr.Dropdown(label="What would you like to train?", choices=["object", "person", "style"], value="object", interactive=True)
+        base_model_to_use = gr.Dropdown(label="Which base model would you like to use?", choices=["v1-5", "v2-512"], value="v1-5", interactive=True)
+
     #Very hacky approach to emulate dynamically created Gradio components
     with gr.Row() as upload_your_concept:
         with gr.Column():
-            thing_description = gr.Markdown("You are going to train an `object`, please upload 5-10 images of the object you are planning on training on from different angles/perspectives. You must have the right to do so and you are liable for the images you use, example
+            thing_description = gr.Markdown("You are going to train an `object`, please upload 5-10 images of the object you are planning on training on from different angles/perspectives. You must have the right to do so and you are liable for the images you use, example")
+            thing_experimental = gr.Checkbox(label="Improve faces (experimental) - takes 1.5x times training, can improve if you are training people's faces", visible=False, value=False)
             thing_image_example = gr.HTML('''<img src="file/cat-toy.png" />''')
             things_naming = gr.Markdown("You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `cttoy` here). Images will be automatically cropped to 512x512.")
+
         with gr.Column():
             file_collection = []
             concept_collection = []
@@ -431,24 +502,19 @@ with gr.Blocks(css=css) as demo:

     with gr.Accordion("Custom Settings", open=False):
         swap_auto_calculated = gr.Checkbox(label="Use custom settings")
-        gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style` as follows: The number of steps is calculated by number of images uploaded multiplied by
+        gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style` as follows: The number of steps is calculated by number of images uploaded multiplied by 200. The text-encoder is frozen after 10% of the steps for a style, 30% of the steps for an object and 65% trained for persons.")
         steps = gr.Number(label="How many steps", value=800)
         perc_txt_encoder = gr.Number(label="Percentage of the training steps the text-encoder should be trained as well", value=30)
+
     with gr.Box(visible=False) as training_summary:
         training_summary_text = gr.HTML("", visible=False, label="Training Summary")
-        if
-        training_summary_checkbox = False
-        training_summary_model_name = ''
-        training_summary_where_to_upload = "My person profile"
-        training_summary_token_message = ""
-        training_summary_token = ""
+        is_advanced_visible = True if is_spaces else False
+        training_summary_checkbox = gr.Checkbox(label="Automatically remove paid GPU attribution and upload model to the Hugging Face Hub after training", value=False, visible=is_advanced_visible)
+        training_summary_model_name = gr.Textbox(label="Name of your model", visible=False)
+        training_summary_where_to_upload = gr.Dropdown(["My personal profile", "Public Library"], label="Upload to", visible=False)
+        training_summary_token_message = gr.Markdown("[A Hugging Face write access token](https://huggingface.co/settings/tokens), go to \"New token\" -> Role : Write. A regular read token won't work here.", visible=False)
+        training_summary_token = gr.Textbox(label="Hugging Face Write Token", type="password", visible=False)
+
     train_btn = gr.Button("Start Training")

     training_ongoing = gr.Markdown("## Training is ongoing ⌛... You can close this tab if you like or just wait. If you did not check the `Remove GPU After training`, you can come back here to try your model and upload it after training. Don't forget to remove the GPU attribution after you are done. ", visible=False)
@@ -462,6 +528,7 @@ with gr.Blocks(css=css) as demo:
         gr.Markdown("## Try your model")
         prompt = gr.Textbox(label="Type your prompt")
         result_image = gr.Image()
+        inference_steps = gr.Slider(minimum=1, maximum=150, value=50, step=1)
         generate_button = gr.Button("Generate Image")

     with gr.Box(visible=False) as push_to_hub:
@@ -478,11 +545,16 @@ with gr.Blocks(css=css) as demo:
         convert_button = gr.Button("Convert to CKPT", visible=False)

     #Swap the examples and the % of text encoder trained depending if it is an object, person or style
-    type_of_thing.change(fn=swap_text, inputs=[type_of_thing], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder], queue=False, show_progress=False)
+    type_of_thing.change(fn=swap_text, inputs=[type_of_thing], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder, thing_experimental], queue=False, show_progress=False)

+    #Swap the base model
+    base_model_to_use.change(fn=swap_base_model, inputs=base_model_to_use, outputs=[])
+
     #Update the summary box below the UI according to how many images are uploaded and whether users are using custom settings or not
     for file in file_collection:
+        file.change(fn=update_steps,inputs=file_collection, outputs=steps)
         file.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
+
     steps.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
     perc_txt_encoder.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)

@@ -493,12 +565,12 @@ with gr.Blocks(css=css) as demo:
     train_btn.click(lambda:gr.update(visible=True), inputs=None, outputs=training_ongoing)

     #The main train function
-    train_btn.click(fn=train, inputs=is_visible+concept_collection+file_collection+[training_summary_where_to_upload]+[training_summary_model_name]+[training_summary_checkbox]+[training_summary_token]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[result, try_your_model, push_to_hub, convert_button, training_ongoing, completed_training], queue=False)
+    train_btn.click(fn=train, inputs=is_visible+concept_collection+file_collection+[base_model_to_use]+[thing_experimental]+[training_summary_where_to_upload]+[training_summary_model_name]+[training_summary_checkbox]+[training_summary_token]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[result, try_your_model, push_to_hub, convert_button, training_ongoing, completed_training], queue=False)

     #Button to generate an image from your trained model after training
-    generate_button.click(fn=generate, inputs=prompt, outputs=result_image, queue=False)
+    generate_button.click(fn=generate, inputs=[prompt, inference_steps], outputs=result_image, queue=False)
     #Button to push the model to the Hugging Face Hub
-    push_button.click(fn=push, inputs=[model_name, where_to_upload, hf_token], outputs=[success_message_upload, result], queue=False)
+    push_button.click(fn=push, inputs=[model_name, where_to_upload, hf_token, base_model_to_use], outputs=[success_message_upload, result], queue=False)
     #Button to convert the model to ckpt format
     convert_button.click(fn=convert_to_ckpt, inputs=[], outputs=result, queue=False)

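Most of the UI wiring above follows one pattern: every control that affects the training summary re-runs count_files on its change event. A small self-contained Gradio sketch of that fan-out, with a simplified handler that is not the Space's count_files:

import gradio as gr

def summarize(n_files, steps, use_custom):
    total = int(steps) if use_custom else int(n_files) * 200
    return gr.update(visible=True, value=f"Will train for {total} steps")

with gr.Blocks() as demo:
    n_files = gr.Number(label="Number of images", value=5)
    steps = gr.Number(label="How many steps", value=800)
    use_custom = gr.Checkbox(label="Use custom settings")
    summary = gr.HTML(visible=False)

    # The same handler is attached to every control that affects the summary,
    # mirroring how app.py re-runs count_files on file, steps and %-encoder changes.
    for control in (n_files, steps, use_custom):
        control.change(fn=summarize, inputs=[n_files, steps, use_custom],
                       outputs=summary, queue=False)

if __name__ == "__main__":
    demo.launch()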
requirements-local.txt
ADDED
@@ -0,0 +1,18 @@
+--extra-index-url https://download.pytorch.org/whl/cu113
+torch==1.12.1+cu113
+torchvision==0.13.1+cu113
+diffusers==0.9.0
+accelerate==0.12.0
+OmegaConf
+wget
+pytorch_lightning
+huggingface_hub
+ftfy
+transformers
+pyfiglet
+triton==2.0.0.dev20220701
+bitsandbytes
+python-slugify
+requests
+tensorboard
+pip install git+https://github.com/facebookresearch/xformers@7e4c02c#egg=xformers
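One caveat, assuming this file is meant to be consumed with pip install -r requirements-local.txt: the last entry is a shell command rather than a requirement specifier, so pip would reject that line. The usual requirements-file form for a VCS dependency is the bare URL, with the pip install prefix left to the shell:

git+https://github.com/facebookresearch/xformers@7e4c02c#egg=xformers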
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/cu113
 torch==1.12.1+cu113
 torchvision==0.13.1+cu113
+diffusers==0.9.0
 accelerate==0.12.0
 OmegaConf
 wget
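The only change here is pinning diffusers to 0.9.0, the release that adds the Stable Diffusion 2 pipelines used by the new v2-512 option in app.py. A quick way to confirm the installed version before training; this check is illustrative, not part of the Space:

import diffusers
print(diffusers.__version__)  # the Space expects 0.9.0 for stable-diffusion-2-base support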
train_dreambooth.py
CHANGED
@@ -6,7 +6,7 @@ from pathlib import Path
 from typing import Optional
 import subprocess
 import sys
-import gc
+import gc

 import torch
 import torch.nn.functional as F
@@ -54,7 +54,7 @@ def parse_args():
         "--class_data_dir",
         type=str,
         default=None,
-        required=False,
+        #required=False,
         help="A folder containing the training data of class images.",
     )
     parser.add_argument(
@@ -334,6 +334,7 @@ class DreamBoothDataset(Dataset):
             pt=pt.replace("_"," ")
             pt=pt.replace("(","")
             pt=pt.replace(")","")
+            pt=pt.replace("-","")
             instance_prompt = pt
             sys.stdout.write(" [0;32m" +instance_prompt+" [0m")
             sys.stdout.flush()
@@ -746,7 +747,7 @@ def run_training(args_imported):
             pipeline.text_encoder.save_pretrained(frz_dir)

         if args.save_n_steps >= 200:
-            if global_step < args.max_train_steps
+            if global_step < args.max_train_steps and global_step+1==i:
                 ckpt_name = "_step_" + str(global_step+1)
                 save_dir = Path(args.output_dir+ckpt_name)
                 save_dir=str(save_dir)
@@ -770,6 +771,7 @@ def run_training(args_imported):
                 subprocess.call('cp -f '+frz_dir +'/*.* '+ save_dir+'/text_encoder', shell=True)
                 chkpth=args.Session_dir+"/"+inst+".ckpt"
                 subprocess.call('python /content/diffusers/scripts/convert_diffusers_to_original_stable_diffusion.py --model_path ' + save_dir + ' --checkpoint_path ' + chkpth + ' --half', shell=True)
+                subprocess.call('rm -r '+ save_dir, shell=True)
                 i=i+args.save_n_steps

         accelerator.wait_for_everyone()
@@ -819,3 +821,4 @@ def run_training(args_imported):
 if __name__ == "__main__":
     pass
     #main()
+
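The dataset change above extends the filename cleanup that turns each training image's name into its instance prompt: hyphens are now stripped along with underscores and parentheses. A standalone sketch of that cleanup chain; simplified, since in the script it runs inside the DreamBoothDataset class:

from pathlib import Path

def caption_from_filename(path):
    # Mirrors the replace chain shown in the diff: the image's file name
    # becomes the instance prompt, with separators removed.
    pt = Path(path).stem
    pt = pt.replace("_", " ")
    pt = pt.replace("(", "")
    pt = pt.replace(")", "")
    pt = pt.replace("-", "")  # the line this commit adds
    return pt

print(caption_from_filename("instance_images/cttoy_sideview.jpg"))  # -> "cttoy sideview"
print(caption_from_filename("julcto-portrait.png"))                 # -> "julctoportrait"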