CODELANDER: hysts (HF staff) committed
Commit fa20cba
0 Parent(s):

Duplicate from hysts/ControlNet


Co-authored-by: hysts <hysts@users.noreply.huggingface.co>

Files changed (25)
  1. .gitattributes +34 -0
  2. .gitignore +162 -0
  3. .gitmodules +3 -0
  4. .pre-commit-config.yaml +47 -0
  5. .style.yapf +5 -0
  6. ControlNet +1 -0
  7. LICENSE +21 -0
  8. LICENSE.ControlNet +201 -0
  9. README.md +15 -0
  10. app.py +158 -0
  11. app_canny.py +91 -0
  12. app_depth.py +86 -0
  13. app_fake_scribble.py +83 -0
  14. app_hed.py +83 -0
  15. app_hough.py +97 -0
  16. app_normal.py +93 -0
  17. app_pose.py +89 -0
  18. app_scribble.py +77 -0
  19. app_scribble_interactive.py +103 -0
  20. app_seg.py +87 -0
  21. model.py +649 -0
  22. notebooks/notebook.ipynb +80 -0
  23. patch +128 -0
  24. requirements.txt +22 -0
  25. style.css +8 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
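The .gitattributes above routes checkpoints, archives, and other binary artifacts through Git LFS so they are stored as pointers rather than blobs. A minimal sketch of how such glob patterns classify filenames, using Python's fnmatch and only the simple patterns (git's `**` semantics are not reproduced here; this sketch is illustrative, not part of the commit):

import fnmatch

# A few of the LFS patterns declared in .gitattributes above.
LFS_PATTERNS = ['*.ckpt', '*.pth', '*.safetensors', '*.zip', '*tfevents*']

def is_lfs_tracked(filename: str) -> bool:
    """Return True if the filename matches any of the simple LFS glob patterns."""
    return any(fnmatch.fnmatch(filename, pattern) for pattern in LFS_PATTERNS)

print(is_lfs_tracked('body_pose_model.pth'))  # True
print(is_lfs_tracked('app.py'))               # False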
.gitignore ADDED
@@ -0,0 +1,162 @@
1
+ models/
2
+
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # poetry
100
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104
+ #poetry.lock
105
+
106
+ # pdm
107
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108
+ #pdm.lock
109
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110
+ # in version control.
111
+ # https://pdm.fming.dev/#use-with-ide
112
+ .pdm.toml
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
.gitmodules ADDED
@@ -0,0 +1,3 @@
+ [submodule "ControlNet"]
+ path = ControlNet
+ url = https://github.com/lllyasviel/ControlNet
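The ControlNet submodule registered here is the code that model.py later imports the annotators from. A minimal sketch of how the checked-out submodule directory is made importable, mirroring the sys.path handling used in model.py further down (it assumes the submodule has been cloned, e.g. with `git submodule update --init`):

import pathlib
import sys

# Path of the ControlNet submodule relative to this repository.
submodule_dir = pathlib.Path(__file__).parent / 'ControlNet'
sys.path.append(submodule_dir.as_posix())

# After this, modules that live inside the submodule
# (e.g. annotator.canny) can be imported directly.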
.pre-commit-config.yaml ADDED
@@ -0,0 +1,47 @@
+ exclude: patch
+ repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.2.0
+ hooks:
+ - id: check-executables-have-shebangs
+ - id: check-json
+ - id: check-merge-conflict
+ - id: check-shebang-scripts-are-executable
+ - id: check-toml
+ - id: check-yaml
+ - id: double-quote-string-fixer
+ - id: end-of-file-fixer
+ - id: mixed-line-ending
+ args: ['--fix=lf']
+ - id: requirements-txt-fixer
+ - id: trailing-whitespace
+ - repo: https://github.com/myint/docformatter
+ rev: v1.4
+ hooks:
+ - id: docformatter
+ args: ['--in-place']
+ - repo: https://github.com/pycqa/isort
+ rev: 5.12.0
+ hooks:
+ - id: isort
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v0.991
+ hooks:
+ - id: mypy
+ args: ['--ignore-missing-imports']
+ additional_dependencies: ['types-python-slugify']
+ - repo: https://github.com/google/yapf
+ rev: v0.32.0
+ hooks:
+ - id: yapf
+ args: ['--parallel', '--in-place']
+ - repo: https://github.com/kynan/nbstripout
+ rev: 0.6.0
+ hooks:
+ - id: nbstripout
+ args: ['--extra-keys', 'metadata.interpreter metadata.kernelspec cell.metadata.pycharm']
+ - repo: https://github.com/nbQA-dev/nbQA
+ rev: 1.6.4
+ hooks:
+ - id: nbqa-isort
+ - id: nbqa-yapf
.style.yapf ADDED
@@ -0,0 +1,5 @@
+ [style]
+ based_on_style = pep8
+ blank_line_before_nested_class_or_def = false
+ spaces_before_comment = 2
+ split_before_logical_operator = true
ControlNet ADDED
@@ -0,0 +1 @@
+ Subproject commit f4748e3630d8141d7765e2bd9b1e348f47847707
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2023 hysts
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
LICENSE.ControlNet ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,15 @@
+ ---
+ title: ControlNet
+ emoji: 🌖
+ colorFrom: pink
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 3.23.0
+ python_version: 3.10.9
+ app_file: app.py
+ pinned: false
+ license: mit
+ duplicated_from: hysts/ControlNet
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,158 @@
1
+ #!/usr/bin/env python
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import pathlib
7
+ import shlex
8
+ import subprocess
9
+
10
+ import gradio as gr
11
+ import torch
12
+
13
+ if os.getenv('SYSTEM') == 'spaces':
14
+ with open('patch') as f:
15
+ subprocess.run(shlex.split('patch -p1'), stdin=f, cwd='ControlNet')
16
+
17
+ base_url = 'https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/'
18
+ names = [
19
+ 'body_pose_model.pth',
20
+ 'dpt_hybrid-midas-501f0c75.pt',
21
+ 'hand_pose_model.pth',
22
+ 'mlsd_large_512_fp32.pth',
23
+ 'mlsd_tiny_512_fp32.pth',
24
+ 'network-bsds500.pth',
25
+ 'upernet_global_small.pth',
26
+ ]
27
+ for name in names:
28
+ command = f'wget https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/{name} -O {name}'
29
+ out_path = pathlib.Path(f'ControlNet/annotator/ckpts/{name}')
30
+ if out_path.exists():
31
+ continue
32
+ subprocess.run(shlex.split(command), cwd='ControlNet/annotator/ckpts/')
33
+
34
+ from app_canny import create_demo as create_demo_canny
35
+ from app_depth import create_demo as create_demo_depth
36
+ from app_fake_scribble import create_demo as create_demo_fake_scribble
37
+ from app_hed import create_demo as create_demo_hed
38
+ from app_hough import create_demo as create_demo_hough
39
+ from app_normal import create_demo as create_demo_normal
40
+ from app_pose import create_demo as create_demo_pose
41
+ from app_scribble import create_demo as create_demo_scribble
42
+ from app_scribble_interactive import \
43
+ create_demo as create_demo_scribble_interactive
44
+ from app_seg import create_demo as create_demo_seg
45
+ from model import Model, download_all_controlnet_weights
46
+
47
+ DESCRIPTION = '''# [ControlNet v1.0](https://github.com/lllyasviel/ControlNet)
48
+
49
+ <p class="note">New ControlNet v1.1 is available <a href="https://huggingface.co/spaces/hysts/ControlNet-v1-1">here</a>.</p>
50
+ '''
51
+
52
+ SPACE_ID = os.getenv('SPACE_ID')
53
+ ALLOW_CHANGING_BASE_MODEL = SPACE_ID != 'hysts/ControlNet'
54
+
55
+ if SPACE_ID is not None:
56
+ DESCRIPTION += f'\n<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>'
57
+
58
+ if torch.cuda.is_available():
59
+ DESCRIPTION += '\n<p>Running on GPU 🔥</p>'
60
+ if os.getenv('SYSTEM') == 'spaces':
61
+ download_all_controlnet_weights()
62
+ else:
63
+ DESCRIPTION += '\n<p>Running on CPU 🥶 This demo does not work on CPU.'
64
+
65
+ MAX_IMAGES = int(os.getenv('MAX_IMAGES', '3'))
66
+ DEFAULT_NUM_IMAGES = min(MAX_IMAGES, int(os.getenv('DEFAULT_NUM_IMAGES', '1')))
67
+
68
+ DEFAULT_MODEL_ID = os.getenv('DEFAULT_MODEL_ID',
69
+ 'runwayml/stable-diffusion-v1-5')
70
+ model = Model(base_model_id=DEFAULT_MODEL_ID, task_name='canny')
71
+
72
+ with gr.Blocks(css='style.css') as demo:
73
+ gr.Markdown(DESCRIPTION)
74
+ with gr.Tabs():
75
+ with gr.TabItem('Canny'):
76
+ create_demo_canny(model.process_canny,
77
+ max_images=MAX_IMAGES,
78
+ default_num_images=DEFAULT_NUM_IMAGES)
79
+ with gr.TabItem('Hough'):
80
+ create_demo_hough(model.process_hough,
81
+ max_images=MAX_IMAGES,
82
+ default_num_images=DEFAULT_NUM_IMAGES)
83
+ with gr.TabItem('HED'):
84
+ create_demo_hed(model.process_hed,
85
+ max_images=MAX_IMAGES,
86
+ default_num_images=DEFAULT_NUM_IMAGES)
87
+ with gr.TabItem('Scribble'):
88
+ create_demo_scribble(model.process_scribble,
89
+ max_images=MAX_IMAGES,
90
+ default_num_images=DEFAULT_NUM_IMAGES)
91
+ with gr.TabItem('Scribble Interactive'):
92
+ create_demo_scribble_interactive(
93
+ model.process_scribble_interactive,
94
+ max_images=MAX_IMAGES,
95
+ default_num_images=DEFAULT_NUM_IMAGES)
96
+ with gr.TabItem('Fake Scribble'):
97
+ create_demo_fake_scribble(model.process_fake_scribble,
98
+ max_images=MAX_IMAGES,
99
+ default_num_images=DEFAULT_NUM_IMAGES)
100
+ with gr.TabItem('Pose'):
101
+ create_demo_pose(model.process_pose,
102
+ max_images=MAX_IMAGES,
103
+ default_num_images=DEFAULT_NUM_IMAGES)
104
+ with gr.TabItem('Segmentation'):
105
+ create_demo_seg(model.process_seg,
106
+ max_images=MAX_IMAGES,
107
+ default_num_images=DEFAULT_NUM_IMAGES)
108
+ with gr.TabItem('Depth'):
109
+ create_demo_depth(model.process_depth,
110
+ max_images=MAX_IMAGES,
111
+ default_num_images=DEFAULT_NUM_IMAGES)
112
+ with gr.TabItem('Normal map'):
113
+ create_demo_normal(model.process_normal,
114
+ max_images=MAX_IMAGES,
115
+ default_num_images=DEFAULT_NUM_IMAGES)
116
+
117
+ with gr.Accordion(label='Base model', open=False):
118
+ with gr.Row():
119
+ with gr.Column():
120
+ current_base_model = gr.Text(label='Current base model')
121
+ with gr.Column(scale=0.3):
122
+ check_base_model_button = gr.Button('Check current base model')
123
+ with gr.Row():
124
+ with gr.Column():
125
+ new_base_model_id = gr.Text(
126
+ label='New base model',
127
+ max_lines=1,
128
+ placeholder='runwayml/stable-diffusion-v1-5',
129
+ info=
130
+ 'The base model must be compatible with Stable Diffusion v1.5.',
131
+ interactive=ALLOW_CHANGING_BASE_MODEL)
132
+ with gr.Column(scale=0.3):
133
+ change_base_model_button = gr.Button(
134
+ 'Change base model', interactive=ALLOW_CHANGING_BASE_MODEL)
135
+ if not ALLOW_CHANGING_BASE_MODEL:
136
+ gr.Markdown(
137
+ '''The base model is not allowed to be changed in this Space so as not to slow down the demo, but it can be changed if you duplicate the Space. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a>'''
138
+ )
139
+
140
+ gr.Markdown('''### Related Spaces
141
+
142
+ - [Space using Anything-v4.0 as base model](https://huggingface.co/spaces/hysts/ControlNet-with-Anything-v4)
143
+ - https://huggingface.co/spaces/jonigata/PoseMaker2
144
+ - https://huggingface.co/spaces/diffusers/controlnet-openpose
145
+ - https://huggingface.co/spaces/diffusers/controlnet-canny
146
+ ''')
147
+
148
+ check_base_model_button.click(fn=lambda: model.base_model_id,
149
+ outputs=current_base_model,
150
+ queue=False)
151
+ new_base_model_id.submit(fn=model.set_base_model,
152
+ inputs=new_base_model_id,
153
+ outputs=current_base_model)
154
+ change_base_model_button.click(fn=model.set_base_model,
155
+ inputs=new_base_model_id,
156
+ outputs=current_base_model)
157
+
158
+ demo.queue(api_open=False, max_size=10).launch()
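app.py fetches each annotator checkpoint by shelling out to wget inside the ControlNet submodule. A minimal alternative sketch of the same download step using huggingface_hub, pulling the same files from the same repository into the local HF cache (an assumption for illustration; the Space itself uses the wget loop shown above):

from huggingface_hub import hf_hub_download

ANNOTATOR_CKPT_NAMES = [
    'body_pose_model.pth',
    'dpt_hybrid-midas-501f0c75.pt',
    'hand_pose_model.pth',
    'mlsd_large_512_fp32.pth',
    'mlsd_tiny_512_fp32.pth',
    'network-bsds500.pth',
    'upernet_global_small.pth',
]

def download_annotator_checkpoints() -> list[str]:
    """Download the annotator checkpoints into the local HF cache and return their paths."""
    return [
        hf_hub_download(repo_id='lllyasviel/ControlNet',
                        filename=f'annotator/ckpts/{name}')
        for name in ANNOTATOR_CKPT_NAMES
    ]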
app_canny.py ADDED
@@ -0,0 +1,91 @@
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_canny2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
+ with gr.Blocks() as demo:
8
+ with gr.Row():
9
+ gr.Markdown('## Control Stable Diffusion with Canny Edge Maps')
10
+ with gr.Row():
11
+ with gr.Column():
12
+ input_image = gr.Image(source='upload', type='numpy')
13
+ prompt = gr.Textbox(label='Prompt')
14
+ run_button = gr.Button(label='Run')
15
+ with gr.Accordion('Advanced options', open=False):
16
+ num_samples = gr.Slider(label='Images',
17
+ minimum=1,
18
+ maximum=max_images,
19
+ value=default_num_images,
20
+ step=1)
21
+ image_resolution = gr.Slider(label='Image Resolution',
22
+ minimum=256,
23
+ maximum=512,
24
+ value=512,
25
+ step=256)
26
+ canny_low_threshold = gr.Slider(
27
+ label='Canny low threshold',
28
+ minimum=1,
29
+ maximum=255,
30
+ value=100,
31
+ step=1)
32
+ canny_high_threshold = gr.Slider(
33
+ label='Canny high threshold',
34
+ minimum=1,
35
+ maximum=255,
36
+ value=200,
37
+ step=1)
38
+ num_steps = gr.Slider(label='Steps',
39
+ minimum=1,
40
+ maximum=100,
41
+ value=20,
42
+ step=1)
43
+ guidance_scale = gr.Slider(label='Guidance Scale',
44
+ minimum=0.1,
45
+ maximum=30.0,
46
+ value=9.0,
47
+ step=0.1)
48
+ seed = gr.Slider(label='Seed',
49
+ minimum=-1,
50
+ maximum=2147483647,
51
+ step=1,
52
+ randomize=True)
53
+ a_prompt = gr.Textbox(
54
+ label='Added Prompt',
55
+ value='best quality, extremely detailed')
56
+ n_prompt = gr.Textbox(
57
+ label='Negative Prompt',
58
+ value=
59
+ 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
60
+ )
61
+ with gr.Column():
62
+ result = gr.Gallery(label='Output',
63
+ show_label=False,
64
+ elem_id='gallery').style(grid=2,
65
+ height='auto')
66
+ inputs = [
67
+ input_image,
68
+ prompt,
69
+ a_prompt,
70
+ n_prompt,
71
+ num_samples,
72
+ image_resolution,
73
+ num_steps,
74
+ guidance_scale,
75
+ seed,
76
+ canny_low_threshold,
77
+ canny_high_threshold,
78
+ ]
79
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
80
+ run_button.click(fn=process,
81
+ inputs=inputs,
82
+ outputs=result,
83
+ api_name='canny')
84
+ return demo
85
+
86
+
87
+ if __name__ == '__main__':
88
+ from model import Model
89
+ model = Model()
90
+ demo = create_demo(model.process_canny)
91
+ demo.queue().launch()
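Each app_*.py module exposes the same create_demo(process, ...) factory, so the UI layout can be exercised without a GPU by passing a stub in place of Model.process_canny. A minimal sketch (the stub simply echoes the uploaded image and ignores the remaining inputs; it is for layout testing only and is not part of the Space):

import numpy as np

from app_canny import create_demo

def fake_process(input_image: np.ndarray, *args) -> list[np.ndarray]:
    """Stand-in for Model.process_canny: return the uploaded image unchanged."""
    return [input_image]

if __name__ == '__main__':
    demo = create_demo(fake_process, max_images=2, default_num_images=1)
    demo.queue().launch()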
app_depth.py ADDED
@@ -0,0 +1,86 @@
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_depth2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
+ with gr.Blocks() as demo:
8
+ with gr.Row():
9
+ gr.Markdown('## Control Stable Diffusion with Depth Maps')
10
+ with gr.Row():
11
+ with gr.Column():
12
+ input_image = gr.Image(source='upload', type='numpy')
13
+ prompt = gr.Textbox(label='Prompt')
14
+ run_button = gr.Button(label='Run')
15
+ with gr.Accordion('Advanced options', open=False):
16
+ is_depth_image = gr.Checkbox(label='Is depth image',
17
+ value=False)
18
+ num_samples = gr.Slider(label='Images',
19
+ minimum=1,
20
+ maximum=max_images,
21
+ value=default_num_images,
22
+ step=1)
23
+ image_resolution = gr.Slider(label='Image Resolution',
24
+ minimum=256,
25
+ maximum=512,
26
+ value=512,
27
+ step=256)
28
+ detect_resolution = gr.Slider(label='Depth Resolution',
29
+ minimum=128,
30
+ maximum=512,
31
+ value=384,
32
+ step=1)
33
+ num_steps = gr.Slider(label='Steps',
34
+ minimum=1,
35
+ maximum=100,
36
+ value=20,
37
+ step=1)
38
+ guidance_scale = gr.Slider(label='Guidance Scale',
39
+ minimum=0.1,
40
+ maximum=30.0,
41
+ value=9.0,
42
+ step=0.1)
43
+ seed = gr.Slider(label='Seed',
44
+ minimum=-1,
45
+ maximum=2147483647,
46
+ step=1,
47
+ randomize=True)
48
+ a_prompt = gr.Textbox(
49
+ label='Added Prompt',
50
+ value='best quality, extremely detailed')
51
+ n_prompt = gr.Textbox(
52
+ label='Negative Prompt',
53
+ value=
54
+ 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
+ )
56
+ with gr.Column():
57
+ result = gr.Gallery(label='Output',
58
+ show_label=False,
59
+ elem_id='gallery').style(grid=2,
60
+ height='auto')
61
+ inputs = [
62
+ input_image,
63
+ prompt,
64
+ a_prompt,
65
+ n_prompt,
66
+ num_samples,
67
+ image_resolution,
68
+ detect_resolution,
69
+ num_steps,
70
+ guidance_scale,
71
+ seed,
72
+ is_depth_image,
73
+ ]
74
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
75
+ run_button.click(fn=process,
76
+ inputs=inputs,
77
+ outputs=result,
78
+ api_name='depth')
79
+ return demo
80
+
81
+
82
+ if __name__ == '__main__':
83
+ from model import Model
84
+ model = Model()
85
+ demo = create_demo(model.process_depth)
86
+ demo.queue().launch()
app_fake_scribble.py ADDED
@@ -0,0 +1,83 @@
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_fake_scribble2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
+ with gr.Blocks() as demo:
8
+ with gr.Row():
9
+ gr.Markdown('## Control Stable Diffusion with Fake Scribble Maps')
10
+ with gr.Row():
11
+ with gr.Column():
12
+ input_image = gr.Image(source='upload', type='numpy')
13
+ prompt = gr.Textbox(label='Prompt')
14
+ run_button = gr.Button(label='Run')
15
+ with gr.Accordion('Advanced options', open=False):
16
+ num_samples = gr.Slider(label='Images',
17
+ minimum=1,
18
+ maximum=max_images,
19
+ value=default_num_images,
20
+ step=1)
21
+ image_resolution = gr.Slider(label='Image Resolution',
22
+ minimum=256,
23
+ maximum=512,
24
+ value=512,
25
+ step=256)
26
+ detect_resolution = gr.Slider(label='HED Resolution',
27
+ minimum=128,
28
+ maximum=512,
29
+ value=512,
30
+ step=1)
31
+ num_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ guidance_scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
+ seed = gr.Slider(label='Seed',
42
+ minimum=-1,
43
+ maximum=2147483647,
44
+ step=1,
45
+ randomize=True)
46
+ a_prompt = gr.Textbox(
47
+ label='Added Prompt',
48
+ value='best quality, extremely detailed')
49
+ n_prompt = gr.Textbox(
50
+ label='Negative Prompt',
51
+ value=
52
+ 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
53
+ )
54
+ with gr.Column():
55
+ result = gr.Gallery(label='Output',
56
+ show_label=False,
57
+ elem_id='gallery').style(grid=2,
58
+ height='auto')
59
+ inputs = [
60
+ input_image,
61
+ prompt,
62
+ a_prompt,
63
+ n_prompt,
64
+ num_samples,
65
+ image_resolution,
66
+ detect_resolution,
67
+ num_steps,
68
+ guidance_scale,
69
+ seed,
70
+ ]
71
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
72
+ run_button.click(fn=process,
73
+ inputs=inputs,
74
+ outputs=result,
75
+ api_name='fake_scribble')
76
+ return demo
77
+
78
+
79
+ if __name__ == '__main__':
80
+ from model import Model
81
+ model = Model()
82
+ demo = create_demo(model.process_fake_scribble)
83
+ demo.queue().launch()
app_hed.py ADDED
@@ -0,0 +1,83 @@
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_hed2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
+ with gr.Blocks() as demo:
8
+ with gr.Row():
9
+ gr.Markdown('## Control Stable Diffusion with HED Maps')
10
+ with gr.Row():
11
+ with gr.Column():
12
+ input_image = gr.Image(source='upload', type='numpy')
13
+ prompt = gr.Textbox(label='Prompt')
14
+ run_button = gr.Button(label='Run')
15
+ with gr.Accordion('Advanced options', open=False):
16
+ num_samples = gr.Slider(label='Images',
17
+ minimum=1,
18
+ maximum=max_images,
19
+ value=default_num_images,
20
+ step=1)
21
+ image_resolution = gr.Slider(label='Image Resolution',
22
+ minimum=256,
23
+ maximum=512,
24
+ value=512,
25
+ step=256)
26
+ detect_resolution = gr.Slider(label='HED Resolution',
27
+ minimum=128,
28
+ maximum=512,
29
+ value=512,
30
+ step=1)
31
+ num_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ guidance_scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
+ seed = gr.Slider(label='Seed',
42
+ minimum=-1,
43
+ maximum=2147483647,
44
+ step=1,
45
+ randomize=True)
46
+ a_prompt = gr.Textbox(
47
+ label='Added Prompt',
48
+ value='best quality, extremely detailed')
49
+ n_prompt = gr.Textbox(
50
+ label='Negative Prompt',
51
+ value=
52
+ 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
53
+ )
54
+ with gr.Column():
55
+ result = gr.Gallery(label='Output',
56
+ show_label=False,
57
+ elem_id='gallery').style(grid=2,
58
+ height='auto')
59
+ inputs = [
60
+ input_image,
61
+ prompt,
62
+ a_prompt,
63
+ n_prompt,
64
+ num_samples,
65
+ image_resolution,
66
+ detect_resolution,
67
+ num_steps,
68
+ guidance_scale,
69
+ seed,
70
+ ]
71
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
72
+ run_button.click(fn=process,
73
+ inputs=inputs,
74
+ outputs=result,
75
+ api_name='hed')
76
+ return demo
77
+
78
+
79
+ if __name__ == '__main__':
80
+ from model import Model
81
+ model = Model()
82
+ demo = create_demo(model.process_hed)
83
+ demo.queue().launch()
app_hough.py ADDED
@@ -0,0 +1,97 @@
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_hough2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
+ with gr.Blocks() as demo:
8
+ with gr.Row():
9
+ gr.Markdown('## Control Stable Diffusion with Hough Line Maps')
10
+ with gr.Row():
11
+ with gr.Column():
12
+ input_image = gr.Image(source='upload', type='numpy')
13
+ prompt = gr.Textbox(label='Prompt')
14
+ run_button = gr.Button(label='Run')
15
+ with gr.Accordion('Advanced options', open=False):
16
+ num_samples = gr.Slider(label='Images',
17
+ minimum=1,
18
+ maximum=max_images,
19
+ value=default_num_images,
20
+ step=1)
21
+ image_resolution = gr.Slider(label='Image Resolution',
22
+ minimum=256,
23
+ maximum=512,
24
+ value=512,
25
+ step=256)
26
+ detect_resolution = gr.Slider(label='Hough Resolution',
27
+ minimum=128,
28
+ maximum=512,
29
+ value=512,
30
+ step=1)
31
+ mlsd_value_threshold = gr.Slider(
32
+ label='Hough value threshold (MLSD)',
33
+ minimum=0.01,
34
+ maximum=2.0,
35
+ value=0.1,
36
+ step=0.01)
37
+ mlsd_distance_threshold = gr.Slider(
38
+ label='Hough distance threshold (MLSD)',
39
+ minimum=0.01,
40
+ maximum=20.0,
41
+ value=0.1,
42
+ step=0.01)
43
+ num_steps = gr.Slider(label='Steps',
44
+ minimum=1,
45
+ maximum=100,
46
+ value=20,
47
+ step=1)
48
+ guidance_scale = gr.Slider(label='Guidance Scale',
49
+ minimum=0.1,
50
+ maximum=30.0,
51
+ value=9.0,
52
+ step=0.1)
53
+ seed = gr.Slider(label='Seed',
54
+ minimum=-1,
55
+ maximum=2147483647,
56
+ step=1,
57
+ randomize=True)
58
+ a_prompt = gr.Textbox(
59
+ label='Added Prompt',
60
+ value='best quality, extremely detailed')
61
+ n_prompt = gr.Textbox(
62
+ label='Negative Prompt',
63
+ value=
64
+ 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
65
+ )
66
+ with gr.Column():
67
+ result = gr.Gallery(label='Output',
68
+ show_label=False,
69
+ elem_id='gallery').style(grid=2,
70
+ height='auto')
71
+ inputs = [
72
+ input_image,
73
+ prompt,
74
+ a_prompt,
75
+ n_prompt,
76
+ num_samples,
77
+ image_resolution,
78
+ detect_resolution,
79
+ num_steps,
80
+ guidance_scale,
81
+ seed,
82
+ mlsd_value_threshold,
83
+ mlsd_distance_threshold,
84
+ ]
85
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
86
+ run_button.click(fn=process,
87
+ inputs=inputs,
88
+ outputs=result,
89
+ api_name='hough')
90
+ return demo
91
+
92
+
93
+ if __name__ == '__main__':
94
+ from model import Model
95
+ model = Model()
96
+ demo = create_demo(model.process_hough)
97
+ demo.queue().launch()
app_normal.py ADDED
@@ -0,0 +1,93 @@
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_normal2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
+ with gr.Blocks() as demo:
8
+ with gr.Row():
9
+ gr.Markdown('## Control Stable Diffusion with Normal Maps')
10
+ with gr.Row():
11
+ with gr.Column():
12
+ input_image = gr.Image(source='upload', type='numpy')
13
+ prompt = gr.Textbox(label='Prompt')
14
+ run_button = gr.Button(label='Run')
15
+ with gr.Accordion('Advanced options', open=False):
16
+ is_normal_image = gr.Checkbox(label='Is normal image',
17
+ value=False)
18
+ num_samples = gr.Slider(label='Images',
19
+ minimum=1,
20
+ maximum=max_images,
21
+ value=default_num_images,
22
+ step=1)
23
+ image_resolution = gr.Slider(label='Image Resolution',
24
+ minimum=256,
25
+ maximum=512,
26
+ value=512,
27
+ step=256)
28
+ detect_resolution = gr.Slider(label='Normal Resolution',
29
+ minimum=128,
30
+ maximum=512,
31
+ value=384,
32
+ step=1)
33
+ bg_threshold = gr.Slider(
34
+ label='Normal background threshold',
35
+ minimum=0.0,
36
+ maximum=1.0,
37
+ value=0.4,
38
+ step=0.01)
39
+ num_steps = gr.Slider(label='Steps',
40
+ minimum=1,
41
+ maximum=100,
42
+ value=20,
43
+ step=1)
44
+ guidance_scale = gr.Slider(label='Guidance Scale',
45
+ minimum=0.1,
46
+ maximum=30.0,
47
+ value=9.0,
48
+ step=0.1)
49
+ seed = gr.Slider(label='Seed',
50
+ minimum=-1,
51
+ maximum=2147483647,
52
+ step=1,
53
+ randomize=True)
54
+ a_prompt = gr.Textbox(
55
+ label='Added Prompt',
56
+ value='best quality, extremely detailed')
57
+ n_prompt = gr.Textbox(
58
+ label='Negative Prompt',
59
+ value=
60
+ 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
61
+ )
62
+ with gr.Column():
63
+ result = gr.Gallery(label='Output',
64
+ show_label=False,
65
+ elem_id='gallery').style(grid=2,
66
+ height='auto')
67
+ inputs = [
68
+ input_image,
69
+ prompt,
70
+ a_prompt,
71
+ n_prompt,
72
+ num_samples,
73
+ image_resolution,
74
+ detect_resolution,
75
+ num_steps,
76
+ guidance_scale,
77
+ seed,
78
+ bg_threshold,
79
+ is_normal_image,
80
+ ]
81
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
82
+ run_button.click(fn=process,
83
+ inputs=inputs,
84
+ outputs=result,
85
+ api_name='normal')
86
+ return demo
87
+
88
+
89
+ if __name__ == '__main__':
90
+ from model import Model
91
+ model = Model()
92
+ demo = create_demo(model.process_normal)
93
+ demo.queue().launch()
app_pose.py ADDED
@@ -0,0 +1,89 @@
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_pose2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
+ with gr.Blocks() as demo:
8
+ with gr.Row():
9
+ gr.Markdown('## Control Stable Diffusion with Human Pose')
10
+ with gr.Row():
11
+ with gr.Column():
12
+ input_image = gr.Image(source='upload', type='numpy')
13
+ prompt = gr.Textbox(label='Prompt')
14
+ run_button = gr.Button(label='Run')
15
+ with gr.Accordion('Advanced options', open=False):
16
+ is_pose_image = gr.Checkbox(label='Is pose image',
17
+ value=False)
18
+ gr.Markdown(
19
+ 'You can use [PoseMaker2](https://huggingface.co/spaces/jonigata/PoseMaker2) to create pose images.'
20
+ )
21
+ num_samples = gr.Slider(label='Images',
22
+ minimum=1,
23
+ maximum=max_images,
24
+ value=default_num_images,
25
+ step=1)
26
+ image_resolution = gr.Slider(label='Image Resolution',
27
+ minimum=256,
28
+ maximum=512,
29
+ value=512,
30
+ step=256)
31
+ detect_resolution = gr.Slider(label='OpenPose Resolution',
32
+ minimum=128,
33
+ maximum=512,
34
+ value=512,
35
+ step=1)
36
+ num_steps = gr.Slider(label='Steps',
37
+ minimum=1,
38
+ maximum=100,
39
+ value=20,
40
+ step=1)
41
+ guidance_scale = gr.Slider(label='Guidance Scale',
42
+ minimum=0.1,
43
+ maximum=30.0,
44
+ value=9.0,
45
+ step=0.1)
46
+ seed = gr.Slider(label='Seed',
47
+ minimum=-1,
48
+ maximum=2147483647,
49
+ step=1,
50
+ randomize=True)
51
+ a_prompt = gr.Textbox(
52
+ label='Added Prompt',
53
+ value='best quality, extremely detailed')
54
+ n_prompt = gr.Textbox(
55
+ label='Negative Prompt',
56
+ value=
57
+ 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
58
+ )
59
+ with gr.Column():
60
+ result = gr.Gallery(label='Output',
61
+ show_label=False,
62
+ elem_id='gallery').style(grid=2,
63
+ height='auto')
64
+ inputs = [
65
+ input_image,
66
+ prompt,
67
+ a_prompt,
68
+ n_prompt,
69
+ num_samples,
70
+ image_resolution,
71
+ detect_resolution,
72
+ num_steps,
73
+ guidance_scale,
74
+ seed,
75
+ is_pose_image,
76
+ ]
77
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
78
+ run_button.click(fn=process,
79
+ inputs=inputs,
80
+ outputs=result,
81
+ api_name='pose')
82
+ return demo
83
+
84
+
85
+ if __name__ == '__main__':
86
+ from model import Model
87
+ model = Model()
88
+ demo = create_demo(model.process_pose)
89
+ demo.queue().launch()
app_scribble.py ADDED
@@ -0,0 +1,77 @@
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_scribble2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
+ with gr.Blocks() as demo:
8
+ with gr.Row():
9
+ gr.Markdown('## Control Stable Diffusion with Scribble Maps')
10
+ with gr.Row():
11
+ with gr.Column():
12
+ input_image = gr.Image(source='upload', type='numpy')
13
+ prompt = gr.Textbox(label='Prompt')
14
+ run_button = gr.Button(label='Run')
15
+ with gr.Accordion('Advanced options', open=False):
16
+ num_samples = gr.Slider(label='Images',
17
+ minimum=1,
18
+ maximum=max_images,
19
+ value=default_num_images,
20
+ step=1)
21
+ image_resolution = gr.Slider(label='Image Resolution',
22
+ minimum=256,
23
+ maximum=512,
24
+ value=512,
25
+ step=256)
26
+ num_steps = gr.Slider(label='Steps',
27
+ minimum=1,
28
+ maximum=100,
29
+ value=20,
30
+ step=1)
31
+ guidance_scale = gr.Slider(label='Guidance Scale',
32
+ minimum=0.1,
33
+ maximum=30.0,
34
+ value=9.0,
35
+ step=0.1)
36
+ seed = gr.Slider(label='Seed',
37
+ minimum=-1,
38
+ maximum=2147483647,
39
+ step=1,
40
+ randomize=True)
41
+ a_prompt = gr.Textbox(
42
+ label='Added Prompt',
43
+ value='best quality, extremely detailed')
44
+ n_prompt = gr.Textbox(
45
+ label='Negative Prompt',
46
+ value=
47
+ 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
48
+ )
49
+ with gr.Column():
50
+ result = gr.Gallery(label='Output',
51
+ show_label=False,
52
+ elem_id='gallery').style(grid=2,
53
+ height='auto')
54
+ inputs = [
55
+ input_image,
56
+ prompt,
57
+ a_prompt,
58
+ n_prompt,
59
+ num_samples,
60
+ image_resolution,
61
+ num_steps,
62
+ guidance_scale,
63
+ seed,
64
+ ]
65
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
66
+ run_button.click(fn=process,
67
+ inputs=inputs,
68
+ outputs=result,
69
+ api_name='scribble')
70
+ return demo
71
+
72
+
73
+ if __name__ == '__main__':
74
+ from model import Model
75
+ model = Model()
76
+ demo = create_demo(model.process_scribble)
77
+ demo.queue().launch()
app_scribble_interactive.py ADDED
@@ -0,0 +1,103 @@
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_scribble2image_interactive.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+ import numpy as np
5
+
6
+
7
+ def create_canvas(w, h):
8
+ return np.zeros(shape=(h, w, 3), dtype=np.uint8) + 255
9
+
10
+
11
+ def create_demo(process, max_images=12, default_num_images=3):
12
+ with gr.Blocks() as demo:
13
+ with gr.Row():
14
+ gr.Markdown(
15
+ '## Control Stable Diffusion with Interactive Scribbles')
16
+ with gr.Row():
17
+ with gr.Column():
18
+ canvas_width = gr.Slider(label='Canvas Width',
19
+ minimum=256,
20
+ maximum=512,
21
+ value=512,
22
+ step=1)
23
+ canvas_height = gr.Slider(label='Canvas Height',
24
+ minimum=256,
25
+ maximum=512,
26
+ value=512,
27
+ step=1)
28
+ create_button = gr.Button(label='Start',
29
+ value='Open drawing canvas!')
30
+ input_image = gr.Image(source='upload',
31
+ type='numpy',
32
+ tool='sketch')
33
+ gr.Markdown(
34
+ value=
35
+ 'Do not forget to change your brush width to make it thinner. (Gradio does not allow developers to set the brush width, so you need to do it manually.) '
36
+ 'Just click on the small pencil icon in the upper right corner of the above block.'
37
+ )
38
+ create_button.click(fn=create_canvas,
39
+ inputs=[canvas_width, canvas_height],
40
+ outputs=input_image,
41
+ queue=False)
42
+ prompt = gr.Textbox(label='Prompt')
43
+ run_button = gr.Button(label='Run')
44
+ with gr.Accordion('Advanced options', open=False):
45
+ num_samples = gr.Slider(label='Images',
46
+ minimum=1,
47
+ maximum=max_images,
48
+ value=default_num_images,
49
+ step=1)
50
+ image_resolution = gr.Slider(label='Image Resolution',
51
+ minimum=256,
52
+ maximum=512,
53
+ value=512,
54
+ step=256)
55
+ num_steps = gr.Slider(label='Steps',
56
+ minimum=1,
57
+ maximum=100,
58
+ value=20,
59
+ step=1)
60
+ guidance_scale = gr.Slider(label='Guidance Scale',
61
+ minimum=0.1,
62
+ maximum=30.0,
63
+ value=9.0,
64
+ step=0.1)
65
+ seed = gr.Slider(label='Seed',
66
+ minimum=-1,
67
+ maximum=2147483647,
68
+ step=1,
69
+ randomize=True)
70
+ a_prompt = gr.Textbox(
71
+ label='Added Prompt',
72
+ value='best quality, extremely detailed')
73
+ n_prompt = gr.Textbox(
74
+ label='Negative Prompt',
75
+ value=
76
+ 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
77
+ )
78
+ with gr.Column():
79
+ result = gr.Gallery(label='Output',
80
+ show_label=False,
81
+ elem_id='gallery').style(grid=2,
82
+ height='auto')
83
+ inputs = [
84
+ input_image,
85
+ prompt,
86
+ a_prompt,
87
+ n_prompt,
88
+ num_samples,
89
+ image_resolution,
90
+ num_steps,
91
+ guidance_scale,
92
+ seed,
93
+ ]
94
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
95
+ run_button.click(fn=process, inputs=inputs, outputs=result)
96
+ return demo
97
+
98
+
99
+ if __name__ == '__main__':
100
+ from model import Model
101
+ model = Model()
102
+ demo = create_demo(model.process_scribble_interactive)
103
+ demo.queue().launch()
app_seg.py ADDED
@@ -0,0 +1,87 @@
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_seg2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
+ with gr.Blocks() as demo:
8
+ with gr.Row():
9
+ gr.Markdown('## Control Stable Diffusion with Segmentation Maps')
10
+ with gr.Row():
11
+ with gr.Column():
12
+ input_image = gr.Image(source='upload', type='numpy')
13
+ prompt = gr.Textbox(label='Prompt')
14
+ run_button = gr.Button(label='Run')
15
+ with gr.Accordion('Advanced options', open=False):
16
+ is_segmentation_map = gr.Checkbox(
17
+ label='Is segmentation map', value=False)
18
+ num_samples = gr.Slider(label='Images',
19
+ minimum=1,
20
+ maximum=max_images,
21
+ value=default_num_images,
22
+ step=1)
23
+ image_resolution = gr.Slider(label='Image Resolution',
24
+ minimum=256,
25
+ maximum=512,
26
+ value=512,
27
+ step=256)
28
+ detect_resolution = gr.Slider(
29
+ label='Segmentation Resolution',
30
+ minimum=128,
31
+ maximum=512,
32
+ value=512,
33
+ step=1)
34
+ num_steps = gr.Slider(label='Steps',
35
+ minimum=1,
36
+ maximum=100,
37
+ value=20,
38
+ step=1)
39
+ guidance_scale = gr.Slider(label='Guidance Scale',
40
+ minimum=0.1,
41
+ maximum=30.0,
42
+ value=9.0,
43
+ step=0.1)
44
+ seed = gr.Slider(label='Seed',
45
+ minimum=-1,
46
+ maximum=2147483647,
47
+ step=1,
48
+ randomize=True)
49
+ a_prompt = gr.Textbox(
50
+ label='Added Prompt',
51
+ value='best quality, extremely detailed')
52
+ n_prompt = gr.Textbox(
53
+ label='Negative Prompt',
54
+ value=
55
+ 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
56
+ )
57
+ with gr.Column():
58
+ result = gr.Gallery(label='Output',
59
+ show_label=False,
60
+ elem_id='gallery').style(grid=2,
61
+ height='auto')
62
+ inputs = [
63
+ input_image,
64
+ prompt,
65
+ a_prompt,
66
+ n_prompt,
67
+ num_samples,
68
+ image_resolution,
69
+ detect_resolution,
70
+ num_steps,
71
+ guidance_scale,
72
+ seed,
73
+ is_segmentation_map,
74
+ ]
75
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
76
+ run_button.click(fn=process,
77
+ inputs=inputs,
78
+ outputs=result,
79
+ api_name='seg')
80
+ return demo
81
+
82
+
83
+ if __name__ == '__main__':
84
+ from model import Model
85
+ model = Model()
86
+ demo = create_demo(model.process_seg)
87
+ demo.queue().launch()
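The callback wired to the Run button here is Model.process_seg. A hedged sketch of calling it directly from Python with a pre-made segmentation map, so that is_segmentation_map=True bypasses the UniFormer annotator; the placeholder map and prompts are illustrative, and a CUDA GPU is assumed because Model.load_pipe returns None on CPU:

import numpy as np

from model import Model

model = Model(task_name='seg')
# Placeholder map; in practice this would use the ADE20K-style color palette
# the seg ControlNet expects.
seg_map = np.zeros((512, 512, 3), dtype=np.uint8)

images = model.process_seg(
    input_image=seg_map,
    prompt='a modern living room',
    additional_prompt='best quality, extremely detailed',
    negative_prompt='lowres, worst quality, low quality',
    num_images=1,
    image_resolution=512,
    detect_resolution=512,
    num_steps=20,
    guidance_scale=9.0,
    seed=0,
    is_segmentation_map=True,
)
# images[0] is the visualized control image, images[1:] are generated samples.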
model.py ADDED
@@ -0,0 +1,649 @@
1
+ # This file is adapted from gradio_*.py in https://github.com/lllyasviel/ControlNet/tree/f4748e3630d8141d7765e2bd9b1e348f47847707
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ from __future__ import annotations
4
+
5
+ import gc
6
+ import pathlib
7
+ import sys
8
+
9
+ import cv2
10
+ import numpy as np
11
+ import PIL.Image
12
+ import torch
13
+ from diffusers import (ControlNetModel, DiffusionPipeline,
14
+ StableDiffusionControlNetPipeline,
15
+ UniPCMultistepScheduler)
16
+
17
+ repo_dir = pathlib.Path(__file__).parent
18
+ submodule_dir = repo_dir / 'ControlNet'
19
+ sys.path.append(submodule_dir.as_posix())
20
+
21
+ try:
22
+ from annotator.canny import apply_canny
23
+ from annotator.hed import apply_hed, nms
24
+ from annotator.midas import apply_midas
25
+ from annotator.mlsd import apply_mlsd
26
+ from annotator.openpose import apply_openpose
27
+ from annotator.uniformer import apply_uniformer
28
+ from annotator.util import HWC3, resize_image
29
+ except Exception:
30
+ pass
31
+
32
+ CONTROLNET_MODEL_IDS = {
33
+ 'canny': 'lllyasviel/sd-controlnet-canny',
34
+ 'hough': 'lllyasviel/sd-controlnet-mlsd',
35
+ 'hed': 'lllyasviel/sd-controlnet-hed',
36
+ 'scribble': 'lllyasviel/sd-controlnet-scribble',
37
+ 'pose': 'lllyasviel/sd-controlnet-openpose',
38
+ 'seg': 'lllyasviel/sd-controlnet-seg',
39
+ 'depth': 'lllyasviel/sd-controlnet-depth',
40
+ 'normal': 'lllyasviel/sd-controlnet-normal',
41
+ }
42
+
43
+
44
+ def download_all_controlnet_weights() -> None:
45
+ for model_id in CONTROLNET_MODEL_IDS.values():
46
+ ControlNetModel.from_pretrained(model_id)
47
+
48
+
49
+ class Model:
50
+ def __init__(self,
51
+ base_model_id: str = 'runwayml/stable-diffusion-v1-5',
52
+ task_name: str = 'canny'):
53
+ self.device = torch.device(
54
+ 'cuda:0' if torch.cuda.is_available() else 'cpu')
55
+ self.base_model_id = ''
56
+ self.task_name = ''
57
+ self.pipe = self.load_pipe(base_model_id, task_name)
58
+
59
+ def load_pipe(self, base_model_id: str, task_name) -> DiffusionPipeline:
60
+ if self.device.type == 'cpu':
61
+ return None
62
+ if base_model_id == self.base_model_id and task_name == self.task_name and hasattr(
63
+ self, 'pipe'):
64
+ return self.pipe
65
+ model_id = CONTROLNET_MODEL_IDS[task_name]
66
+ controlnet = ControlNetModel.from_pretrained(model_id,
67
+ torch_dtype=torch.float16)
68
+ pipe = StableDiffusionControlNetPipeline.from_pretrained(
69
+ base_model_id,
70
+ safety_checker=None,
71
+ controlnet=controlnet,
72
+ torch_dtype=torch.float16)
73
+ pipe.scheduler = UniPCMultistepScheduler.from_config(
74
+ pipe.scheduler.config)
75
+ pipe.enable_xformers_memory_efficient_attention()
76
+ pipe.to(self.device)
77
+ torch.cuda.empty_cache()
78
+ gc.collect()
79
+ self.base_model_id = base_model_id
80
+ self.task_name = task_name
81
+ return pipe
82
+
83
+ def set_base_model(self, base_model_id: str) -> str:
84
+ if not base_model_id or base_model_id == self.base_model_id:
85
+ return self.base_model_id
86
+ del self.pipe
87
+ torch.cuda.empty_cache()
88
+ gc.collect()
89
+ try:
90
+ self.pipe = self.load_pipe(base_model_id, self.task_name)
91
+ except Exception:
92
+ self.pipe = self.load_pipe(self.base_model_id, self.task_name)
93
+ return self.base_model_id
94
+
95
+ def load_controlnet_weight(self, task_name: str) -> None:
96
+ if task_name == self.task_name:
97
+ return
98
+ if 'controlnet' in self.pipe.__dict__:
99
+ del self.pipe.controlnet
100
+ torch.cuda.empty_cache()
101
+ gc.collect()
102
+ model_id = CONTROLNET_MODEL_IDS[task_name]
103
+ controlnet = ControlNetModel.from_pretrained(model_id,
104
+ torch_dtype=torch.float16)
105
+ controlnet.to(self.device)
106
+ torch.cuda.empty_cache()
107
+ gc.collect()
108
+ self.pipe.controlnet = controlnet
109
+ self.task_name = task_name
110
+
111
+ def get_prompt(self, prompt: str, additional_prompt: str) -> str:
112
+ if not prompt:
113
+ prompt = additional_prompt
114
+ else:
115
+ prompt = f'{prompt}, {additional_prompt}'
116
+ return prompt
117
+
118
+ @torch.autocast('cuda')
119
+ def run_pipe(
120
+ self,
121
+ prompt: str,
122
+ negative_prompt: str,
123
+ control_image: PIL.Image.Image,
124
+ num_images: int,
125
+ num_steps: int,
126
+ guidance_scale: float,
127
+ seed: int,
128
+ ) -> list[PIL.Image.Image]:
129
+ if seed == -1:
130
+ seed = np.random.randint(0, np.iinfo(np.int64).max)
131
+ generator = torch.Generator().manual_seed(seed)
132
+ return self.pipe(prompt=prompt,
133
+ negative_prompt=negative_prompt,
134
+ guidance_scale=guidance_scale,
135
+ num_images_per_prompt=num_images,
136
+ num_inference_steps=num_steps,
137
+ generator=generator,
138
+ image=control_image).images
139
+
140
+ @staticmethod
141
+ def preprocess_canny(
142
+ input_image: np.ndarray,
143
+ image_resolution: int,
144
+ low_threshold: int,
145
+ high_threshold: int,
146
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
147
+ image = resize_image(HWC3(input_image), image_resolution)
148
+ control_image = apply_canny(image, low_threshold, high_threshold)
149
+ control_image = HWC3(control_image)
150
+ vis_control_image = 255 - control_image
151
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
152
+ vis_control_image)
153
+
154
+ @torch.inference_mode()
155
+ def process_canny(
156
+ self,
157
+ input_image: np.ndarray,
158
+ prompt: str,
159
+ additional_prompt: str,
160
+ negative_prompt: str,
161
+ num_images: int,
162
+ image_resolution: int,
163
+ num_steps: int,
164
+ guidance_scale: float,
165
+ seed: int,
166
+ low_threshold: int,
167
+ high_threshold: int,
168
+ ) -> list[PIL.Image.Image]:
169
+ control_image, vis_control_image = self.preprocess_canny(
170
+ input_image=input_image,
171
+ image_resolution=image_resolution,
172
+ low_threshold=low_threshold,
173
+ high_threshold=high_threshold,
174
+ )
175
+ self.load_controlnet_weight('canny')
176
+ results = self.run_pipe(
177
+ prompt=self.get_prompt(prompt, additional_prompt),
178
+ negative_prompt=negative_prompt,
179
+ control_image=control_image,
180
+ num_images=num_images,
181
+ num_steps=num_steps,
182
+ guidance_scale=guidance_scale,
183
+ seed=seed,
184
+ )
185
+ return [vis_control_image] + results
186
+
187
+ @staticmethod
188
+ def preprocess_hough(
189
+ input_image: np.ndarray,
190
+ image_resolution: int,
191
+ detect_resolution: int,
192
+ value_threshold: float,
193
+ distance_threshold: float,
194
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
195
+ input_image = HWC3(input_image)
196
+ control_image = apply_mlsd(
197
+ resize_image(input_image, detect_resolution), value_threshold,
198
+ distance_threshold)
199
+ control_image = HWC3(control_image)
200
+ image = resize_image(input_image, image_resolution)
201
+ H, W = image.shape[:2]
202
+ control_image = cv2.resize(control_image, (W, H),
203
+ interpolation=cv2.INTER_NEAREST)
204
+
205
+ vis_control_image = 255 - cv2.dilate(
206
+ control_image, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1)
207
+
208
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
209
+ vis_control_image)
210
+
211
+ @torch.inference_mode()
212
+ def process_hough(
213
+ self,
214
+ input_image: np.ndarray,
215
+ prompt: str,
216
+ additional_prompt: str,
217
+ negative_prompt: str,
218
+ num_images: int,
219
+ image_resolution: int,
220
+ detect_resolution: int,
221
+ num_steps: int,
222
+ guidance_scale: float,
223
+ seed: int,
224
+ value_threshold: float,
225
+ distance_threshold: float,
226
+ ) -> list[PIL.Image.Image]:
227
+ control_image, vis_control_image = self.preprocess_hough(
228
+ input_image=input_image,
229
+ image_resolution=image_resolution,
230
+ detect_resolution=detect_resolution,
231
+ value_threshold=value_threshold,
232
+ distance_threshold=distance_threshold,
233
+ )
234
+ self.load_controlnet_weight('hough')
235
+ results = self.run_pipe(
236
+ prompt=self.get_prompt(prompt, additional_prompt),
237
+ negative_prompt=negative_prompt,
238
+ control_image=control_image,
239
+ num_images=num_images,
240
+ num_steps=num_steps,
241
+ guidance_scale=guidance_scale,
242
+ seed=seed,
243
+ )
244
+ return [vis_control_image] + results
245
+
246
+ @staticmethod
247
+ def preprocess_hed(
248
+ input_image: np.ndarray,
249
+ image_resolution: int,
250
+ detect_resolution: int,
251
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
252
+ input_image = HWC3(input_image)
253
+ control_image = apply_hed(resize_image(input_image, detect_resolution))
254
+ control_image = HWC3(control_image)
255
+ image = resize_image(input_image, image_resolution)
256
+ H, W = image.shape[:2]
257
+ control_image = cv2.resize(control_image, (W, H),
258
+ interpolation=cv2.INTER_LINEAR)
259
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
260
+ control_image)
261
+
262
+ @torch.inference_mode()
263
+ def process_hed(
264
+ self,
265
+ input_image: np.ndarray,
266
+ prompt: str,
267
+ additional_prompt: str,
268
+ negative_prompt: str,
269
+ num_images: int,
270
+ image_resolution: int,
271
+ detect_resolution: int,
272
+ num_steps: int,
273
+ guidance_scale: float,
274
+ seed: int,
275
+ ) -> list[PIL.Image.Image]:
276
+ control_image, vis_control_image = self.preprocess_hed(
277
+ input_image=input_image,
278
+ image_resolution=image_resolution,
279
+ detect_resolution=detect_resolution,
280
+ )
281
+ self.load_controlnet_weight('hed')
282
+ results = self.run_pipe(
283
+ prompt=self.get_prompt(prompt, additional_prompt),
284
+ negative_prompt=negative_prompt,
285
+ control_image=control_image,
286
+ num_images=num_images,
287
+ num_steps=num_steps,
288
+ guidance_scale=guidance_scale,
289
+ seed=seed,
290
+ )
291
+ return [vis_control_image] + results
292
+
293
+ @staticmethod
294
+ def preprocess_scribble(
295
+ input_image: np.ndarray,
296
+ image_resolution: int,
297
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
298
+ image = resize_image(HWC3(input_image), image_resolution)
299
+ control_image = np.zeros_like(image, dtype=np.uint8)
300
+ control_image[np.min(image, axis=2) < 127] = 255
301
+ vis_control_image = 255 - control_image
302
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
303
+ vis_control_image)
304
+
305
+ @torch.inference_mode()
306
+ def process_scribble(
307
+ self,
308
+ input_image: np.ndarray,
309
+ prompt: str,
310
+ additional_prompt: str,
311
+ negative_prompt: str,
312
+ num_images: int,
313
+ image_resolution: int,
314
+ num_steps: int,
315
+ guidance_scale: float,
316
+ seed: int,
317
+ ) -> list[PIL.Image.Image]:
318
+ control_image, vis_control_image = self.preprocess_scribble(
319
+ input_image=input_image,
320
+ image_resolution=image_resolution,
321
+ )
322
+ self.load_controlnet_weight('scribble')
323
+ results = self.run_pipe(
324
+ prompt=self.get_prompt(prompt, additional_prompt),
325
+ negative_prompt=negative_prompt,
326
+ control_image=control_image,
327
+ num_images=num_images,
328
+ num_steps=num_steps,
329
+ guidance_scale=guidance_scale,
330
+ seed=seed,
331
+ )
332
+ return [vis_control_image] + results
333
+
334
+ @staticmethod
335
+ def preprocess_scribble_interactive(
336
+ input_image: np.ndarray,
337
+ image_resolution: int,
338
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
339
+ image = resize_image(HWC3(input_image['mask'][:, :, 0]),
340
+ image_resolution)
341
+ control_image = np.zeros_like(image, dtype=np.uint8)
342
+ control_image[np.min(image, axis=2) > 127] = 255
343
+ vis_control_image = 255 - control_image
344
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
345
+ vis_control_image)
346
+
347
+ @torch.inference_mode()
348
+ def process_scribble_interactive(
349
+ self,
350
+ input_image: np.ndarray,
351
+ prompt: str,
352
+ additional_prompt: str,
353
+ negative_prompt: str,
354
+ num_images: int,
355
+ image_resolution: int,
356
+ num_steps: int,
357
+ guidance_scale: float,
358
+ seed: int,
359
+ ) -> list[PIL.Image.Image]:
360
+ control_image, vis_control_image = self.preprocess_scribble_interactive(
361
+ input_image=input_image,
362
+ image_resolution=image_resolution,
363
+ )
364
+ self.load_controlnet_weight('scribble')
365
+ results = self.run_pipe(
366
+ prompt=self.get_prompt(prompt, additional_prompt),
367
+ negative_prompt=negative_prompt,
368
+ control_image=control_image,
369
+ num_images=num_images,
370
+ num_steps=num_steps,
371
+ guidance_scale=guidance_scale,
372
+ seed=seed,
373
+ )
374
+ return [vis_control_image] + results
375
+
376
+ @staticmethod
377
+ def preprocess_fake_scribble(
378
+ input_image: np.ndarray,
379
+ image_resolution: int,
380
+ detect_resolution: int,
381
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
382
+ input_image = HWC3(input_image)
383
+ control_image = apply_hed(resize_image(input_image, detect_resolution))
384
+ control_image = HWC3(control_image)
385
+ image = resize_image(input_image, image_resolution)
386
+ H, W = image.shape[:2]
387
+
388
+ control_image = cv2.resize(control_image, (W, H),
389
+ interpolation=cv2.INTER_LINEAR)
390
+ control_image = nms(control_image, 127, 3.0)
391
+ control_image = cv2.GaussianBlur(control_image, (0, 0), 3.0)
392
+ control_image[control_image > 4] = 255
393
+ control_image[control_image < 255] = 0
394
+
395
+ vis_control_image = 255 - control_image
396
+
397
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
398
+ vis_control_image)
399
+
400
+ @torch.inference_mode()
401
+ def process_fake_scribble(
402
+ self,
403
+ input_image: np.ndarray,
404
+ prompt: str,
405
+ additional_prompt: str,
406
+ negative_prompt: str,
407
+ num_images: int,
408
+ image_resolution: int,
409
+ detect_resolution: int,
410
+ num_steps: int,
411
+ guidance_scale: float,
412
+ seed: int,
413
+ ) -> list[PIL.Image.Image]:
414
+ control_image, vis_control_image = self.preprocess_fake_scribble(
415
+ input_image=input_image,
416
+ image_resolution=image_resolution,
417
+ detect_resolution=detect_resolution,
418
+ )
419
+ self.load_controlnet_weight('scribble')
420
+ results = self.run_pipe(
421
+ prompt=self.get_prompt(prompt, additional_prompt),
422
+ negative_prompt=negative_prompt,
423
+ control_image=control_image,
424
+ num_images=num_images,
425
+ num_steps=num_steps,
426
+ guidance_scale=guidance_scale,
427
+ seed=seed,
428
+ )
429
+ return [vis_control_image] + results
430
+
431
+ @staticmethod
432
+ def preprocess_pose(
433
+ input_image: np.ndarray,
434
+ image_resolution: int,
435
+ detect_resolution: int,
436
+ is_pose_image: bool,
437
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
438
+ input_image = HWC3(input_image)
439
+ if not is_pose_image:
440
+ control_image, _ = apply_openpose(
441
+ resize_image(input_image, detect_resolution))
442
+ control_image = HWC3(control_image)
443
+ image = resize_image(input_image, image_resolution)
444
+ H, W = image.shape[:2]
445
+ control_image = cv2.resize(control_image, (W, H),
446
+ interpolation=cv2.INTER_NEAREST)
447
+ else:
448
+ control_image = resize_image(input_image, image_resolution)
449
+
450
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
451
+ control_image)
452
+
453
+ @torch.inference_mode()
454
+ def process_pose(
455
+ self,
456
+ input_image: np.ndarray,
457
+ prompt: str,
458
+ additional_prompt: str,
459
+ negative_prompt: str,
460
+ num_images: int,
461
+ image_resolution: int,
462
+ detect_resolution: int,
463
+ num_steps: int,
464
+ guidance_scale: float,
465
+ seed: int,
466
+ is_pose_image: bool,
467
+ ) -> list[PIL.Image.Image]:
468
+ control_image, vis_control_image = self.preprocess_pose(
469
+ input_image=input_image,
470
+ image_resolution=image_resolution,
471
+ detect_resolution=detect_resolution,
472
+ is_pose_image=is_pose_image,
473
+ )
474
+ self.load_controlnet_weight('pose')
475
+ results = self.run_pipe(
476
+ prompt=self.get_prompt(prompt, additional_prompt),
477
+ negative_prompt=negative_prompt,
478
+ control_image=control_image,
479
+ num_images=num_images,
480
+ num_steps=num_steps,
481
+ guidance_scale=guidance_scale,
482
+ seed=seed,
483
+ )
484
+ return [vis_control_image] + results
485
+
486
+ @staticmethod
487
+ def preprocess_seg(
488
+ input_image: np.ndarray,
489
+ image_resolution: int,
490
+ detect_resolution: int,
491
+ is_segmentation_map: bool,
492
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
493
+ input_image = HWC3(input_image)
494
+ if not is_segmentation_map:
495
+ control_image = apply_uniformer(
496
+ resize_image(input_image, detect_resolution))
497
+ image = resize_image(input_image, image_resolution)
498
+ H, W = image.shape[:2]
499
+ control_image = cv2.resize(control_image, (W, H),
500
+ interpolation=cv2.INTER_NEAREST)
501
+ else:
502
+ control_image = resize_image(input_image, image_resolution)
503
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
504
+ control_image)
505
+
506
+ @torch.inference_mode()
507
+ def process_seg(
508
+ self,
509
+ input_image: np.ndarray,
510
+ prompt: str,
511
+ additional_prompt: str,
512
+ negative_prompt: str,
513
+ num_images: int,
514
+ image_resolution: int,
515
+ detect_resolution: int,
516
+ num_steps: int,
517
+ guidance_scale: float,
518
+ seed: int,
519
+ is_segmentation_map: bool,
520
+ ) -> list[PIL.Image.Image]:
521
+ control_image, vis_control_image = self.preprocess_seg(
522
+ input_image=input_image,
523
+ image_resolution=image_resolution,
524
+ detect_resolution=detect_resolution,
525
+ is_segmentation_map=is_segmentation_map,
526
+ )
527
+ self.load_controlnet_weight('seg')
528
+ results = self.run_pipe(
529
+ prompt=self.get_prompt(prompt, additional_prompt),
530
+ negative_prompt=negative_prompt,
531
+ control_image=control_image,
532
+ num_images=num_images,
533
+ num_steps=num_steps,
534
+ guidance_scale=guidance_scale,
535
+ seed=seed,
536
+ )
537
+ return [vis_control_image] + results
538
+
539
+ @staticmethod
540
+ def preprocess_depth(
541
+ input_image: np.ndarray,
542
+ image_resolution: int,
543
+ detect_resolution: int,
544
+ is_depth_image: bool,
545
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
546
+ input_image = HWC3(input_image)
547
+ if not is_depth_image:
548
+ control_image, _ = apply_midas(
549
+ resize_image(input_image, detect_resolution))
550
+ control_image = HWC3(control_image)
551
+ image = resize_image(input_image, image_resolution)
552
+ H, W = image.shape[:2]
553
+ control_image = cv2.resize(control_image, (W, H),
554
+ interpolation=cv2.INTER_LINEAR)
555
+ else:
556
+ control_image = resize_image(input_image, image_resolution)
557
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
558
+ control_image)
559
+
560
+ @torch.inference_mode()
561
+ def process_depth(
562
+ self,
563
+ input_image: np.ndarray,
564
+ prompt: str,
565
+ additional_prompt: str,
566
+ negative_prompt: str,
567
+ num_images: int,
568
+ image_resolution: int,
569
+ detect_resolution: int,
570
+ num_steps: int,
571
+ guidance_scale: float,
572
+ seed: int,
573
+ is_depth_image: bool,
574
+ ) -> list[PIL.Image.Image]:
575
+ control_image, vis_control_image = self.preprocess_depth(
576
+ input_image=input_image,
577
+ image_resolution=image_resolution,
578
+ detect_resolution=detect_resolution,
579
+ is_depth_image=is_depth_image,
580
+ )
581
+ self.load_controlnet_weight('depth')
582
+ results = self.run_pipe(
583
+ prompt=self.get_prompt(prompt, additional_prompt),
584
+ negative_prompt=negative_prompt,
585
+ control_image=control_image,
586
+ num_images=num_images,
587
+ num_steps=num_steps,
588
+ guidance_scale=guidance_scale,
589
+ seed=seed,
590
+ )
591
+ return [vis_control_image] + results
592
+
593
+ @staticmethod
594
+ def preprocess_normal(
595
+ input_image: np.ndarray,
596
+ image_resolution: int,
597
+ detect_resolution: int,
598
+ bg_threshold: float,
599
+ is_normal_image: bool,
600
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
601
+ input_image = HWC3(input_image)
602
+ if not is_normal_image:
603
+ _, control_image = apply_midas(resize_image(
604
+ input_image, detect_resolution),
605
+ bg_th=bg_threshold)
606
+ control_image = HWC3(control_image)
607
+ image = resize_image(input_image, image_resolution)
608
+ H, W = image.shape[:2]
609
+ control_image = cv2.resize(control_image, (W, H),
610
+ interpolation=cv2.INTER_LINEAR)
611
+ else:
612
+ control_image = resize_image(input_image, image_resolution)
613
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
614
+ control_image)
615
+
616
+ @torch.inference_mode()
617
+ def process_normal(
618
+ self,
619
+ input_image: np.ndarray,
620
+ prompt: str,
621
+ additional_prompt: str,
622
+ negative_prompt: str,
623
+ num_images: int,
624
+ image_resolution: int,
625
+ detect_resolution: int,
626
+ num_steps: int,
627
+ guidance_scale: float,
628
+ seed: int,
629
+ bg_threshold: float,
630
+ is_normal_image: bool,
631
+ ) -> list[PIL.Image.Image]:
632
+ control_image, vis_control_image = self.preprocess_normal(
633
+ input_image=input_image,
634
+ image_resolution=image_resolution,
635
+ detect_resolution=detect_resolution,
636
+ bg_threshold=bg_threshold,
637
+ is_normal_image=is_normal_image,
638
+ )
639
+ self.load_controlnet_weight('normal')
640
+ results = self.run_pipe(
641
+ prompt=self.get_prompt(prompt, additional_prompt),
642
+ negative_prompt=negative_prompt,
643
+ control_image=control_image,
644
+ num_images=num_images,
645
+ num_steps=num_steps,
646
+ guidance_scale=guidance_scale,
647
+ seed=seed,
648
+ )
649
+ return [vis_control_image] + results
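A design note on the class above: load_pipe builds one StableDiffusionControlNetPipeline, and load_controlnet_weight later swaps only the controlnet module, so switching tasks reuses the Stable Diffusion backbone instead of rebuilding the whole pipeline (download_all_controlnet_weights can pre-cache every checkpoint in CONTROLNET_MODEL_IDS). A hedged sketch of that reuse; argument order follows the process_* signatures above, the input array and prompts are illustrative, and a CUDA GPU plus the ControlNet submodule's annotators are assumed:

import numpy as np

from model import Model

model = Model(task_name='canny')  # loads SD 1.5 plus the canny ControlNet
image = np.zeros((512, 512, 3), dtype=np.uint8)

# input_image, prompt, additional_prompt, negative_prompt, num_images,
# image_resolution, num_steps, guidance_scale, seed, low/high Canny thresholds
canny_images = model.process_canny(image, 'a cat', 'best quality, extremely detailed',
                                    'lowres', 1, 512, 20, 9.0, 0, 100, 200)

# Only self.pipe.controlnet is replaced when moving to the depth task.
depth_images = model.process_depth(image, 'a cat', 'best quality, extremely detailed',
                                    'lowres', 1, 512, 384, 20, 9.0, 0, False)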
notebooks/notebook.ipynb ADDED
@@ -0,0 +1,80 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "id": "8CnkIPtjn8Dc"
8
+ },
9
+ "outputs": [],
10
+ "source": [
11
+ "!git clone --recursive https://huggingface.co/spaces/hysts/ControlNet"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": null,
17
+ "metadata": {
18
+ "id": "IZlaYNTWoFPK"
19
+ },
20
+ "outputs": [],
21
+ "source": [
22
+ "%cd ControlNet"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": null,
28
+ "metadata": {
29
+ "id": "0zhLFnZUoWdp"
30
+ },
31
+ "outputs": [],
32
+ "source": [
33
+ "!cd ControlNet && git apply ../patch && cd .."
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": null,
39
+ "metadata": {
40
+ "id": "P_fzYrLvoIcI"
41
+ },
42
+ "outputs": [],
43
+ "source": [
44
+ "!pip install -q -r requirements.txt"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": null,
50
+ "metadata": {
51
+ "id": "GOfGng5Woktd"
52
+ },
53
+ "outputs": [],
54
+ "source": [
55
+ "import app"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": null,
61
+ "metadata": {
62
+ "id": "7Cued230ol7T"
63
+ },
64
+ "outputs": [],
65
+ "source": []
66
+ }
67
+ ],
68
+ "metadata": {
69
+ "accelerator": "GPU",
70
+ "colab": {
71
+ "provenance": []
72
+ },
73
+ "gpuClass": "standard",
74
+ "language_info": {
75
+ "name": "python"
76
+ }
77
+ },
78
+ "nbformat": 4,
79
+ "nbformat_minor": 0
80
+ }
patch ADDED
@@ -0,0 +1,128 @@
1
+ diff --git a/annotator/hed/__init__.py b/annotator/hed/__init__.py
2
+ index 42d8dc6..1587035 100644
3
+ --- a/annotator/hed/__init__.py
4
+ +++ b/annotator/hed/__init__.py
5
+ @@ -1,8 +1,12 @@
6
+ +import pathlib
7
+ +
8
+ import numpy as np
9
+ import cv2
10
+ import torch
11
+ from einops import rearrange
12
+
13
+ +root_dir = pathlib.Path(__file__).parents[2]
14
+ +
15
+
16
+ class Network(torch.nn.Module):
17
+ def __init__(self):
18
+ @@ -64,7 +68,7 @@ class Network(torch.nn.Module):
19
+ torch.nn.Sigmoid()
20
+ )
21
+
22
+ - self.load_state_dict({strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.load('./annotator/ckpts/network-bsds500.pth').items()})
23
+ + self.load_state_dict({strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.load(f'{root_dir}/annotator/ckpts/network-bsds500.pth').items()})
24
+ # end
25
+
26
+ def forward(self, tenInput):
27
+ diff --git a/annotator/midas/api.py b/annotator/midas/api.py
28
+ index 9fa305e..d8594ea 100644
29
+ --- a/annotator/midas/api.py
30
+ +++ b/annotator/midas/api.py
31
+ @@ -1,5 +1,7 @@
32
+ # based on https://github.com/isl-org/MiDaS
33
+
34
+ +import pathlib
35
+ +
36
+ import cv2
37
+ import torch
38
+ import torch.nn as nn
39
+ @@ -10,10 +12,11 @@ from .midas.midas_net import MidasNet
40
+ from .midas.midas_net_custom import MidasNet_small
41
+ from .midas.transforms import Resize, NormalizeImage, PrepareForNet
42
+
43
+ +root_dir = pathlib.Path(__file__).parents[2]
44
+
45
+ ISL_PATHS = {
46
+ - "dpt_large": "annotator/ckpts/dpt_large-midas-2f21e586.pt",
47
+ - "dpt_hybrid": "annotator/ckpts/dpt_hybrid-midas-501f0c75.pt",
48
+ + "dpt_large": f"{root_dir}/annotator/ckpts/dpt_large-midas-2f21e586.pt",
49
+ + "dpt_hybrid": f"{root_dir}/annotator/ckpts/dpt_hybrid-midas-501f0c75.pt",
50
+ "midas_v21": "",
51
+ "midas_v21_small": "",
52
+ }
53
+ diff --git a/annotator/mlsd/__init__.py b/annotator/mlsd/__init__.py
54
+ index 75db717..f310fe6 100644
55
+ --- a/annotator/mlsd/__init__.py
56
+ +++ b/annotator/mlsd/__init__.py
57
+ @@ -1,3 +1,5 @@
58
+ +import pathlib
59
+ +
60
+ import cv2
61
+ import numpy as np
62
+ import torch
63
+ @@ -8,8 +10,9 @@ from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny
64
+ from .models.mbv2_mlsd_large import MobileV2_MLSD_Large
65
+ from .utils import pred_lines
66
+
67
+ +root_dir = pathlib.Path(__file__).parents[2]
68
+
69
+ -model_path = './annotator/ckpts/mlsd_large_512_fp32.pth'
70
+ +model_path = f'{root_dir}/annotator/ckpts/mlsd_large_512_fp32.pth'
71
+ model = MobileV2_MLSD_Large()
72
+ model.load_state_dict(torch.load(model_path), strict=True)
73
+ model = model.cuda().eval()
74
+ diff --git a/annotator/openpose/__init__.py b/annotator/openpose/__init__.py
75
+ index 47d50a5..2369eed 100644
76
+ --- a/annotator/openpose/__init__.py
77
+ +++ b/annotator/openpose/__init__.py
78
+ @@ -1,4 +1,5 @@
79
+ import os
80
+ +import pathlib
81
+ os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
82
+
83
+ import torch
84
+ @@ -7,8 +8,10 @@ from . import util
85
+ from .body import Body
86
+ from .hand import Hand
87
+
88
+ -body_estimation = Body('./annotator/ckpts/body_pose_model.pth')
89
+ -hand_estimation = Hand('./annotator/ckpts/hand_pose_model.pth')
90
+ +root_dir = pathlib.Path(__file__).parents[2]
91
+ +
92
+ +body_estimation = Body(f'{root_dir}/annotator/ckpts/body_pose_model.pth')
93
+ +hand_estimation = Hand(f'{root_dir}/annotator/ckpts/hand_pose_model.pth')
94
+
95
+
96
+ def apply_openpose(oriImg, hand=False):
97
+ diff --git a/annotator/uniformer/__init__.py b/annotator/uniformer/__init__.py
98
+ index 500e53c..4061dbe 100644
99
+ --- a/annotator/uniformer/__init__.py
100
+ +++ b/annotator/uniformer/__init__.py
101
+ @@ -1,9 +1,12 @@
102
+ +import pathlib
103
+ +
104
+ from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot
105
+ from annotator.uniformer.mmseg.core.evaluation import get_palette
106
+
107
+ +root_dir = pathlib.Path(__file__).parents[2]
108
+
109
+ -checkpoint_file = "annotator/ckpts/upernet_global_small.pth"
110
+ -config_file = 'annotator/uniformer/exp/upernet_global_small/config.py'
111
+ +checkpoint_file = f"{root_dir}/annotator/ckpts/upernet_global_small.pth"
112
+ +config_file = f'{root_dir}/annotator/uniformer/exp/upernet_global_small/config.py'
113
+ model = init_segmentor(config_file, checkpoint_file).cuda()
114
+
115
+
116
+ diff --git a/annotator/util.py b/annotator/util.py
117
+ index 7cde937..10a6d58 100644
118
+ --- a/annotator/util.py
119
+ +++ b/annotator/util.py
120
+ @@ -25,7 +25,7 @@ def resize_image(input_image, resolution):
121
+ H, W, C = input_image.shape
122
+ H = float(H)
123
+ W = float(W)
124
+ - k = float(resolution) / min(H, W)
125
+ + k = float(resolution) / max(H, W)
126
+ H *= k
127
+ W *= k
128
+ H = int(np.round(H / 64.0)) * 64
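Most of this patch rewrites the hard-coded './annotator/ckpts/...' paths in the ControlNet submodule so they resolve relative to the repository root, which lets the annotators be imported from the Space. The final hunk changes resize_image to scale by the longer side instead of the shorter one, keeping large non-square inputs within the requested resolution budget. A small sketch of the arithmetic; the helper below is a simplified stand-in for annotator/util.py, not the patched function itself:

import numpy as np


def scaled_shape(h, w, resolution, use_max):
    # Mirror resize_image's rounding to multiples of 64.
    k = resolution / (max(h, w) if use_max else min(h, w))
    return (int(np.round(h * k / 64.0)) * 64, int(np.round(w * k / 64.0)) * 64)


print(scaled_shape(1024, 512, 512, use_max=False))  # (1024, 512): upstream behaviour
print(scaled_shape(1024, 512, 512, use_max=True))   # (512, 256): patched behaviour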
requirements.txt ADDED
@@ -0,0 +1,22 @@
1
+ addict==2.4.0
2
+ albumentations==1.3.0
3
+ einops==0.6.0
4
+ git+https://github.com/huggingface/accelerate@78151f8
5
+ git+https://github.com/huggingface/diffusers@fa6d52d
6
+ gradio==3.23.0
7
+ imageio==2.25.0
8
+ imageio-ffmpeg==0.4.8
9
+ kornia==0.6.9
10
+ omegaconf==2.3.0
11
+ open-clip-torch==2.13.0
12
+ opencv-contrib-python==4.7.0.68
13
+ opencv-python-headless==4.7.0.68
14
+ prettytable==3.6.0
15
+ pytorch-lightning==1.9.0
16
+ safetensors==0.2.8
17
+ timm==0.6.12
18
+ torch==1.13.1
19
+ torchvision==0.14.1
20
+ transformers==4.26.1
21
+ xformers==0.0.16
22
+ yapf==0.32.0
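Every dependency is pinned exactly, with diffusers and accelerate taken from specific git commits and torch==1.13.1 paired with the matching xformers==0.0.16 build. A hedged sketch for confirming at runtime that the key pins are what is actually installed; the package names follow the file above:

from importlib.metadata import PackageNotFoundError, version

PINS = {'gradio': '3.23.0', 'torch': '1.13.1', 'xformers': '0.0.16',
        'transformers': '4.26.1', 'safetensors': '0.2.8'}

for name, pinned in PINS.items():
    try:
        installed = version(name)
    except PackageNotFoundError:
        installed = 'not installed'
    status = 'OK' if installed == pinned else f'expected {pinned}'
    print(f'{name}: {installed} ({status})')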
style.css ADDED
@@ -0,0 +1,8 @@
1
+ h1 {
2
+ text-align: center;
3
+ }
4
+
5
+ .note {
6
+ text-align: center;
7
+ font-size: 150%;
8
+ }