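"""ComfyUI Manager node scanner.

Clones or pulls every custom node repository listed in custom-node-list.json,
scans their Python sources for node class names, optionally refreshes GitHub
repository stats, and writes the results to extension-node-map.json and
github-stats.json.
"""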
import ast
import concurrent.futures
import datetime
import json
import os
import re
import sys
from urllib.parse import urlparse

import requests
from git import Repo
from github import Github

# Node names that ship with ComfyUI itself; they are filtered out of the
# per-extension results below.
builtin_nodes = set()
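# Download a single file over HTTP into dest_folder; used for 'copy'-type
# custom nodes that are distributed as standalone .py files.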
def download_url(url, dest_folder, filename=None):
# Ensure the destination folder exists
if not os.path.exists(dest_folder):
os.makedirs(dest_folder)
# Extract filename from URL if not provided
if filename is None:
filename = os.path.basename(url)
# Full path to save the file
dest_path = os.path.join(dest_folder, filename)
# Download the file
response = requests.get(url, stream=True)
if response.status_code == 200:
with open(dest_path, 'wb') as file:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
file.write(chunk)
    else:
        raise Exception(f"Failed to download file from {url} (HTTP {response.status_code})")
# Prepare the working directory: first CLI argument, or ./.tmp by default.
if len(sys.argv) > 1:
temp_dir = sys.argv[1]
else:
temp_dir = os.path.join(os.getcwd(), ".tmp")
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
skip_update = '--skip-update' in sys.argv or '--skip-all' in sys.argv
skip_stat_update = '--skip-stat-update' in sys.argv or '--skip-all' in sys.argv
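# A GitHub API client is only needed when repository stats are refreshed;
# setting GITHUB_TOKEN raises the authenticated API rate limit.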
if not skip_stat_update:
g = Github(os.environ.get('GITHUB_TOKEN'))
else:
g = None
print(f"TEMP DIR: {temp_dir}")
parse_cnt = 0
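# Parse a source file with ast and collect the string keys of any top-level
# NODE_CONFIG / NODE_CLASS_MAPPINGS dict literal.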
def extract_nodes(code_text):
    global parse_cnt

    try:
        if parse_cnt % 100 == 0:
            print(".", end="", flush=True)  # progress indicator

        parse_cnt += 1

        # Drop stray backslash escapes that can break ast.parse.
        code_text = re.sub(r'\\[^"\']', '', code_text)
        parsed_code = ast.parse(code_text)

        assignments = (node for node in parsed_code.body if isinstance(node, ast.Assign))

        for assignment in assignments:
            if isinstance(assignment.targets[0], ast.Name) and assignment.targets[0].id in ['NODE_CONFIG', 'NODE_CLASS_MAPPINGS'] and isinstance(assignment.value, ast.Dict):
                node_class_mappings = assignment.value
                break
        else:
            node_class_mappings = None

        if node_class_mappings:
            s = set()

            # ast.Dict.keys holds the key expressions; a None entry marks a
            # `**mapping` expansion and carries no node name.
            for key in node_class_mappings.keys:
                if key is not None and isinstance(key, ast.Constant) and isinstance(key.value, str):
                    s.add(key.value.strip())

            return s
        else:
            return set()
    except Exception:
        # Unparsable file: fall back to the regex-based scan in scan_in_file.
        return set()
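# Example (hypothetical source): a file containing
#     NODE_CLASS_MAPPINGS = {"ImageBlend": ImageBlendNode}
# makes extract_nodes return {"ImageBlend"}.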
# Scan a single .py file: combine the AST pass above with regex fallbacks,
# and collect @author/@title/@nickname/@description metadata lines.
def scan_in_file(filename, is_builtin=False):
    global builtin_nodes

    try:
        with open(filename, encoding='utf-8') as file:
            code = file.read()
    except UnicodeDecodeError:
        # Fall back to CP949 for files saved with a Korean codepage.
        with open(filename, encoding='cp949') as file:
            code = file.read()

    pattern = r"_CLASS_MAPPINGS\s*=\s*{([^}]*)}"
    regex = re.compile(pattern, re.MULTILINE | re.DOTALL)

    nodes = set()
    class_dict = {}

    nodes |= extract_nodes(code)

    # Strip comment lines so commented-out mappings are not picked up.
    code = re.sub(r'^#.*?$', '', code, flags=re.MULTILINE)

    def extract_keys(pattern, code):
        keys = re.findall(pattern, code)
        return {key.strip() for key in keys}

    def update_nodes(nodes, new_keys):
        nodes |= new_keys
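    # Regex fallbacks for mappings the AST pass cannot see: item-by-item
    # assignment, a @register_node decorator, and NODE_CONFIG-style
    # {"name": {"class": ...}} entries.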
patterns = [
r'^[^=]*_CLASS_MAPPINGS\["(.*?)"\]',
r'^[^=]*_CLASS_MAPPINGS\[\'(.*?)\'\]',
r'@register_node\("(.+)",\s*\".+"\)',
r'"(\w+)"\s*:\s*{"class":\s*\w+\s*'
]
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = {executor.submit(extract_keys, pattern, code): pattern for pattern in patterns}
for future in concurrent.futures.as_completed(futures):
update_nodes(nodes, future.result())
matches = regex.findall(code)
for match in matches:
dict_text = match
key_value_pairs = re.findall(r"\"([^\"]*)\"\s*:\s*([^,\n]*)", dict_text)
for key, value in key_value_pairs:
class_dict[key.strip()] = value.strip()
key_value_pairs = re.findall(r"'([^']*)'\s*:\s*([^,\n]*)", dict_text)
for key, value in key_value_pairs:
class_dict[key.strip()] = value.strip()
for key, value in class_dict.items():
nodes.add(key.strip())
update_pattern = r"_CLASS_MAPPINGS.update\s*\({([^}]*)}\)"
update_match = re.search(update_pattern, code)
if update_match:
update_dict_text = update_match.group(1)
update_key_value_pairs = re.findall(r"\"([^\"]*)\"\s*:\s*([^,\n]*)", update_dict_text)
for key, value in update_key_value_pairs:
class_dict[key.strip()] = value.strip()
nodes.add(key.strip())
metadata = {}
lines = code.strip().split('\n')
for line in lines:
        # Header lines like "@title: ..." carry extension metadata.
        if line.startswith(("@author:", "@title:", "@nickname:", "@description:")):
            key, value = line[1:].strip().split(':', 1)
            metadata[key.strip()] = value.strip()
    if is_builtin:
        # Record ComfyUI's builtin node names.
        builtin_nodes |= nodes
    else:
        # Custom extensions must not re-report builtin node names.
        nodes -= builtin_nodes
return nodes, metadata
def get_py_file_paths(dirname):
file_paths = []
for root, dirs, files in os.walk(dirname):
if ".git" in root or "__pycache__" in root:
continue
for file in files:
if file.endswith(".py"):
file_path = os.path.join(root, file)
file_paths.append(file_path)
return file_paths
def get_nodes(target_dir):
py_files = []
directories = []
for item in os.listdir(target_dir):
if ".git" in item or "__pycache__" in item:
continue
path = os.path.abspath(os.path.join(target_dir, item))
if os.path.isfile(path) and item.endswith(".py"):
py_files.append(path)
elif os.path.isdir(path):
directories.append(path)
return py_files, directories
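# custom-node-list.json is assumed (inferred from the fields read below) to
# look roughly like:
#   {"custom_nodes": [{"install_type": "git-clone" | "copy", "files": [...],
#                      "title": ..., "preemptions": ..., "nodename_pattern": ...}]}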
def get_git_urls_from_json(json_file):
with open(json_file, encoding='utf-8') as file:
data = json.load(file)
custom_nodes = data.get('custom_nodes', [])
git_clone_files = []
for node in custom_nodes:
if node.get('install_type') == 'git-clone':
files = node.get('files', [])
if files:
git_clone_files.append((files[0], node.get('title'), node.get('preemptions'), node.get('nodename_pattern')))
git_clone_files.append(("https://github.com/comfyanonymous/ComfyUI", "ComfyUI", None, None))
return git_clone_files
def get_py_urls_from_json(json_file):
with open(json_file, encoding='utf-8') as file:
data = json.load(file)
custom_nodes = data.get('custom_nodes', [])
py_files = []
for node in custom_nodes:
if node.get('install_type') == 'copy':
files = node.get('files', [])
if files:
py_files.append((files[0], node.get('title'), node.get('preemptions'), node.get('nodename_pattern')))
return py_files
def clone_or_pull_git_repository(git_url):
    # Derive the checkout directory name the same way node_info keys are built.
    repo_name = os.path.basename(git_url)
    if repo_name.endswith(".git"):
        repo_name = repo_name[:-4]
repo_dir = os.path.join(temp_dir, repo_name)
if os.path.exists(repo_dir):
try:
repo = Repo(repo_dir)
origin = repo.remote(name="origin")
origin.pull()
repo.git.submodule('update', '--init', '--recursive')
print(f"Pulling {repo_name}...")
except Exception as e:
print(f"Pulling {repo_name} failed: {e}")
else:
try:
Repo.clone_from(git_url, repo_dir, recursive=True)
print(f"Cloning {repo_name}...")
except Exception as e:
print(f"Cloning {repo_name} failed: {e}")
def update_custom_nodes():
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
node_info = {}
git_url_titles_preemptions = get_git_urls_from_json('custom-node-list.json')
def process_git_url_title(url, title, preemptions, node_pattern):
name = os.path.basename(url)
if name.endswith(".git"):
name = name[:-4]
node_info[name] = (url, title, preemptions, node_pattern)
if not skip_update:
clone_or_pull_git_repository(url)
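    # Collect star counts and last-push times for every repo via the GitHub
    # API, caching results locally; cache entries older than 12 hours are
    # refreshed.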
def process_git_stats(git_url_titles_preemptions):
GITHUB_STATS_CACHE_FILENAME = 'github-stats-cache.json'
GITHUB_STATS_FILENAME = 'github-stats.json'
github_stats = {}
try:
with open(GITHUB_STATS_CACHE_FILENAME, 'r', encoding='utf-8') as file:
github_stats = json.load(file)
except FileNotFoundError:
pass
def is_rate_limit_exceeded():
return g.rate_limiting[0] == 0
if is_rate_limit_exceeded():
print(f"GitHub API Rate Limit Exceeded: remained - {(g.rate_limiting_resettime - datetime.datetime.now().timestamp())/60:.2f} min")
else:
def renew_stat(url):
if is_rate_limit_exceeded():
return
if 'github.com' not in url:
return None
print('.', end="")
sys.stdout.flush()
try:
# Parsing the URL
parsed_url = urlparse(url)
domain = parsed_url.netloc
path = parsed_url.path
path_parts = path.strip("/").split("/")
if len(path_parts) >= 2 and domain == "github.com":
owner_repo = "/".join(path_parts[-2:])
repo = g.get_repo(owner_repo)
owner = repo.owner
now = datetime.datetime.now(datetime.timezone.utc)
author_time_diff = now - owner.created_at
last_update = repo.pushed_at.strftime("%Y-%m-%d %H:%M:%S") if repo.pushed_at else 'N/A'
item = {
"stars": repo.stargazers_count,
"last_update": last_update,
"cached_time": now.timestamp(),
"author_account_age_days": author_time_diff.days,
}
return url, item
else:
print(f"\nInvalid URL format for GitHub repository: {url}\n")
except Exception as e:
print(f"\nERROR on {url}\n{e}")
return None
            # First pass: fetch stats for repos that are not cached yet.
            with concurrent.futures.ThreadPoolExecutor(11) as executor:
                futures = []
                for url, title, preemptions, node_pattern in git_url_titles_preemptions:
                    if url not in github_stats:
                        futures.append(executor.submit(renew_stat, url))

                for future in concurrent.futures.as_completed(futures):
                    url_item = future.result()
                    if url_item is not None:
                        url, item = url_item
                        github_stats[url] = item

            # Second pass: refresh cache entries older than 12 hours.
            outdated_urls = []
            for k, v in github_stats.items():
                elapsed = datetime.datetime.now().timestamp() - v['cached_time']
                if elapsed > 60*60*12:
                    outdated_urls.append(k)

            with concurrent.futures.ThreadPoolExecutor(11) as executor:
                futures = []  # reset; don't re-consume the finished futures above
                for url in outdated_urls:
                    futures.append(executor.submit(renew_stat, url))

                for future in concurrent.futures.as_completed(futures):
                    url_item = future.result()
                    if url_item is not None:
                        url, item = url_item
                        github_stats[url] = item
        with open(GITHUB_STATS_CACHE_FILENAME, 'w', encoding='utf-8') as file:
            json.dump(github_stats, file, ensure_ascii=False, indent=4)

        with open(GITHUB_STATS_FILENAME, 'w', encoding='utf-8') as file:
            # The published stats file omits the internal cached_time field.
            for v in github_stats.values():
                if "cached_time" in v:
                    del v["cached_time"]

            github_stats = dict(sorted(github_stats.items()))
            json.dump(github_stats, file, ensure_ascii=False, indent=4)

        print(f"Successfully written to {GITHUB_STATS_FILENAME}.")
if not skip_stat_update:
process_git_stats(git_url_titles_preemptions)
with concurrent.futures.ThreadPoolExecutor(11) as executor:
for url, title, preemptions, node_pattern in git_url_titles_preemptions:
executor.submit(process_git_url_title, url, title, preemptions, node_pattern)
py_url_titles_and_pattern = get_py_urls_from_json('custom-node-list.json')
def download_and_store_info(url_title_preemptions_and_pattern):
url, title, preemptions, node_pattern = url_title_preemptions_and_pattern
name = os.path.basename(url)
if name.endswith(".py"):
node_info[name] = (url, title, preemptions, node_pattern)
try:
download_url(url, temp_dir)
        except Exception:
print(f"[ERROR] Cannot download '{url}'")
with concurrent.futures.ThreadPoolExecutor(10) as executor:
executor.map(download_and_store_info, py_url_titles_and_pattern)
return node_info
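# Build extension-node-map.json: git (or .py) URL -> (sorted node name list,
# metadata) for every scanned extension.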
def gen_json(node_info):
# scan from .py file
node_files, node_dirs = get_nodes(temp_dir)
    # Scan ComfyUI first so its builtin node names are known before the
    # custom extensions are processed.
    comfyui_path = os.path.abspath(os.path.join(temp_dir, "ComfyUI"))
    if comfyui_path in node_dirs:
        node_dirs.remove(comfyui_path)
    node_dirs = [comfyui_path] + node_dirs
data = {}
for dirname in node_dirs:
py_files = get_py_file_paths(dirname)
metadata = {}
nodes = set()
        for py in py_files:
            nodes_in_file, metadata_in_file = scan_in_file(py, os.path.basename(dirname) == "ComfyUI")
            nodes.update(nodes_in_file)
            metadata.update(metadata_in_file)

        dirname = os.path.basename(dirname)
if len(nodes) > 0 or (dirname in node_info and node_info[dirname][3] is not None):
nodes = list(nodes)
nodes.sort()
if dirname in node_info:
git_url, title, preemptions, node_pattern = node_info[dirname]
metadata['title_aux'] = title
if preemptions is not None:
metadata['preemptions'] = preemptions
if node_pattern is not None:
metadata['nodename_pattern'] = node_pattern
data[git_url] = (nodes, metadata)
else:
print(f"WARN: {dirname} is removed from custom-node-list.json")
    for file in node_files:
        nodes, metadata = scan_in_file(file)
        file = os.path.basename(file)

        if len(nodes) > 0 or (file in node_info and node_info[file][3] is not None):
            nodes = list(nodes)
            nodes.sort()
if file in node_info:
url, title, preemptions, node_pattern = node_info[file]
metadata['title_aux'] = title
if preemptions is not None:
metadata['preemptions'] = preemptions
if node_pattern is not None:
metadata['nodename_pattern'] = node_pattern
data[url] = (nodes, metadata)
else:
print(f"Missing info: {file}")
    # Merge in node_list.json files that extensions ship to declare extra node names.
extensions = [name for name in os.listdir(temp_dir) if os.path.isdir(os.path.join(temp_dir, name))]
for extension in extensions:
node_list_json_path = os.path.join(temp_dir, extension, 'node_list.json')
        if os.path.exists(node_list_json_path):
            # Skip directories that are no longer listed in custom-node-list.json.
            if extension not in node_info:
                continue

            git_url, title, preemptions, node_pattern = node_info[extension]
with open(node_list_json_path, 'r', encoding='utf-8') as f:
try:
node_list_json = json.load(f)
except Exception as e:
print(f"\nERROR: Invalid json format '{node_list_json_path}'")
print("------------------------------------------------------")
print(e)
print("------------------------------------------------------")
node_list_json = {}
metadata_in_url = {}
if git_url not in data:
nodes = set()
else:
nodes_in_url, metadata_in_url = data[git_url]
nodes = set(nodes_in_url)
for x, desc in node_list_json.items():
nodes.add(x.strip())
metadata_in_url['title_aux'] = title
if preemptions is not None:
                metadata_in_url['preemptions'] = preemptions
if node_pattern is not None:
metadata_in_url['nodename_pattern'] = node_pattern
nodes = list(nodes)
nodes.sort()
data[git_url] = (nodes, metadata_in_url)
    json_path = "extension-node-map.json"
with open(json_path, "w", encoding='utf-8') as file:
json.dump(data, file, indent=4, sort_keys=True)
print("### ComfyUI Manager Node Scanner ###")
print("\n# Updating extensions\n")
updated_node_info = update_custom_nodes()
print("\n# 'extension-node-map.json' file is generated.\n")
gen_json(updated_node_info)
print("\nDONE.\n")