RefurnishAI

Sleeping

App Files Files Community

RefurnishAI / custom_nodes /ComfyUI-Manager /scanner.py

multimodalart HF staff

Squashing commit

4450790 verified 24 days ago

raw

history blame

17.6 kB

	import ast
	import re
	import os
	import json
	from git import Repo
	import concurrent
	import datetime
	import concurrent.futures
	import requests

	builtin_nodes = set()

	import sys

	from urllib.parse import urlparse
	from github import Github


	def download_url(url, dest_folder, filename=None):
	# Ensure the destination folder exists
	if not os.path.exists(dest_folder):
	os.makedirs(dest_folder)

	# Extract filename from URL if not provided
	if filename is None:
	filename = os.path.basename(url)

	# Full path to save the file
	dest_path = os.path.join(dest_folder, filename)

	# Download the file
	response = requests.get(url, stream=True)
	if response.status_code == 200:
	with open(dest_path, 'wb') as file:
	for chunk in response.iter_content(chunk_size=1024):
	if chunk:
	file.write(chunk)
	else:
	raise Exception(f"Failed to download file from {url}")


	# prepare temp dir
	if len(sys.argv) > 1:
	temp_dir = sys.argv[1]
	else:
	temp_dir = os.path.join(os.getcwd(), ".tmp")

	if not os.path.exists(temp_dir):
	os.makedirs(temp_dir)


	skip_update = '--skip-update' in sys.argv or '--skip-all' in sys.argv
	skip_stat_update = '--skip-stat-update' in sys.argv or '--skip-all' in sys.argv

	if not skip_stat_update:
	g = Github(os.environ.get('GITHUB_TOKEN'))
	else:
	g = None


	print(f"TEMP DIR: {temp_dir}")


	parse_cnt = 0


	def extract_nodes(code_text):
	global parse_cnt

	try:
	if parse_cnt % 100 == 0:
	print(f".", end="", flush=True)
	parse_cnt += 1

	code_text = re.sub(r'\\[^"\']', '', code_text)
	parsed_code = ast.parse(code_text)

	assignments = (node for node in parsed_code.body if isinstance(node, ast.Assign))

	for assignment in assignments:
	if isinstance(assignment.targets[0], ast.Name) and assignment.targets[0].id in ['NODE_CONFIG', 'NODE_CLASS_MAPPINGS']:
	node_class_mappings = assignment.value
	break
	else:
	node_class_mappings = None

	if node_class_mappings:
	s = set()

	for key in node_class_mappings.keys:
	if key is not None and isinstance(key.value, str):
	s.add(key.value.strip())

	return s
	else:
	return set()
	except:
	return set()


	# scan
	def scan_in_file(filename, is_builtin=False):
	global builtin_nodes

	try:
	with open(filename, encoding='utf-8') as file:
	code = file.read()
	except UnicodeDecodeError:
	with open(filename, encoding='cp949') as file:
	code = file.read()

	pattern = r"_CLASS_MAPPINGS\s=\s{([^}]*)}"
	regex = re.compile(pattern, re.MULTILINE \| re.DOTALL)

	nodes = set()
	class_dict = {}

	nodes \|= extract_nodes(code)
	code = re.sub(r'^#.*?$', '', code, flags=re.MULTILINE)

	def extract_keys(pattern, code):
	keys = re.findall(pattern, code)
	return {key.strip() for key in keys}

	def update_nodes(nodes, new_keys):
	nodes \|= new_keys

	patterns = [
	r'^[^=]_CLASS_MAPPINGS\["(.?)"\]',
	r'^[^=]_CLASS_MAPPINGS\[\'(.?)\'\]',
	r'@register_node$"(.+)",\s*\".+"$',
	r'"(\w+)"\s:\s{"class":\s\w+\s'
	]

	with concurrent.futures.ThreadPoolExecutor() as executor:
	futures = {executor.submit(extract_keys, pattern, code): pattern for pattern in patterns}
	for future in concurrent.futures.as_completed(futures):
	update_nodes(nodes, future.result())

	matches = regex.findall(code)
	for match in matches:
	dict_text = match

	key_value_pairs = re.findall(r"\"([^\"])\"\s:\s([^,\n])", dict_text)
	for key, value in key_value_pairs:
	class_dict[key.strip()] = value.strip()

	key_value_pairs = re.findall(r"'([^'])'\s:\s([^,\n])", dict_text)
	for key, value in key_value_pairs:
	class_dict[key.strip()] = value.strip()

	for key, value in class_dict.items():
	nodes.add(key.strip())

	update_pattern = r"_CLASS_MAPPINGS.update\s${([^}])}$"
	update_match = re.search(update_pattern, code)
	if update_match:
	update_dict_text = update_match.group(1)
	update_key_value_pairs = re.findall(r"\"([^\"])\"\s:\s([^,\n])", update_dict_text)
	for key, value in update_key_value_pairs:
	class_dict[key.strip()] = value.strip()
	nodes.add(key.strip())

	metadata = {}
	lines = code.strip().split('\n')
	for line in lines:
	if line.startswith('@'):
	if line.startswith("@author:") or line.startswith("@title:") or line.startswith("@nickname:") or line.startswith("@description:"):
	key, value = line[1:].strip().split(':', 1)
	metadata[key.strip()] = value.strip()

	if is_builtin:
	builtin_nodes += set(nodes)
	else:
	for x in builtin_nodes:
	if x in nodes:
	nodes.remove(x)

	return nodes, metadata


	def get_py_file_paths(dirname):
	file_paths = []

	for root, dirs, files in os.walk(dirname):
	if ".git" in root or "__pycache__" in root:
	continue

	for file in files:
	if file.endswith(".py"):
	file_path = os.path.join(root, file)
	file_paths.append(file_path)

	return file_paths


	def get_nodes(target_dir):
	py_files = []
	directories = []

	for item in os.listdir(target_dir):
	if ".git" in item or "__pycache__" in item:
	continue

	path = os.path.abspath(os.path.join(target_dir, item))

	if os.path.isfile(path) and item.endswith(".py"):
	py_files.append(path)
	elif os.path.isdir(path):
	directories.append(path)

	return py_files, directories


	def get_git_urls_from_json(json_file):
	with open(json_file, encoding='utf-8') as file:
	data = json.load(file)

	custom_nodes = data.get('custom_nodes', [])
	git_clone_files = []
	for node in custom_nodes:
	if node.get('install_type') == 'git-clone':
	files = node.get('files', [])
	if files:
	git_clone_files.append((files[0], node.get('title'), node.get('preemptions'), node.get('nodename_pattern')))

	git_clone_files.append(("https://github.com/comfyanonymous/ComfyUI", "ComfyUI", None, None))

	return git_clone_files


	def get_py_urls_from_json(json_file):
	with open(json_file, encoding='utf-8') as file:
	data = json.load(file)

	custom_nodes = data.get('custom_nodes', [])
	py_files = []
	for node in custom_nodes:
	if node.get('install_type') == 'copy':
	files = node.get('files', [])
	if files:
	py_files.append((files[0], node.get('title'), node.get('preemptions'), node.get('nodename_pattern')))

	return py_files


	def clone_or_pull_git_repository(git_url):
	repo_name = git_url.split("/")[-1].split(".")[0]
	repo_dir = os.path.join(temp_dir, repo_name)

	if os.path.exists(repo_dir):
	try:
	repo = Repo(repo_dir)
	origin = repo.remote(name="origin")
	origin.pull()
	repo.git.submodule('update', '--init', '--recursive')
	print(f"Pulling {repo_name}...")
	except Exception as e:
	print(f"Pulling {repo_name} failed: {e}")
	else:
	try:
	Repo.clone_from(git_url, repo_dir, recursive=True)
	print(f"Cloning {repo_name}...")
	except Exception as e:
	print(f"Cloning {repo_name} failed: {e}")


	def update_custom_nodes():
	if not os.path.exists(temp_dir):
	os.makedirs(temp_dir)

	node_info = {}

	git_url_titles_preemptions = get_git_urls_from_json('custom-node-list.json')

	def process_git_url_title(url, title, preemptions, node_pattern):
	name = os.path.basename(url)
	if name.endswith(".git"):
	name = name[:-4]

	node_info[name] = (url, title, preemptions, node_pattern)
	if not skip_update:
	clone_or_pull_git_repository(url)

	def process_git_stats(git_url_titles_preemptions):
	GITHUB_STATS_CACHE_FILENAME = 'github-stats-cache.json'
	GITHUB_STATS_FILENAME = 'github-stats.json'

	github_stats = {}
	try:
	with open(GITHUB_STATS_CACHE_FILENAME, 'r', encoding='utf-8') as file:
	github_stats = json.load(file)
	except FileNotFoundError:
	pass

	def is_rate_limit_exceeded():
	return g.rate_limiting[0] == 0

	if is_rate_limit_exceeded():
	print(f"GitHub API Rate Limit Exceeded: remained - {(g.rate_limiting_resettime - datetime.datetime.now().timestamp())/60:.2f} min")
	else:
	def renew_stat(url):
	if is_rate_limit_exceeded():
	return

	if 'github.com' not in url:
	return None

	print('.', end="")
	sys.stdout.flush()
	try:
	# Parsing the URL
	parsed_url = urlparse(url)
	domain = parsed_url.netloc
	path = parsed_url.path
	path_parts = path.strip("/").split("/")
	if len(path_parts) >= 2 and domain == "github.com":
	owner_repo = "/".join(path_parts[-2:])
	repo = g.get_repo(owner_repo)
	owner = repo.owner
	now = datetime.datetime.now(datetime.timezone.utc)
	author_time_diff = now - owner.created_at

	last_update = repo.pushed_at.strftime("%Y-%m-%d %H:%M:%S") if repo.pushed_at else 'N/A'
	item = {
	"stars": repo.stargazers_count,
	"last_update": last_update,
	"cached_time": now.timestamp(),
	"author_account_age_days": author_time_diff.days,
	}
	return url, item
	else:
	print(f"\nInvalid URL format for GitHub repository: {url}\n")
	except Exception as e:
	print(f"\nERROR on {url}\n{e}")

	return None

	# resolve unresolved urls
	with concurrent.futures.ThreadPoolExecutor(11) as executor:
	futures = []
	for url, title, preemptions, node_pattern in git_url_titles_preemptions:
	if url not in github_stats:
	futures.append(executor.submit(renew_stat, url))

	for future in concurrent.futures.as_completed(futures):
	url_item = future.result()
	if url_item is not None:
	url, item = url_item
	github_stats[url] = item

	# renew outdated cache
	outdated_urls = []
	for k, v in github_stats.items():
	elapsed = (datetime.datetime.now().timestamp() - v['cached_time'])
	if elapsed > 606012: # 12 hours
	outdated_urls.append(k)

	with concurrent.futures.ThreadPoolExecutor(11) as executor:
	for url in outdated_urls:
	futures.append(executor.submit(renew_stat, url))

	for future in concurrent.futures.as_completed(futures):
	url_item = future.result()
	if url_item is not None:
	url, item = url_item
	github_stats[url] = item

	with open('github-stats-cache.json', 'w', encoding='utf-8') as file:
	json.dump(github_stats, file, ensure_ascii=False, indent=4)

	with open(GITHUB_STATS_FILENAME, 'w', encoding='utf-8') as file:
	for v in github_stats.values():
	if "cached_time" in v:
	del v["cached_time"]

	github_stats = dict(sorted(github_stats.items()))

	json.dump(github_stats, file, ensure_ascii=False, indent=4)

	print(f"Successfully written to {GITHUB_STATS_FILENAME}.")

	if not skip_stat_update:
	process_git_stats(git_url_titles_preemptions)

	with concurrent.futures.ThreadPoolExecutor(11) as executor:
	for url, title, preemptions, node_pattern in git_url_titles_preemptions:
	executor.submit(process_git_url_title, url, title, preemptions, node_pattern)

	py_url_titles_and_pattern = get_py_urls_from_json('custom-node-list.json')

	def download_and_store_info(url_title_preemptions_and_pattern):
	url, title, preemptions, node_pattern = url_title_preemptions_and_pattern
	name = os.path.basename(url)
	if name.endswith(".py"):
	node_info[name] = (url, title, preemptions, node_pattern)

	try:
	download_url(url, temp_dir)
	except:
	print(f"[ERROR] Cannot download '{url}'")

	with concurrent.futures.ThreadPoolExecutor(10) as executor:
	executor.map(download_and_store_info, py_url_titles_and_pattern)

	return node_info


	def gen_json(node_info):
	# scan from .py file
	node_files, node_dirs = get_nodes(temp_dir)

	comfyui_path = os.path.abspath(os.path.join(temp_dir, "ComfyUI"))
	node_dirs.remove(comfyui_path)
	node_dirs = [comfyui_path] + node_dirs

	data = {}
	for dirname in node_dirs:
	py_files = get_py_file_paths(dirname)
	metadata = {}

	nodes = set()
	for py in py_files:
	nodes_in_file, metadata_in_file = scan_in_file(py, dirname == "ComfyUI")
	nodes.update(nodes_in_file)
	metadata.update(metadata_in_file)

	dirname = os.path.basename(dirname)

	if 'Jovimetrix' in dirname:
	pass

	if len(nodes) > 0 or (dirname in node_info and node_info[dirname][3] is not None):
	nodes = list(nodes)
	nodes.sort()

	if dirname in node_info:
	git_url, title, preemptions, node_pattern = node_info[dirname]

	metadata['title_aux'] = title

	if preemptions is not None:
	metadata['preemptions'] = preemptions

	if node_pattern is not None:
	metadata['nodename_pattern'] = node_pattern

	data[git_url] = (nodes, metadata)
	else:
	print(f"WARN: {dirname} is removed from custom-node-list.json")

	for file in node_files:
	nodes, metadata = scan_in_file(file)

	if len(nodes) > 0 or (dirname in node_info and node_info[dirname][3] is not None):
	nodes = list(nodes)
	nodes.sort()

	file = os.path.basename(file)

	if file in node_info:
	url, title, preemptions, node_pattern = node_info[file]
	metadata['title_aux'] = title

	if preemptions is not None:
	metadata['preemptions'] = preemptions

	if node_pattern is not None:
	metadata['nodename_pattern'] = node_pattern

	data[url] = (nodes, metadata)
	else:
	print(f"Missing info: {file}")

	# scan from node_list.json file
	extensions = [name for name in os.listdir(temp_dir) if os.path.isdir(os.path.join(temp_dir, name))]

	for extension in extensions:
	node_list_json_path = os.path.join(temp_dir, extension, 'node_list.json')
	if os.path.exists(node_list_json_path):
	git_url, title, preemptions, node_pattern = node_info[extension]

	with open(node_list_json_path, 'r', encoding='utf-8') as f:
	try:
	node_list_json = json.load(f)
	except Exception as e:
	print(f"\nERROR: Invalid json format '{node_list_json_path}'")
	print("------------------------------------------------------")
	print(e)
	print("------------------------------------------------------")
	node_list_json = {}

	metadata_in_url = {}
	if git_url not in data:
	nodes = set()
	else:
	nodes_in_url, metadata_in_url = data[git_url]
	nodes = set(nodes_in_url)

	for x, desc in node_list_json.items():
	nodes.add(x.strip())

	metadata_in_url['title_aux'] = title

	if preemptions is not None:
	metadata['preemptions'] = preemptions

	if node_pattern is not None:
	metadata_in_url['nodename_pattern'] = node_pattern

	nodes = list(nodes)
	nodes.sort()
	data[git_url] = (nodes, metadata_in_url)

	json_path = f"extension-node-map.json"
	with open(json_path, "w", encoding='utf-8') as file:
	json.dump(data, file, indent=4, sort_keys=True)


	print("### ComfyUI Manager Node Scanner ###")

	print("\n# Updating extensions\n")
	updated_node_info = update_custom_nodes()

	print("\n# 'extension-node-map.json' file is generated.\n")
	gen_json(updated_node_info)

	print("\nDONE.\n")