Facepalm0 committed on
Commit
22f4d72
·
verified ·
1 Parent(s): cdf5f1c

Upload upload_huggingface.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. upload_huggingface.py +107 -0
upload_huggingface.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import HfApi
2
+ import os
3
+ from pathlib import Path
4
+ from dotenv import load_dotenv
5
+ import time
6
+ from requests.exceptions import ConnectionError, HTTPError
7
+ from pathspec import PathSpec
8
+ from pathspec.patterns import GitWildMatchPattern
9
+
10
def get_gitignore_patterns(directory: str = ".") -> list:
    """Return the raw lines of the ``.gitignore`` file found in *directory*.

    Args:
        directory: Directory that is searched for a ``.gitignore`` file.
            Defaults to the current working directory, so calling the
            function without arguments behaves as it always did.

    Returns:
        One string per line of the file, without line terminators. Blank
        lines and comment lines are returned unchanged. An empty list is
        returned when the file does not exist.
    """
    gitignore_path = os.path.join(directory, ".gitignore")
    try:
        with open(gitignore_path, "r", encoding="utf-8") as f:
            return f.read().splitlines()
    except FileNotFoundError:
        # A missing .gitignore simply means there are no rules to apply.
        return []
16
+
17
def upload_to_huggingface(
    local_directory: str,
    repo_id: str,
    max_retries: int = 3,
    retry_delay: int = 5
):
    """Upload every non-ignored file under a directory to a Hugging Face repo.

    Files are filtered with gitignore-style rules: the patterns returned by
    ``get_gitignore_patterns()`` plus a built-in list of extra exclusions.
    Each remaining file is uploaded on its own with ``HfApi.upload_file``,
    using its path relative to ``local_directory`` as the path in the repo.

    Args:
        local_directory: Path of the local project directory to upload.
        repo_id: Hugging Face repository ID (format: ``username/repo_name``).
        max_retries: Maximum number of upload attempts per file. Values
            below 1 are treated as 1 so every file is tried at least once.
        retry_delay: Seconds to sleep between attempts.

    Raises:
        ValueError: If ``HUGGINGFACE_TOKEN`` is not set in the environment
            after the ``.env`` file has been loaded.
        ConnectionError: The ``requests`` connection error, re-raised when a
            file still fails on its last attempt.
        HTTPError: The ``requests`` HTTP error, re-raised when a file still
            fails on its last attempt.
    """
    # Load variables from a .env file into the process environment.
    load_dotenv()
    token = os.getenv("HUGGINGFACE_TOKEN")

    if not token:
        raise ValueError("请在 .env 文件中设置 HUGGINGFACE_TOKEN")

    # Start from the .gitignore rules, then add extra exclusions.
    gitignore_patterns = get_gitignore_patterns()
    additional_patterns = [
        "VectorScience - 方案介绍 - 策略部分.docx",
        ".git/",
        "__pycache__/",  # was "__pycache/", which never matched the real directory
        "*.pyc",
        ".env",
        ".gitignore",
        ".DS_Store",
        "ewv9ssdcuvg6",
        "*.docx",  # was ".docx", which only matched a file literally named ".docx"
        "wandb/",
        "jk_zfls/",
    ]
    gitignore_patterns.extend(additional_patterns)

    # Build a PathSpec matcher from the combined gitignore-style patterns.
    spec = PathSpec.from_lines(GitWildMatchPattern, gitignore_patterns)

    # Initialise the Hugging Face API client.
    api = HfApi()

    # Collect (local path, path in repo) pairs for every file that is kept.
    files_to_upload = []
    for root, _, files in os.walk(local_directory):
        for file in files:
            file_path = Path(root) / file
            # Repository paths always use forward slashes; str(Path) would
            # produce backslashes on Windows.
            relative_path = file_path.relative_to(local_directory).as_posix()

            if not spec.match_file(relative_path):
                files_to_upload.append((str(file_path), relative_path))

    # A non-positive max_retries would otherwise skip every upload silently.
    attempts = max(1, max_retries)

    # Upload the files one by one, retrying on network / HTTP errors.
    for local_path, path_in_repo in files_to_upload:
        for attempt in range(attempts):
            try:
                print(f"正在上传: {local_path} -> {path_in_repo}")
                api.upload_file(
                    path_or_fileobj=local_path,
                    path_in_repo=path_in_repo,
                    repo_id=repo_id,
                    token=token
                )
                print(f"成功上传: {path_in_repo}")
                break
            except (ConnectionError, HTTPError) as e:
                if attempt < attempts - 1:
                    print(f"上传失败,{retry_delay}秒后重试... ({attempt + 1}/{attempts})")
                    print(f"错误信息: {str(e)}")
                    time.sleep(retry_delay)
                else:
                    # Out of attempts: report and let the caller see the error.
                    print(f"上传失败: {path_in_repo}")
                    print(f"错误信息: {str(e)}")
                    raise
96
+
97
if __name__ == "__main__":
    # Route HTTPS traffic through the local proxy before any request is made.
    os.environ.update(HTTPS_PROXY="http://127.0.0.1:17890")

    # Example invocation: upload the current directory to the target repo,
    # allowing up to 5 attempts per file with 5 seconds between attempts.
    upload_options = {
        "local_directory": ".",
        "repo_id": "Facepalm0/Ubiquant_CharacterHunter",  # replace with your own repo ID
        "max_retries": 5,
        "retry_delay": 5,
    }
    upload_to_huggingface(**upload_options)