"""Upload every file under a local folder to a Hugging Face model repo.

Files are uploaded in parallel by a thread pool; each file has its own retry
loop, and a summary of any failed uploads is printed at the end.
"""

import os
import time
from concurrent.futures import ThreadPoolExecutor

from huggingface_hub import HfApi
from tqdm import tqdm

# A file is skipped when any of these substrings occurs in its file name.
_EXCLUDED_NAME_PATTERNS = (".git",)


def upload_single_file(args: tuple) -> tuple:
    """Upload one file to the Hub, retrying with a growing delay on failure.

    Args:
        args: A ``(file_path, path_in_repo, repo_id, max_retries)`` tuple.
            The values are packed into a single argument so the function can
            be passed directly to ``Executor.map``.

    Returns:
        A ``(success, path_in_repo, error)`` tuple. ``error`` is ``None`` on
        success, otherwise the string form of the last exception raised.
    """
    file_path, path_in_repo, repo_id, max_retries = args
    # Always make at least one attempt: with max_retries <= 0 the loop body
    # would never run and the function would fall through returning None,
    # which breaks the caller's tuple unpacking.
    attempts = max(1, max_retries)
    api = HfApi()
    last_error = None
    for attempt in range(attempts):
        try:
            api.upload_file(
                path_or_fileobj=file_path,
                path_in_repo=path_in_repo,
                repo_id=repo_id,
                repo_type="model",
            )
        except Exception as exc:
            # Deliberately broad: any failure is retried and, if it persists,
            # reported to the caller instead of killing the worker thread.
            last_error = str(exc)
            if attempt < attempts - 1:
                # Linear back-off: 10s, 20s, 30s, ...
                time.sleep(10 * (attempt + 1))
        else:
            return True, path_in_repo, None
    return False, path_in_repo, last_error


def _collect_files(folder_path: str) -> list:
    """Return ``(local_path, path_in_repo)`` pairs for every file to upload."""
    files = []
    for root, dirnames, filenames in os.walk(folder_path):
        # Prune in place so os.walk never descends into git metadata; the
        # file-name filter below cannot catch entries such as ".git/HEAD".
        dirnames[:] = [name for name in dirnames if name != ".git"]
        for filename in filenames:
            if any(pattern in filename for pattern in _EXCLUDED_NAME_PATTERNS):
                continue
            full_path = os.path.join(root, filename)
            relative_path = os.path.relpath(full_path, folder_path)
            # Repo paths use forward slashes regardless of the local OS.
            files.append((full_path, relative_path.replace(os.sep, "/")))
    return files


def upload_with_retry(
    folder_path: str,
    repo_id: str,
    max_retries: int = 10,
    max_workers: int = 4,
) -> None:
    """Upload all files under ``folder_path`` to ``repo_id`` in parallel.

    Args:
        folder_path: Local directory to walk recursively.
        repo_id: Target repository, passed through to ``HfApi.upload_file``.
        max_retries: Maximum upload attempts per file.
        max_workers: Number of worker threads uploading concurrently.

    Prints the number of files found, a progress bar, and a final report
    listing each failed file with its last error message.
    """
    # Gather every file to upload.
    files = _collect_files(folder_path)
    print(f"Found {len(files)} files to upload")

    # Build one argument tuple per file for upload_single_file.
    upload_args = [
        (file_path, path_in_repo, repo_id, max_retries)
        for file_path, path_in_repo in files
    ]

    # Upload in parallel; results come back in submission order.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(
            tqdm(
                executor.map(upload_single_file, upload_args),
                total=len(files),
                desc="Uploading files",
            )
        )

    # Keep only the failures for the report.
    failed_files = [
        (file_name, error)
        for success, file_name, error in results
        if not success
    ]

    # Report the outcome.
    print("\nUpload completed!")
    print(f"Successfully uploaded: {len(files) - len(failed_files)} files")
    if failed_files:
        print("\nFailed uploads:")
        for file_name, error in failed_files:
            print(f"- {file_name}: {error}")


# Usage: only start the upload when run as a script, not when imported.
if __name__ == "__main__":
    upload_with_retry(
        ".",
        "Neph0s/CoSER-Llama-3.1-70B",
        max_retries=10,
        max_workers=10,  # upload up to 10 files concurrently
    )