| | from huggingface_hub import login, HfApi |
| | import json |
| | import argparse |
| | import os |
| |
|
def upload_json_to_hf(token, repo_id, file_path, file_name):
    """Upload a single local file to a Hugging Face dataset repository.

    Args:
        token: Hugging Face access token used to authenticate the upload.
        repo_id: Target repository ID (format: username/repo-name).
        file_path: Path of the local file to upload.
        file_name: Destination path of the file inside the repository.

    Raises:
        Exception: Any error raised by ``HfApi.upload_file`` is reported on
            stdout and then re-raised unchanged so the caller can decide
            how to proceed.
    """
    # Scope the token to this client instead of calling ``login(token)``.
    # This function runs once per file, and a global login on every call
    # repeats the credential setup for each upload and alters process-wide
    # credentials as a side effect.
    api = HfApi(token=token)

    try:
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file_name,
            repo_id=repo_id,
            repo_type="dataset",
        )
    except Exception as e:
        print(f"Error uploading file: {str(e)}")
        raise
    else:
        print(f"Successfully uploaded {file_name} to {repo_id}")
| |
|
def upload_folder_to_hf(token, repo_id, folder_path):
    """Upload every valid ``.json`` file found under a folder.

    The folder is walked recursively. Each ``.json`` file is parsed first;
    files that fail to parse are skipped, and any other per-file error is
    printed so that one bad file does not stop the remaining uploads.

    Args:
        token: Hugging Face access token, forwarded to ``upload_json_to_hf``.
        repo_id: Target repository ID (format: username/repo-name).
        folder_path: Root folder to search for ``.json`` files.
    """
    files = []
    for root, _, filenames in os.walk(folder_path):
        for filename in filenames:
            if not filename.endswith('.json'):
                continue
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, folder_path)
            # The relative path becomes the path inside the repository,
            # which uses forward slashes; on Windows ``relpath`` yields
            # backslashes, so normalise the separator (no-op on POSIX).
            repo_path = relative_path.replace(os.sep, '/')
            files.append((file_path, repo_path))

    # ``os.walk`` order depends on the filesystem; sort so the upload
    # order (and the log output) is reproducible between runs.
    files.sort()

    for file_path, repo_path in files:
        try:
            # Validate the JSON before uploading. Decode explicitly as
            # UTF-8 rather than the platform's locale encoding, which can
            # mis-decode valid JSON files on non-UTF-8 systems.
            with open(file_path, 'r', encoding='utf-8') as f:
                json.load(f)

            upload_json_to_hf(token, repo_id, file_path, repo_path)
        except json.JSONDecodeError:
            print(f"Skipping invalid JSON file: {file_path}")
        except Exception as e:
            # Deliberate best-effort: report and continue with the next file.
            print(f"Error processing {file_path}: {str(e)}")
| |
|
def main():
    """Command-line entry point: parse arguments, validate them, upload.

    Reads ``--token`` (defaulting to the ``HUGGINGFACE_TOKEN`` environment
    variable), ``--repo-id`` and ``--folder-path``, then hands the folder
    to ``upload_folder_to_hf``.

    Raises:
        ValueError: No token was supplied by flag or environment variable.
        FileNotFoundError: The folder path does not exist.
        NotADirectoryError: The folder path exists but is not a directory.
    """
    cli = argparse.ArgumentParser(description='Upload JSON files to Hugging Face')

    # The environment variable is read here, when the parser is built,
    # and is used only if --token is absent from the command line.
    cli.add_argument(
        '--token',
        type=str,
        help='Hugging Face access token (or set HUGGINGFACE_TOKEN env variable)',
        default=os.getenv('HUGGINGFACE_TOKEN'),
    )
    cli.add_argument(
        '--repo-id',
        type=str,
        required=True,
        help='Repository ID (format: username/repo-name)',
    )
    cli.add_argument(
        '--folder-path',
        type=str,
        required=True,
        help='Path to the folder containing JSON files',
    )

    options = cli.parse_args()
    token = options.token
    repo_id = options.repo_id
    folder = options.folder_path

    # Fail fast on unusable input before any upload is attempted.
    if not token:
        raise ValueError("Please provide a token either via --token or HUGGINGFACE_TOKEN environment variable")

    if not os.path.exists(folder):
        raise FileNotFoundError(f"Folder not found: {folder}")

    if not os.path.isdir(folder):
        raise NotADirectoryError(f"Path is not a directory: {folder}")

    upload_folder_to_hf(token, repo_id, folder)
| |
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()