"""Upload the JSON files found in a local folder to a Hugging Face dataset repo."""

import argparse
import json
import os

from huggingface_hub import HfApi, login


def _upload_file(api, repo_id, file_path, file_name):
    """Upload one local file to the dataset repo through an existing client.

    Args:
        api: An ``HfApi`` instance to upload with.
        repo_id: Target repository ID (``username/repo-name``).
        file_path: Path of the local file to upload.
        file_name: Destination path of the file inside the repository.

    Raises:
        Exception: Any error raised by ``api.upload_file`` is reported on
            stdout and then re-raised unchanged.
    """
    try:
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file_name,
            repo_id=repo_id,
            repo_type="dataset",
        )
    except Exception as e:
        print(f"Error uploading file: {str(e)}")
        raise
    else:
        print(f"Successfully uploaded {file_name} to {repo_id}")


def _find_json_files(folder_path):
    """Return sorted ``(local_path, repo_path)`` pairs for ``*.json`` under a folder.

    ``repo_path`` is the path relative to ``folder_path`` using forward
    slashes, so the same repository layout is produced regardless of the
    local path separator.
    """
    found = []
    for root, _, filenames in os.walk(folder_path):
        for filename in filenames:
            if not filename.endswith('.json'):
                continue
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, folder_path)
            # os.path.relpath uses the OS separator ("\\" on Windows); the
            # repository path must be "/"-separated.
            found.append((file_path, relative_path.replace(os.sep, '/')))
    # Sort so the upload order does not depend on directory listing order.
    return sorted(found)


def upload_json_to_hf(token, repo_id, file_path, file_name):
    """Log in with ``token`` and upload a single file to a dataset repo.

    Args:
        token: Hugging Face access token passed to ``login``.
        repo_id: Target repository ID (``username/repo-name``).
        file_path: Path of the local file to upload.
        file_name: Destination path of the file inside the repository.

    Raises:
        Exception: Upload errors are printed and re-raised; errors raised by
            ``login`` propagate as-is.
    """
    login(token)
    _upload_file(HfApi(), repo_id, file_path, file_name)


def upload_folder_to_hf(token, repo_id, folder_path):
    """Upload every valid ``*.json`` file under ``folder_path`` to a dataset repo.

    The folder is walked recursively. Each file is parsed first; files that
    are not valid UTF-8 JSON are skipped with a message. Failures on one file
    (unreadable file, upload error) are reported and do not stop the
    remaining files.

    Args:
        token: Hugging Face access token passed to ``login``.
        repo_id: Target repository ID (``username/repo-name``).
        folder_path: Local directory to search for JSON files.

    Raises:
        Exception: Errors raised by ``login`` are not caught here. ``login``
            is only called when at least one JSON file was found.
    """
    files = _find_json_files(folder_path)
    if not files:
        return

    # Authenticate and build the client once for the whole folder instead of
    # once per uploaded file.
    login(token)
    api = HfApi()

    for file_path, repo_path in files:
        # Validation is kept in its own try block so that only parse errors
        # are reported as "invalid JSON"; upload errors are handled below.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            print(f"Skipping invalid JSON file: {file_path}")
            continue
        except Exception as e:
            print(f"Error processing {file_path}: {str(e)}")
            continue

        try:
            _upload_file(api, repo_id, file_path, repo_path)
        except Exception as e:
            # Best effort: report and carry on with the next file.
            print(f"Error processing {file_path}: {str(e)}")


def main():
    """Parse command-line arguments, validate them, and upload the folder.

    Raises:
        ValueError: No token was given via ``--token`` or the
            ``HUGGINGFACE_TOKEN`` environment variable.
        FileNotFoundError: ``--folder-path`` does not exist.
        NotADirectoryError: ``--folder-path`` exists but is not a directory.
    """
    parser = argparse.ArgumentParser(description='Upload JSON files to Hugging Face')

    parser.add_argument(
        '--token',
        type=str,
        help='Hugging Face access token (or set HUGGINGFACE_TOKEN env variable)',
        default=os.getenv('HUGGINGFACE_TOKEN')
    )
    parser.add_argument(
        '--repo-id',
        type=str,
        required=True,
        help='Repository ID (format: username/repo-name)'
    )
    parser.add_argument(
        '--folder-path',
        type=str,
        required=True,
        help='Path to the folder containing JSON files'
    )

    args = parser.parse_args()

    # The token may come from the flag or from the environment default above.
    if not args.token:
        raise ValueError("Please provide a token either via --token or HUGGINGFACE_TOKEN environment variable")

    if not os.path.exists(args.folder_path):
        raise FileNotFoundError(f"Folder not found: {args.folder_path}")
    if not os.path.isdir(args.folder_path):
        raise NotADirectoryError(f"Path is not a directory: {args.folder_path}")

    upload_folder_to_hf(args.token, args.repo_id, args.folder_path)


if __name__ == "__main__":
    main()