| import argparse |
| import os |
| import alibabacloud_oss_v2 as oss |
|
|
def _find_jsonl_files(directory):
    """Return ``(full_path, filename)`` pairs for the ``.jsonl`` files in *directory*.

    Only entries directly inside *directory* whose name ends with ``.jsonl``
    and for which ``os.path.isfile`` is true are returned. Names are sorted
    so the processing order does not depend on the order ``os.listdir``
    happens to yield.
    """
    candidates = (
        (os.path.join(directory, name), name)
        for name in sorted(os.listdir(directory))
        if name.endswith('.jsonl')
    )
    return [(path, name) for path, name in candidates if os.path.isfile(path)]


def _build_object_key(prefix, filename):
    """Join the target folder *prefix* and *filename* into an object key.

    Slashes are stripped from both ends of *prefix*, so ``/data``, ``data/``
    and ``data`` all give ``data/<filename>``. An empty prefix (or one made
    only of slashes) gives the bare *filename*.
    """
    folder = prefix.strip('/')
    return f"{folder}/{filename}" if folder else filename


def main():
    """Upload every ``.jsonl`` file of a local folder to an OSS bucket.

    Command-line arguments (read through ``argparse``):
        --region     required; assigned to the client configuration's region.
        --bucket     required; destination bucket name.
        --key        required; destination folder inside the bucket.
        --file_path  required; local folder that is scanned for ``.jsonl`` files.
        --endpoint   optional; assigned to the configuration only when given.

    Credentials are supplied by the SDK's
    ``EnvironmentVariableCredentialsProvider``.

    Each file is uploaded independently: a failure is reported on stdout and
    the remaining files are still attempted.

    Returns:
        0 when no ``.jsonl`` file was found or every upload succeeded,
        1 when at least one upload failed.

    Raises:
        ValueError: if ``--file_path`` is not an existing directory.
    """
    parser = argparse.ArgumentParser(description="批量上传JSONL文件到OSS")

    parser.add_argument('--region', required=True, help='OSS存储空间所在区域')
    parser.add_argument('--bucket', required=True, help='目标存储空间名称')
    parser.add_argument('--key', required=True, help='OSS目标文件夹路径(如:/data)')
    parser.add_argument('--file_path', required=True, help='本地包含JSONL文件的文件夹路径')

    parser.add_argument('--endpoint', help='自定义访问端点')

    args = parser.parse_args()

    if not os.path.isdir(args.file_path):
        raise ValueError(f"无效的目录路径: {args.file_path}")

    jsonl_files = _find_jsonl_files(args.file_path)
    if not jsonl_files:
        print("未找到任何JSONL文件")
        return 0

    credentials_provider = oss.credentials.EnvironmentVariableCredentialsProvider()
    cfg = oss.config.load_default()
    cfg.credentials_provider = credentials_provider
    cfg.region = args.region
    if args.endpoint:
        cfg.endpoint = args.endpoint

    client = oss.Client(cfg)
    uploader = client.uploader()

    failed = 0
    for local_path, filename in jsonl_files:
        oss_key = _build_object_key(args.key, filename)

        try:
            result = uploader.upload_file(
                oss.PutObjectRequest(
                    bucket=args.bucket,
                    key=oss_key,
                ),
                filepath=local_path,
            )
        except Exception as e:
            # Best-effort batch: report this file and carry on with the rest.
            failed += 1
            print(f" 上传失败 {filename}")
            print(f" 错误信息: {str(e)}\n")
        else:
            print(f" 成功上传 {filename}")
            print(f" OSS路径: {oss_key}")
            print(f" 状态码: {result.status_code}")
            print(f" 请求ID: {result.request_id}")
            print(f" ETag: {result.etag}\n")

    return 1 if failed else 0
|
|
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit
    # status; a None return still exits with status 0.
    raise SystemExit(main())