| | from .supported_datasets import ( |
| | supported_datasets, |
| | internal_datasets, |
| | possible_with_vector_reps, |
| | standard_data_benchmark, |
| | testing, |
| | ) |
| |
|
| |
|
def list_supported_datasets(with_descriptions=True):
    """
    Print all supported datasets to stdout, optionally in a描述 table.

    Args:
        with_descriptions (bool): Whether to render the type/task/description
            columns. This requires the optional ``dataset_descriptions``
            module; when it cannot be imported, the function silently falls
            back to the plain name/source listing.
    """
    try:
        from .dataset_descriptions import dataset_descriptions
        has_descriptions = True
    except ImportError:
        has_descriptions = False

    # Plain listing: either the caller opted out, or descriptions are
    # unavailable in this installation.
    if not with_descriptions or not has_descriptions:
        print("\n=== Supported Datasets ===\n")
        for dataset_name in supported_datasets:
            print(f"- {dataset_name}: {supported_datasets[dataset_name]}")
        return

    print("\n=== Supported Datasets ===\n")

    # Placeholder text used for datasets with no (or incomplete) entry in
    # dataset_descriptions. Column widths must be at least this wide so the
    # fallback rows stay aligned with the described rows.
    placeholder = "Unknown"

    # Column widths. ``default=`` guards the empty-iterable case (no dataset
    # present in dataset_descriptions), which previously made max() raise
    # ValueError before anything was printed.
    max_name_len = max(
        (len(name) for name in supported_datasets),
        default=len("Dataset"),
    )
    max_type_len = max(
        max(
            (len(dataset_descriptions.get(name, {}).get('type', placeholder))
             for name in supported_datasets if name in dataset_descriptions),
            default=0,
        ),
        len(placeholder),
    )
    max_task_len = max(
        max(
            (len(dataset_descriptions.get(name, {}).get('task', placeholder))
             for name in supported_datasets if name in dataset_descriptions),
            default=0,
        ),
        len(placeholder),
    )

    # Header row and separator sized to the computed columns.
    print(f"{'Dataset':<{max_name_len+2}}{'Type':<{max_type_len+2}}{'Task':<{max_task_len+2}}Description")
    print("-" * (max_name_len + max_type_len + max_task_len + 50))

    for dataset_name in supported_datasets:
        if dataset_name in dataset_descriptions:
            dataset_info = dataset_descriptions[dataset_name]
            print(
                f"{dataset_name:<{max_name_len+2}}"
                f"{dataset_info.get('type', placeholder):<{max_type_len+2}}"
                f"{dataset_info.get('task', placeholder):<{max_task_len+2}}"
                f"{dataset_info.get('description', 'No description available')}"
            )
        else:
            print(
                f"{dataset_name:<{max_name_len+2}}"
                f"{placeholder:<{max_type_len+2}}"
                f"{placeholder:<{max_task_len+2}}"
                f"No description available"
            )

    print("\n=== Standard Benchmark Datasets ===\n")
    for dataset_name in standard_data_benchmark:
        print(f"- {dataset_name}")
| |
|
| |
|
def get_dataset_info(dataset_name):
    """
    Look up detailed information about a single dataset.

    Args:
        dataset_name (str): Name of the dataset.

    Returns:
        dict: Dataset information, or None when the dataset is unknown.
    """
    # Prefer the rich metadata when the optional descriptions module is
    # importable; treat an import failure as "no descriptions available".
    try:
        from .dataset_descriptions import dataset_descriptions
    except ImportError:
        dataset_descriptions = {}

    if dataset_name in dataset_descriptions:
        return dataset_descriptions[dataset_name]

    # Fall back to the bare registry entry when the dataset is at least
    # listed as supported.
    if dataset_name in supported_datasets:
        return {"name": dataset_name, "source": supported_datasets[dataset_name]}

    # Completely unknown dataset.
    return None
| |
|
| |
|
| | if __name__ == "__main__": |
| | import sys |
| | import argparse |
| | |
| | parser = argparse.ArgumentParser(description='List and describe supported datasets') |
| | parser.add_argument('--list', action='store_true', help='List all supported datasets') |
| | parser.add_argument('--info', type=str, help='Get information about a specific dataset') |
| | args = parser.parse_args() |
| | |
| | if len(sys.argv) == 1: |
| | parser.print_help() |
| | sys.exit(1) |
| | |
| | if args.list: |
| | list_supported_datasets() |
| | |
| | if args.info: |
| | dataset_info = get_dataset_info(args.info) |
| | if dataset_info: |
| | print(f"\n=== Dataset: {args.info} ===\n") |
| | for key, value in dataset_info.items(): |
| | print(f"{key.capitalize()}: {value}") |
| | else: |
| | print(f"Dataset '{args.info}' not found in supported datasets.") |