| |
| import os |
| import pandas as pd |
| from datasets import load_dataset |
|
|
def row_to_dict(row, split_name):
    """Flatten one dataset record into a flat dict suitable for a CSV row.

    Args:
        row: Mapping with the keys ``"id"``, ``"metadata"`` (itself a mapping
            holding ``"age"`` and ``"sex"``) and ``"nii_filepath"``.
        split_name: Label stored unchanged under the ``"split"`` key.

    Returns:
        A dict with the keys ``image_uid``, ``age``, ``sex``, ``image_path``
        and ``split``, in that order.

    Raises:
        KeyError: If one of the expected keys is missing from ``row``.
        ValueError: If the age value cannot be converted with ``int()``.
    """
    uid = row["id"]
    meta = row["metadata"]
    age = int(meta["age"])

    # Sex is encoded numerically: 1 for "male" (case-insensitive),
    # 2 for every other value.
    if meta["sex"].lower() == "male":
        sex_code = 1
    else:
        sex_code = 2

    # Store an absolute path so the CSV does not depend on the directory it
    # is later read from.
    absolute_path = os.path.abspath(row["nii_filepath"])

    return {
        "image_uid": uid,
        "age": age,
        "sex": sex_code,
        "image_path": absolute_path,
        "split": split_name,
    }
|
|
def main(output_csv="inputs.csv"):
    """Export the brain-structure dataset splits to a single CSV file.

    Loads the ``train``, ``validation`` and ``test`` splits of
    ``radiata-ai/brain-structure`` with ``datasets.load_dataset``, converts
    every record with ``row_to_dict`` (tagging it with its split name), and
    writes the combined table to ``output_csv`` without the index column.

    Args:
        output_csv: Destination path of the CSV file. Defaults to
            ``"inputs.csv"`` in the current working directory.
    """
    dataset_name = "radiata-ai/brain-structure"
    split_names = ("train", "validation", "test")

    # Load every split before building any rows, so a missing split or a
    # failed download aborts early.
    # NOTE(review): trust_remote_code=True lets the dataset repository's
    # loading script run locally; keep only while that repository is trusted.
    splits = {
        name: load_dataset(dataset_name, split=name, trust_remote_code=True)
        for name in split_names
    }

    # Rows are emitted in split order: train, then validation, then test.
    rows = []
    for name, split in splits.items():
        rows.extend(row_to_dict(data_row, name) for data_row in split)

    df = pd.DataFrame(rows)
    df.to_csv(output_csv, index=False)
    print(f"CSV file created: {output_csv}")
|
|
# Script entry point: build the CSV only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|
|