|
|
| '''
|
written by ygq
|
| create on 2025-09-04
|
|
|
| OASIS(Open Access Series of Imaging Studies) 是一个旨在向科研界免费提供脑部MRI数据的项目。本横断面(Cross-Sectional)数据集是其第一个版本,发布于2007年。
|
| OASIS-1 是横断面的,意味着它无法捕捉个体随时间的动态变化。对于研究疾病进展,后续的 OASIS-2 和 OASIS-3(纵向数据集)是更好的选择。
|
|
|
| 1. 目录与文件命名规则
|
| 根目录下按受试者会话ID建立文件夹。
|
| 受试者ID格式:OAS1_xxxx (例如 OAS1_0012)
|
| 会话ID格式:OAS1_xxxx_MRy (例如 OAS1_0012_MR1,y代表第几次访问成像)
|
| OAS1_xxxx_MRy/
|
| │
|
| ├── OAS1_xxxx_MRy.xml # 包含采集细节和解剖指标的XML元数据文件
|
| ├── OAS1_xxxx_MRy.txt # 与XML内容相同的文本格式文件(便于查看)
|
| ├── RAW/ # 存储原始扫描图像(DICOM或Analyze格式)
|
| ├── PROCESSED/ # 预处理后的图像
|
| │ ├── SUBJ_111/ # 原始空间下的平均配准图像(各向同性1mm³)
|
| │ └── T88_111/ # 图谱配准空间下的图像
|
| │ ├── t4_files/ # 存储配准变换矩阵文件
|
| │ └── ... # 配准后的图像文件
|
| └── FSL_SEG/ # 基于图谱配准图像生成的脑组织分割结果(灰质2/白质3/脑脊液1)
|
|
|
|
|
| 所有图像均以 Analyze 7.5格式 存储,包含:
|
| 一个图像文件(.img)
|
| 一个头文件(.hdr)
|
| 使用 16位大端序(big-endian) 存储
|
|
|
| OAS1_xxxx_MRy_mpr-z_anon 单次原始扫描 256x256x128 1x1x1.25 mm 矢状位
|
| OAS1_xxxx_MRy_mpr_ni_anon_sbj_111 多次扫描平均配准图像 256x256x160 1x1x1 mm 矢状位
|
| OAS1_xxxx_MRy_mpr_ni_anon_111_t88_gfc 增益场校正后的图谱配准图像 176x208x176 1x1x1 mm 横断位
|
| OAS1_xxxx_MRy_mpr_ni_anon_111_t88_masked_gfc 去除非脑组织的掩模图像 176x208x176 1x1x1 mm 横断位
|
| OAS1_xxxx_MRy_mpr_ni_anon_111_t88_masked_gfc_fseg 脑组织分割图像(灰/白/CSF) 176x208x176 1x1x1 mm 横断位
|
|
|
| 1. 人口统计学信息
|
| 性别(M/F)
|
| 用手习惯(Hand)(均为右利手)
|
| 年龄(Age)
|
| 教育程度(Educ)(1-5级)
|
| 社会经济地位(SES)
|
|
|
| 2. 临床评估
|
| MMSE(简易精神状态检查)
|
| CDR(临床痴呆评级:0=正常,0.5=非常轻微,1=轻度,2=中度)
|
|
|
| 3. 衍生解剖指标
|
| eTIV:估计颅内容积
|
| ASF:图谱缩放因子
|
| nWBV:标准化全脑体积
|
| '''
|
| import os
|
| import glob,re
|
| import pandas as pd
|
| import SimpleITK as sitk
|
| import argparse
|
| import json
|
| from tqdm import tqdm
|
| from util import meta_data
|
| import util
|
| import numpy as np
|
|
|
|
|
| import shutil
|
|
|
import warnings

# Suppress third-party warnings so the batch-conversion logs stay readable.
warnings.filterwarnings("ignore")

# Column name in the OASIS-1 demographics CSV that holds the session ID
# (e.g. "OAS1_0012_MR1"); used to look up each image's metadata row.
meta_id_name='ID'

# All columns expected in the OASIS-1 cross-sectional CSV; entries [1:] are
# copied verbatim into each image's metadata record in main().
META_COLUMN=['ID', 'M/F', 'Hand', 'Age', 'Educ', 'SES', 'MMSE', 'CDR', 'eTIV','nWBV', 'ASF', 'Delay']

# Dataset-level processing constants.
TASK_VALUE="segmentation"        # downstream task label for this dataset
CLAMP_RANGE_CT = [-300,300]      # HU clamp window for CT (not used by this MRI dataset)
CLAMP_RANGE_MRI = None           # no intensity clamping for MRI
TARGET_VOXEL_SPACING=None        # keep native voxel spacing (no resampling)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_metadata_files(path):
    """Return the paths of all ``*.csv`` metadata files under *path*, recursively.

    BUGFIX: the original pattern ``path/*.csv`` made ``recursive=True`` a
    no-op — glob only recurses when the pattern contains ``**``. The
    pattern now matches the evident intent of finding CSV files at any depth.
    """
    search_pattern = os.path.join(path, '**', '*.csv')
    return glob.glob(search_pattern, recursive=True)
|
|
|
def find_image_dirs(path):
    """List the immediate entries of *path* (subject/session folder names).

    Thin wrapper over os.listdir; order is filesystem-dependent and
    entries may include files as well as directories.
    """
    entries = os.listdir(path)
    return entries
|
|
|
|
|
def load_dicom_images(folder_path):
    """Read the DICOM series found in *folder_path*.

    Returns a tuple of (series file names as discovered by GDCM, the
    SimpleITK volume assembled from those files).
    """
    series_reader = sitk.ImageSeriesReader()
    file_names = series_reader.GetGDCMSeriesFileNames(folder_path)
    series_reader.SetFileNames(file_names)
    volume = series_reader.Execute()
    return file_names, volume
|
|
|
|
|
def load_dicom_tag(imgs):
    """Read a single image file *imgs* via SimpleITK's ImageFileReader.

    NOTE(review): despite the name, ``reader.Execute()`` loads the full
    image (pixel data included), not just the header tags — after
    ``ReadImageInformation()`` the tags would be available through
    ``reader.GetMetaData(...)`` without Execute(). Confirm whether callers
    expect an image object or a metadata dictionary here.
    """
    reader = sitk.ImageFileReader()

    reader.SetFileName(imgs)
    reader.ReadImageInformation()  # parses the header/metadata only

    tag=reader.Execute()  # reads the whole image from disk
    return tag
|
|
|
def load_nrrd(fp):
    """Load a single image file (NRRD or any other SimpleITK-readable format)."""
    image = sitk.ReadImage(fp)
    return image
|
|
|
|
|
def load_raw_images(series_files):
    '''
    Each case contains 3 to 4 RAW single-pass MR scans.
    The separate acquisitions are merged into one volume with an extra
    fourth dimension, ordered MPR-1, MPR-2, ... (callers pass the file
    list pre-sorted).

    NOTE(review): this relies on ImageSeriesReader joining multiple 3-D
    Analyze files into a 4-D image — confirm the resulting dimensionality
    for this data.
    '''
    reader = sitk.ImageSeriesReader()
    reader.SetFileNames(series_files)
    image = reader.Execute()
    return image
|
|
|
def save_nifti(image, output_path, folder_path):
    """Write *image* to *output_path* as NIfTI, recording its source folder.

    Parameters
    ----------
    image : SimpleITK.Image
        Volume to write; a "FolderPath" metadata entry is set to *folder_path*.
    output_path : str
        Destination path (e.g. ".../case.nii.gz"); parent dirs are created.
    folder_path : str
        Original data folder, stored in the image metadata for provenance.
    """
    output_dirpath = os.path.dirname(output_path)
    if not os.path.exists(output_dirpath):
        print(f"Creating directory {output_dirpath}")
    # BUGFIX: exist_ok=True removes the TOCTOU race where another process
    # creates the directory between the exists() check and makedirs(),
    # which previously raised FileExistsError.
    os.makedirs(output_dirpath, exist_ok=True)

    image.SetMetaData("FolderPath", folder_path)
    sitk.WriteImage(image, output_path)
|
|
|
|
|
def convert_windows_to_linux_path(windows_path):
    """Convert a Windows-style path into a POSIX-style one.

    Backslashes become forward slashes, and everything up to and including
    the first ':' (a drive prefix such as "C:") is stripped, e.g.
    "C:\\data\\x" -> "/data/x". Paths without a ':' pass through unchanged.
    """
    posix_path = windows_path.replace('\\', '/')
    if ':' not in posix_path:
        return posix_path
    _, _, remainder = posix_path.partition(':')
    return remainder
|
|
|
def main(target_path, output_dir):
    """Convert every OASIS-1 RAW session under *target_path* to NIfTI.

    For each session folder (OAS1_xxxx_MRy) the ``RAW/*_mpr-*.img`` Analyze
    scans are merged into one volume, written to
    ``output_dir/<session>/<session>.nii.gz``, and an entry combining image
    properties with the session's demographics row (from the local
    cross-sectional CSV) is appended to ``nifti_mappings.json``. Sessions
    that fail to convert are listed in ``failed_files.json``.
    """
    pid_dirs = find_image_dirs(target_path)
    failed_files = []
    # exist_ok avoids a race if the output dir appears between check and create.
    os.makedirs(output_dir, exist_ok=True)
    json_output_path = os.path.join(output_dir, 'nifti_mappings.json')
    failed_files_path = os.path.join(output_dir, 'failed_files.json')
    # NOTE(review): one shared meta_data object is reused for every session;
    # confirm add_keyvalue overwrites stale fields left from a previous case.
    meta = meta_data()

    # Seed the mapping file so the per-image 'r+' read-modify-write below
    # always finds valid JSON.
    if not os.path.exists(json_output_path):
        with open(json_output_path, 'w') as json_file:
            json.dump({}, json_file)

    # Demographics: a CSV export next to this script is read; the original
    # XLSX path is only recorded in the per-image metadata.
    meta_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'oasis_cross-sectional-5708aa0a98d82080.csv')
    meta_file_ori = os.path.join(target_path, 'oasis_cross-sectional-5708aa0a98d82080.xlsx')
    if os.path.isfile(meta_file):
        df_meta = pd.read_csv(meta_file, sep=',')
    else:
        # BUGFIX: the original only set a flag (mf_flag) here and left
        # df_meta undefined, so the first lookup below raised NameError
        # whenever the CSV was missing. An empty frame with the expected
        # columns keeps the loop working and yields blank demographic fields.
        df_meta = pd.DataFrame(columns=META_COLUMN)

    if pid_dirs:
        for pid_dir in tqdm(pid_dirs, desc="Processing pid dirs"):
            if not os.path.isdir(os.path.join(target_path, pid_dir)):
                continue

            image_dirs = find_image_dirs(os.path.join(target_path, pid_dir))

            for data_dir in tqdm(image_dirs, desc="Processing images files"):

                full_path = os.path.join(target_path, pid_dir, data_dir)

                modality = "MRI"
                study = 'OASIS_1'
                CIA_other_info = {'metadata_file': meta_file_ori}
                CIA_other_info['split'] = "train"
                # BUGFIX: Image_id was only set when a CSV row matched,
                # producing an inconsistent metadata schema; set it always.
                CIA_other_info['Image_id'] = data_dir
                data_info_row = df_meta[df_meta[meta_id_name] == data_dir]

                if data_info_row.shape[0] > 0:
                    data_info_row = data_info_row.reset_index()
                    for keyname in META_COLUMN[1:]:
                        CIA_other_info[keyname] = str(data_info_row[keyname][0])
                else:
                    # No demographics found for this session: record empty fields.
                    for keyname in META_COLUMN[1:]:
                        CIA_other_info[keyname] = ''

                try:
                    # All single-pass scans of this session (mpr-1, mpr-2, ...).
                    series_files = glob.glob("%s/RAW/%s_mpr-*.img" % (full_path, data_dir))
                    series_files.sort()

                    if len(series_files) > 0:
                        sitk_img_original = load_raw_images(series_files)
                        # Map array index -> acquisition name ("mpr-1", ...).
                        submodality = [re.search(r"mpr-\d{1}", os.path.basename(fp)).group(0)
                                       for fp in series_files]
                        sub_modality_dict = {idx: value
                                             for idx, value in enumerate(submodality)}
                        meta.add_keyvalue('Sub_modality', sub_modality_dict)
                    else:
                        print("病例数据%s为空" % data_dir)
                        continue

                    original_spacing = list(sitk_img_original.GetSpacing())
                    original_size = list(sitk_img_original.GetSize())

                    meta.add_keyvalue('Spacing_mm', min(original_spacing))
                    meta.add_keyvalue('OriImg_path', ",".join(series_files))
                    meta.add_keyvalue('Size', original_size)
                    meta.add_keyvalue('Modality', modality)
                    meta.add_keyvalue('Dataset_name', study)
                    meta.add_keyvalue('ROI', 'head')

                    output_image_file = os.path.join(output_dir, data_dir, f"{data_dir}.nii.gz")

                    save_nifti(sitk_img_original, output_image_file, full_path)
                    print(f"Saved NIfTI file to {output_image_file}")

                except Exception as e:
                    print(e)
                    failed_files.append(data_dir)
                    print(f"Failed to load BRATS images from {data_dir}")
                    continue

                meta.add_extra_keyvalue('Metadata', CIA_other_info)

                # Read-modify-write the shared mapping file for this image.
                with open(json_output_path, 'r+') as json_file:
                    existing_mappings = json.load(json_file)
                    existing_mappings[output_image_file] = meta.get_meta_data()
                    json_file.seek(0)
                    json.dump(existing_mappings, json_file, indent=4)
                    json_file.truncate()

    with open(failed_files_path, "w") as json_file:
        json.dump(failed_files, json_file)

    print(f"The list has been written to {failed_files_path}")
    print(f"Saved NIfTI mappings to {json_output_path}")
|
|
|
| if __name__ == "__main__":
|
| parser = argparse.ArgumentParser(description="Process DICOM files and save as NIfTI.")
|
| parser.add_argument("--target_path", type=str, help="Path to the target directory containing metadata files.", default="/home/data/Github/data/data_gen_def/DATASETS/OASIS/OASIS_1/oasis_cs_sectional/")
|
| parser.add_argument("--output_dir", type=str, help="Directory to save the NIfTI files.", default="/home/data/Github/data/data_gen_def/DATASETS_processed/OASIS/OASIS_1/CS_SECTIONAL_RAW")
|
| args = parser.parse_args()
|
| print(args.target_path, args.output_dir)
|
| main(args.target_path, args.output_dir) |