#coding:utf-8
'''
write by ygq
create on 2025-09-04
OASIS(Open Access Series of Imaging Studies) 是一个旨在向科研界免费提供脑部MRI数据的项目。本横断面(Cross-Sectional)数据集是其第一个版本,发布于2007年。
OASIS-1 是横断面的,意味着它无法捕捉个体随时间的动态变化。对于研究疾病进展,后续的 OASIS-2 和 OASIS-3(纵向数据集)是更好的选择。
1. 目录与文件命名规则
根目录下按受试者会话ID建立文件夹。
受试者ID格式:OAS1_xxxx (例如 OAS1_0012)
会话ID格式:OAS1_xxxx_MRy (例如 OAS1_0012_MR1,y代表第几次访问成像)
OAS1_xxxx_MRy/
│
├── OAS1_xxxx_MRy.xml # 包含采集细节和解剖指标的XML元数据文件
├── OAS1_xxxx_MRy.txt # 与XML内容相同的文本格式文件(便于查看)
├── RAW/ # 存储原始扫描图像(DICOM或Analyze格式)
├── PROCESSED/ # 预处理后的图像
│ ├── SUBJ_111/ # 原始空间下的平均配准图像(各向同性1mm³)
│ └── T88_111/ # 图谱配准空间下的图像
│ ├── t4_files/ # 存储配准变换矩阵文件
│ └── ... # 配准后的图像文件
└── FSL_SEG/ # 基于图谱配准图像生成的脑组织分割结果(灰质2/白质3/脑脊液1)
所有图像均以 Analyze 7.5格式 存储,包含:
一个图像文件(.img)
一个头文件(.hdr)
使用 16位大端序(big-endian) 存储
OAS1_xxxx_MRy_mpr-z_anon 单次原始扫描 256x256x128 1x1x1.25 mm 矢状位
OAS1_xxxx_MRy_mpr_ni_anon_sbj_111 多次扫描平均配准图像 256x256x160 1x1x1 mm 矢状位
OAS1_xxxx_MRy_mpr_ni_anon_111_t88_gfc 增益场校正后的图谱配准图像 176x208x176 1x1x1 mm 横断位
OAS1_xxxx_MRy_mpr_ni_anon_111_t88_masked_gfc 去除非脑组织的掩模图像 176x208x176 1x1x1 mm 横断位
OAS1_xxxx_MRy_mpr_ni_anon_111_t88_masked_gfc_fseg 脑组织分割图像(灰/白/CSF) 176x208x176 1x1x1 mm 横断位
1. 人口统计学信息
性别(M/F)
用手习惯(Hand)(均为右利手)
年龄(Age)
教育程度(Educ)(1-5级)
社会经济地位(SES)
2. 临床评估
MMSE(简易精神状态检查)
CDR(临床痴呆评级:0=正常,0.5=非常轻微,1=轻度,2=中度)
3. 衍生解剖指标
eTIV:估计颅内容积
ASF:图谱缩放因子
nWBV:标准化全脑体积
'''
import os
import glob,re
import pandas as pd
import SimpleITK as sitk
import argparse
import json
from tqdm import tqdm
from util import meta_data
import util
import numpy as np
# from bert_helper import *
import shutil
##dataset_meta: module-level configuration used by main() below
import warnings
warnings.filterwarnings("ignore")  # suppress all Python warnings during batch conversion
# Column in the OASIS demographics CSV that holds the session/case id (e.g. OAS1_0001_MR1).
meta_id_name='ID'
## Gender (M/F), Handedness (all right-handed), Age, Education level (1-5),
## Socioeconomic Status (SES), MMSE (Mini-Mental State Exam),
## CDR (Clinical Dementia Rating: 0=normal, 0.5=very mild, 1=mild, 2=moderate),
## eTIV (estimated total intracranial volume), ASF (atlas scaling factor),
## nWBV (normalized whole-brain volume)
META_COLUMN=['ID', 'M/F', 'Hand', 'Age', 'Educ', 'SES', 'MMSE', 'CDR', 'eTIV','nWBV', 'ASF', 'Delay']
TASK_VALUE="segmentation"
CLAMP_RANGE_CT = [-300,300]  # presumably a CT HU clamp window; appears unused in this script
CLAMP_RANGE_MRI = None # MRI images threshold placeholder TBC...
TARGET_VOXEL_SPACING=None  # no resampling is performed in this script
# def find_metadata_files(path):
# # for Cancer Image Archive (TCIA) dataset
# search_pattern = os.path.join(path, '**', 'metadata.csv')
# return glob.glob(search_pattern, recursive=True)
def find_metadata_files(path):
    """Return all ``*.csv`` metadata files directly under *path*.

    Originally written for Cancer Image Archive (TCIA) layouts; here it
    locates the OASIS demographics CSV (see main()).

    Args:
        path: Directory to scan (non-recursive).

    Returns:
        list[str]: Matching file paths (order is filesystem-dependent).
    """
    # NOTE: the pattern contains no '**', so the previous recursive=True
    # flag was a no-op; the search is and always was non-recursive.
    search_pattern = os.path.join(path, '*.csv')
    return glob.glob(search_pattern)
##added by yanguoqing on 20250527
def find_image_dirs(path):
    """Return the names of all entries directly inside *path*.

    Used to enumerate OASIS subject folders (OAS1_xxxx) and their
    session sub-folders (OAS1_xxxx_MRy); callers filter non-directories.
    """
    entries = os.listdir(path)
    return entries
##modify by yanguoqing on 20250527
def load_dicom_images(folder_path):
    """Read a DICOM series from *folder_path* via SimpleITK.

    Returns:
        tuple: (ordered DICOM file names, the assembled 3D image).
    """
    series_reader = sitk.ImageSeriesReader()
    file_names = series_reader.GetGDCMSeriesFileNames(folder_path)
    series_reader.SetFileNames(file_names)
    volume = series_reader.Execute()
    return file_names, volume
##added by yanguoqing on 20250527
def load_dicom_tag(imgs):
    """Load a single image file at path *imgs*, header included.

    NOTE(review): despite the name, Execute() returns the full image
    object (pixel data included), not just the tag dictionary.
    """
    file_reader = sitk.ImageFileReader()
    file_reader.SetFileName(imgs)
    # Read header/meta information first, without loading pixel data.
    file_reader.ReadImageInformation()
    loaded = file_reader.Execute()
    return loaded
def load_nrrd(fp):
    """Read an image file (e.g. NRRD) at path *fp* with SimpleITK."""
    image = sitk.ReadImage(fp)
    return image
##modify by yanguoqing on 20250904
def load_raw_images(series_files):
    """Combine one case's RAW single-shot MR scans into one image.

    Each OASIS-1 case contains 3-4 raw MPRAGE scans. Feeding the sorted
    file list to ImageSeriesReader stacks them along an extra dimension
    in MPR-1, MPR-2, ... order.

    Args:
        series_files: Sorted list of Analyze .img file paths.

    Returns:
        The stacked SimpleITK image.
    """
    series_reader = sitk.ImageSeriesReader()
    series_reader.SetFileNames(series_files)
    stacked = series_reader.Execute()
    return stacked
def save_nifti(image, output_path, folder_path):
    """Write *image* to *output_path* as NIfTI, tagging its source folder.

    Args:
        image: SimpleITK image to serialize.
        output_path: Destination file path (e.g. '.../case.nii.gz');
            missing parent directories are created on demand.
        folder_path: Original data directory, stored under the
            "FolderPath" metadata key for traceability.
    """
    output_dirpath = os.path.dirname(output_path)
    # Guard the empty-string case (bare filename) -- os.makedirs('')
    # raises FileNotFoundError; exist_ok avoids a check/create race.
    if output_dirpath and not os.path.isdir(output_dirpath):
        print(f"Creating directory {output_dirpath}")
        os.makedirs(output_dirpath, exist_ok=True)
    # Set metadata in the NIfTI file's header
    image.SetMetaData("FolderPath", folder_path)
    sitk.WriteImage(image, output_path)
##modify by yanguoqing on 20250527
def convert_windows_to_linux_path(windows_path):
    """Normalize a Windows-style path for use on a Linux server.

    Backslashes become forward slashes, and any drive prefix such as
    'C:' is stripped (everything up to and including the first ':').
    Some metadata files carry Windows paths although the data lives on
    a Linux host.
    """
    normalized = windows_path.replace('\\', '/')
    if ':' not in normalized:
        return normalized
    _drive, _sep, tail = normalized.partition(':')
    return tail
def main(target_path, output_dir):
    """Convert OASIS-1 RAW Analyze scans to NIfTI and record metadata.

    Walks target_path/<subject>/<session>/RAW/*_mpr-*.img, merges each
    session's repeated MPRAGE scans into one multi-volume image, writes
    it to output_dir/<session>/<session>.nii.gz, and incrementally
    appends a per-case record to nifti_mappings.json. Cases that fail
    to convert are listed in failed_files.json.

    Args:
        target_path: Root directory of the extracted OASIS-1 archive.
        output_dir: Destination directory for NIfTI files and JSON logs.
    """
    pid_dirs = find_image_dirs(target_path)
    failed_files = []
    os.makedirs(output_dir, exist_ok=True)
    json_output_path = os.path.join(output_dir, 'nifti_mappings.json')
    failed_files_path = os.path.join(output_dir, 'failed_files.json')
    meta = meta_data()
    # Initialize the JSON mapping file so it can be opened 'r+' below.
    if not os.path.exists(json_output_path):
        with open(json_output_path, 'w') as json_file:
            json.dump({}, json_file)
    # The demographics XLSX is pre-converted to CSV for easier parsing.
    meta_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'oasis_cross-sectional-5708aa0a98d82080.csv')
    meta_file_ori = os.path.join(target_path, 'oasis_cross-sectional-5708aa0a98d82080.xlsx')
    if os.path.isfile(meta_file):
        df_meta = pd.read_csv(meta_file, sep=',')
    else:
        # BUG FIX: df_meta was previously left undefined on this branch,
        # crashing with NameError at the first lookup in the loop. An
        # empty frame keeps the pipeline running with blank metadata.
        df_meta = pd.DataFrame(columns=META_COLUMN)
    if pid_dirs:
        for pid_dir in tqdm(pid_dirs, desc="Processing pid dirs"):
            if not os.path.isdir(os.path.join(target_path, pid_dir)):
                continue
            # Iterate the session folders (OAS1_xxxx_MRy) of this subject.
            image_dirs = find_image_dirs(os.path.join(target_path, pid_dir))
            for data_dir in tqdm(image_dirs, desc="Processing images files"):
                # data_dir doubles as the case/image id.
                full_path = os.path.join(target_path, pid_dir, data_dir)
                modality = "MRI"
                study = 'OASIS_1'  # Dataset_name
                CIA_other_info = {'metadata_file': meta_file_ori, 'split': "train"}
                data_info_row = df_meta[df_meta[meta_id_name] == data_dir]
                if data_info_row.shape[0] > 0:
                    data_info_row = data_info_row.reset_index()
                    for keyname in META_COLUMN[1:]:
                        CIA_other_info[keyname] = str(data_info_row[keyname][0])
                    CIA_other_info['Image_id'] = data_dir
                else:
                    # No demographics row for this case: record blanks.
                    for keyname in META_COLUMN[1:]:
                        CIA_other_info[keyname] = ''
                try:
                    # Collect this case's RAW single scans, e.g.
                    # RAW/OAS1_0001_MR1_mpr-1_anon.img
                    series_files = glob.glob("%s/RAW/%s_mpr-*.img" % (full_path, data_dir))
                    series_files.sort()
                    if len(series_files) > 0:
                        # Valid MRI data found: stack the repeated scans.
                        sitk_img_original = load_raw_images(series_files)
                        submodality = [re.search(r"mpr-\d{1}", os.path.basename(fp)).group(0)
                                       for fp in series_files]
                        sub_modality_dict = {idx: value for idx, value in enumerate(submodality)}
                        meta.add_keyvalue('Sub_modality', sub_modality_dict)
                    else:
                        print("病例数据%s为空" % data_dir)
                        continue
                    original_spacing = list(sitk_img_original.GetSpacing())
                    original_size = list(sitk_img_original.GetSize())
                    meta.add_keyvalue('Spacing_mm', min(original_spacing))
                    meta.add_keyvalue('OriImg_path', ",".join(series_files))
                    meta.add_keyvalue('Size', original_size)
                    meta.add_keyvalue('Modality', modality)
                    meta.add_keyvalue('Dataset_name', study)
                    meta.add_keyvalue('ROI', 'head')
                    output_image_file = os.path.join(output_dir, data_dir, f"{data_dir}.nii.gz")
                    save_nifti(sitk_img_original, output_image_file, full_path)
                    print(f"Saved NIfTI file to {output_image_file}")
                except Exception as e:
                    print(e)
                    failed_files.append(data_dir)
                    print(f"Failed to load BRATS images from {data_dir}")
                    continue
                meta.add_extra_keyvalue('Metadata', CIA_other_info)
                # Persist the mapping after every case so a crash midway
                # through the archive keeps the progress made so far.
                with open(json_output_path, 'r+') as json_file:
                    existing_mappings = json.load(json_file)
                    existing_mappings[output_image_file] = meta.get_meta_data()
                    json_file.seek(0)
                    json.dump(existing_mappings, json_file, indent=4)
                    json_file.truncate()
    with open(failed_files_path, "w") as json_file:
        json.dump(failed_files, json_file)
    print(f"The list has been written to {failed_files_path}")
    print(f"Saved NIfTI mappings to {json_output_path}")
if __name__ == "__main__":
    # CLI entry point: resolve input/output locations, then run the converter.
    arg_parser = argparse.ArgumentParser(description="Process DICOM files and save as NIfTI.")
    arg_parser.add_argument(
        "--target_path", type=str,
        help="Path to the target directory containing metadata files.",
        default="/home/data/Github/data/data_gen_def/DATASETS/OASIS/OASIS_1/oasis_cs_sectional/")
    arg_parser.add_argument(
        "--output_dir", type=str,
        help="Directory to save the NIfTI files.",
        default="/home/data/Github/data/data_gen_def/DATASETS_processed/OASIS/OASIS_1/CS_SECTIONAL_RAW")
    cli_args = arg_parser.parse_args()
    print(cli_args.target_path, cli_args.output_dir)
    main(cli_args.target_path, cli_args.output_dir)