import glob
import json
import os
import warnings

import pandas as pd
import SimpleITK as sitk
|
|
def load_dicom_images(folder_path):
    """Read the DICOM series found in ``folder_path`` and return it as one image.

    The file list comes from ``GetGDCMSeriesFileNames`` and is handed to the
    same ``sitk.ImageSeriesReader`` instance, which is then executed.
    """
    series_reader = sitk.ImageSeriesReader()
    series_files = series_reader.GetGDCMSeriesFileNames(folder_path)
    series_reader.SetFileNames(series_files)
    return series_reader.Execute()
|
|
def convert_windows_to_linux_path(windows_path):
    """Turn a Windows-style path into a forward-slash path without a drive prefix.

    Every backslash becomes ``/``. If the result contains a colon, everything
    up to and including the first colon is discarded (``C:/data`` -> ``/data``).
    """
    forward_slashed = '/'.join(windows_path.split('\\'))
    _prefix, colon, remainder = forward_slashed.partition(':')
    # partition() yields an empty separator when no colon is present.
    return remainder if colon else forward_slashed
|
|
| |
| |
| |
|
|
def read_table(file_path, split_str=';'):
    """Load a table into a DataFrame, trying Excel first and delimited text second.

    Args:
        file_path: Path of the file to read.
        split_str: Column separator used for the delimited-text fallback.

    Returns:
        The DataFrame produced by ``pd.read_excel`` (``openpyxl`` engine) or,
        if that fails for any reason, by ``pd.read_csv``.

    Raises:
        Whatever ``pd.read_csv`` raises when the fallback fails as well.
    """
    try:
        return pd.read_excel(file_path, engine='openpyxl')
    except Exception:
        # Deliberate best-effort fallback: any failure of the Excel reader
        # (not an xlsx file, engine missing, ...) means "treat it as text".
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return pd.read_csv(file_path, sep=split_str)
|
|
def load_nifti(image_path):
    """Read the image stored at ``image_path`` via ``sitk.ReadImage``.

    No format check is performed here; the path is passed straight through.
    """
    loaded_image = sitk.ReadImage(image_path)
    return loaded_image
|
|
def save_nifti(image, output_path, folder_path):
    """Write ``image`` to ``output_path``, creating the parent directory if needed.

    Args:
        image: Image object exposing ``SetMetaData``; it is passed to
            ``sitk.WriteImage``.
        output_path: Destination file path.
        folder_path: Stored on the image under the metadata key
            ``"FolderPath"`` before writing.
    """
    output_dirpath = os.path.dirname(output_path)
    # dirname() is '' for a bare file name; os.makedirs('') would raise, and
    # the current directory needs no creation anyway.
    if output_dirpath and not os.path.isdir(output_dirpath):
        print(f"Creating directory {output_dirpath}")
        # exist_ok guards against another process creating it in between.
        os.makedirs(output_dirpath, exist_ok=True)

    image.SetMetaData("FolderPath", folder_path)
    sitk.WriteImage(image, output_path)
|
|
def find_metadata_files(path, file_name='*meta*'):
    """Recursively glob below ``path`` for entries whose name matches ``file_name``.

    The pattern is ``<path>/**/<file_name>`` evaluated with ``recursive=True``;
    the list returned by ``glob.glob`` is passed back unchanged.
    """
    pattern_parts = (path, '**', file_name)
    recursive_pattern = os.path.join(*pattern_parts)
    return glob.glob(recursive_pattern, recursive=True)
|
|
def get_img_path_from_folder(folder_path, img_type='.nii.gz', include_str=None, exclude_str='segmentation', is_sorted=True):
    """Collect the paths of matching files anywhere below ``folder_path``.

    A file is kept when its name ends with ``img_type``, contains
    ``include_str`` (unless that is ``None``) and does not contain
    ``exclude_str`` (unless that is ``None``). With ``is_sorted`` the result is
    sorted; otherwise it is in ``os.walk`` order.
    """
    def _is_wanted(name):
        if not name.endswith(img_type):
            return False
        if include_str is not None and include_str not in name:
            return False
        return exclude_str is None or exclude_str not in name

    matches = [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(folder_path)
        for name in names
        if _is_wanted(name)
    ]
    return sorted(matches) if is_sorted else matches
|
|
def get_unisize_resampler(ref_img, interpolator='linear', spacing=None, size=None):
    '''
    Build a resampler that puts ``ref_img`` onto an isotropic voxel grid.

    The target spacing on every axis is the smallest entry of ``spacing``; the
    target size is rescaled per axis as ``round(size * spacing / min_spacing)``.
    For example, spacing [0.1, 0.1, 0.3] becomes [0.1, 0.1, 0.1].

    Args:
        ref_img: Reference image; supplies origin, direction and pixel type,
            and also size/spacing when those are not given explicitly.
        interpolator: ``'linear'`` or ``'nearest'`` (typically linear for
            images, nearest for segmentation masks). Any other value is passed
            to ``SetInterpolator`` unchanged.
        spacing: Optional spacing overriding ``ref_img.GetSpacing()``.
        size: Optional size overriding ``ref_img.GetSize()``.

    Returns:
        A configured ``sitk.ResampleImageFilter``, or ``None`` when all
        spacing entries are already equal (nothing to resample).
    '''
    if size is None:
        size = ref_img.GetSize()
    if spacing is None:
        spacing = ref_img.GetSpacing()

    min_spacing = min(spacing)
    if all(spc == min_spacing for spc in spacing):
        return None

    if interpolator == 'nearest':
        interpolator = sitk.sitkNearestNeighbor
    elif interpolator == 'linear':
        interpolator = sitk.sitkLinear

    resampler = sitk.ResampleImageFilter()

    # Work on however many axes the input has instead of assuming exactly 3.
    new_size = [int(round(old_sz * old_spc / min_spacing)) for old_sz, old_spc in zip(size, spacing)]
    new_spacing = [min_spacing] * len(new_size)

    resampler.SetSize(new_size)
    resampler.SetOutputSpacing(new_spacing)
    resampler.SetOutputOrigin(ref_img.GetOrigin())
    resampler.SetOutputDirection(ref_img.GetDirection())
    resampler.SetInterpolator(interpolator)
    # NOTE(review): GetPixelIDValue() looks like the pixel *type* id, not an
    # intensity, so using it as the fill value for samples outside the input
    # is probably unintended. Kept as-is because the right fill value depends
    # on the data - TODO confirm.
    resampler.SetDefaultPixelValue(ref_img.GetPixelIDValue())
    resampler.SetOutputPixelType(ref_img.GetPixelID())
    return resampler
|
|
def clamp_image(in_img,clamp_range):
    '''
    Limit the intensities of ``in_img`` to ``clamp_range``.

    ``clamp_range[0]`` is used as the lower bound and ``clamp_range[1]`` as the
    upper bound of a ``sitk.ClampImageFilter``; the filtered image is returned.
    '''
    lower_bound = clamp_range[0]
    upper_bound = clamp_range[1]
    clamper = sitk.ClampImageFilter()
    clamper.SetLowerBound(lower_bound)
    clamper.SetUpperBound(upper_bound)
    clamped = clamper.Execute(in_img)
    return clamped
|
|
def get_synonyms_dict(dict_type='ROI'):
    '''
    Return the synonym table for one metadata field.

    Each table maps a standard term to the list of spellings that should be
    normalised to it. Entry order is significant for ``replace_synonyms``,
    which returns the first standard term with a matching synonym, so the
    multi-region ROI entries are listed before the single-region ones.

    Args:
        dict_type: One of ``'ROI'``, ``'Label_tissue'``, ``'Task'`` or
            ``'Modality'``.

    Returns:
        dict mapping standard term -> list of synonym strings.

    Raises:
        ValueError: If ``dict_type`` is not one of the supported names.
    '''
    if dict_type == 'ROI':
        dict_synonyms = {
            'whole-body': ['whole-body', 'whole body', 'wholebody', 'whole body', 'whole-body', 'whole body', 'wholebody','polytrauma','head-neck-thorax-abdomen-pelvis-leg','head-neck-thorax-abdomen-pelvis'],
            'neck-thorax-abdomen-pelvis-leg': ['neck-thorax-abdomen-pelvis-leg','neck-thx-abd-pelvis-leg', 'angiography neck-thx-abd-pelvis-leg', 'neck thorax abdomen pelvis leg', 'neck and thorax and abdomen and pelvis and leg', 'neck, thorax, abdomen, pelvis & leg', 'neck/thorax/abdomen/pelvis/leg', 'neck, thorax, abdomen, pelvis and leg', 'neck thorax abdomen pelvis leg'],
            'neck-thorax-abdomen-pelvis': ['neck-thorax-abdomen-pelvis', 'neck-thx-abd-pelvis', 'neck thorax abdomen pelvis', 'neck and thorax and abdomen and pelvis', 'neck, thorax, abdomen & pelvis', 'neck/thorax/abdomen/pelvis', 'neck, thorax, abdomen and pelvis', 'neck thorax abdomen & pelvis'],
            'thorax-abdomen-pelvis-leg': ['thorax-abdomen-pelvis-leg','thx-abd-pelvis-leg', 'angiography thx-abd-pelvis-leg', 'thorax abdomen pelvis leg', 'thorax and abdomen and pelvis and leg', 'thorax, abdomen, pelvis & leg', 'thorax/abdomen/pelvis/leg', 'thorax, abdomen, pelvis and leg', 'thorax abdomen pelvis leg'],
            'neck-thorax-abdomen': ['neck-thorax-abdomen', 'neck-thorax-abdomen', 'neck thorax abdomen', 'neck and thorax and abdomen', 'neck, thorax, abdomen', 'neck/thorax/abdomen', 'neck, thorax, abdomen', 'neck thorax abdomen'],
            'head-neck-thorax-abdomen': ['head-neck-thorax-abdomen', 'head-neck-thorax-abdomen', 'head neck thorax abdomen', 'head and neck and thorax and abdomen', 'head, neck, thorax, abdomen', 'head/thorax/abdomen', 'head, thorax, abdomen', 'head thorax abdomen'],
            'head-neck-thorax': ['head-neck-thorax', 'head neck thorax', 'head and neck and thorax', 'head, neck, thorax', 'head/thorax', 'head, thorax', 'head thorax'],
            'thorax-abdomen-pelvis': ['thorax-abdomen-pelvis', 'thx-abd-pelvis', 'polytrauma', 'thorax abdomen pelvis', 'thorax and abdomen and pelvis', 'thorax, abdomen & pelvis', 'thorax/abdomen/pelvis', 'thorax, abdomen and pelvis', 'thorax abdomen & pelvis'],
            'abdomen-pelvis-leg': ['abdomen-pelvis-leg', 'angiography abdomen-pelvis-leg', 'abd-pelvis-leg', 'abdomen pelvis leg', 'abdomen and pelvis and leg', 'abdomen, pelvis & leg', 'abdomen/pelvis/leg', 'abdomen, pelvis, leg', 'abdomen pelvis leg'],
            'neck-thorax': ['neck-thorax', 'neck thorax', 'neck and thorax', 'neck, thorax', 'thorax-neck', 'thorax neck', 'thorax and neck', 'thorax, neck','thorax/neck'],
            'thorax-abdomen': ['thorax-abdomen', 'thorax abdomen', 'thorax and abdomen', 'thorax, abdomen'],
            'abdomen-pelvis': ['abdomen-pelvis', 'abdomen pelvis', 'abdomen and pelvis', 'abdomen & pelvis', 'abdomen/pelvis', 'abdomen-pelvis', 'abdomen pelvis', 'abdomen and pelvis', 'abdomen & pelvis', 'abdomen/pelvis'],
            'pelvis-leg': ['pelvis-leg', 'pelvis leg', 'pelvis and leg', 'pelvis, leg', 'pelvis/leg', 'pelvis-leg', 'pelvis leg', 'pelvis and leg', 'pelvis, leg', 'pelvis/leg'],
            'head-neck': ['head-neck', 'head neck', 'head and neck', 'head, neck', 'head/neck', 'head-neck', 'head neck', 'head and neck', 'head, neck', 'head/neck'],
            'abdomen': ['abdomen', 'abdominal', 'belly', 'stomach', 'tummy', 'gut', 'guts', 'viscera', 'bowels', 'intestines', 'gastrointestinal', 'digestive', 'peritoneum','gastric', 'liver', 'spleen', 'pancreas','kidney','lumbar','renal','hepatic','splenic','pancreatic','intervention'],
            # 'cardiothoracic' was previously misspelled 'caeiothoracic'.
            'thorax': ['chest', 'thorax', 'breast', 'lung', 'heart','heart-thorakale aorta', 'heart-thorakale', 'mediastinum', 'pleura', 'bronchus', 'bronchi', 'trachea', 'esophagus', 'diaphragm', 'rib', 'sternum', 'clavicle', 'scapula', 'axilla', 'armpit','breast biopsy','thoracic','mammary','cardiothoracic','mediastinal','pleural','bronchial','bronchial tree','tracheal','esophageal','diaphragmatic','costal','sternal','clavicular','scapular','axillary','axillar','cardiac','pericardial','pericardiac','pericardium'],
            'head': ['head', 'headbasis', 'brain', 'skull', 'face','nose','ear','eye','mouth','jaw','cheek','chin','forehead','temporal','parietal','occipital','frontal','mandible','maxilla','mandibular','maxillary','nasal','orbital','orbita','ocular','auricular','otic','oral','buccal','labial','lingual','palatal'],
            'neck': ['neck', 'throat', 'cervical', 'thyroid', 'trachea', 'larynx', 'pharynx', 'esophagus','pharyngeal','laryngeal','cervical','thyroid','trachea','esophagus','carotid','jugular'],
            'hand': ['hand', 'finger', 'thumb', 'palm', 'wrist', 'knuckle', 'fingernail', 'phalanx', 'metacarpal', 'carpal', 'radius'],
            # 'armpit' and 'clavicle' are separate entries; a missing comma
            # used to fuse them into the single string 'armpitclavicle'.
            'arm': ['arm', 'forearm', 'upper arm', 'bicep', 'tricep', 'brachium', 'brachial', 'humerus', 'radius', 'ulna', 'elbow', 'shoulder', 'armpit', 'clavicle', 'scapula', 'acromion', 'acromioclavicular'],
            'leg': ['leg', 'felsenleg','thigh', 'calf', 'shin', 'knee', 'foot', 'ankle', 'toe', 'heel', 'sole', 'arch', 'instep', 'metatarsal', 'phalanx', 'tibia', 'fibula', 'femur', 'patella', 'kneecap','achilles tendon','achilles'],
            'pelvis': ['pelvis', 'hip', 'groin', 'buttock', 'gluteus', 'gluteal', 'ischium', 'pubis', 'sacrum', 'coccyx', 'acetabulum', 'iliac', 'iliac crest', 'iliac spine', 'iliac wing', 'sacroiliac', 'sacroiliac joint', 'sacroiliac ligament', 'sacroiliac spine', 'ureter', 'bladder', 'urethra', 'prostate', 'testicle', 'ovary', 'uterus',],
            'skeleton': ['skeleton','bone','spine', 'back', 'vertebra', 'sacrum', 'coccyx'],
        }
    elif dict_type == 'Label_tissue':
        dict_synonyms = {
            'liver': ['liver','hepatic'],
            'spleen': ['spleen','splenic'],
            'kidney': ['kidney','renal'],
            'pancreas': ['pancreas','pancreatic'],
            'stomach': ['stomach','gastric'],
            'intestine': ['large intestine', 'small intestine','large bowel','small bowel'],
            'gallbladder': ['gallbladder'],
            'adrenal_gland': ['adrenal_gland','adrenal gland'],
            'bladder': ['bladder'],
            'prostate': ['prostate'],
            'uterus': ['uterus'],
            'ovary': ['ovary'],
            'testicle': ['testicle'],
            'lymph_node': ['lymph_node','lymph node'],
            'bone': ['bone'],
            'lung': ['lung'],
            'heart': ['heart'],
            'esophagus': ['esophagus'],
            'muscle': ['muscle'],
            'fat': ['fat'],
            'skin': ['skin'],
            'vessel': ['vessel'],
            'tumor': ['tumor'],
            'other': ['other']
        }
    elif dict_type == 'Task':
        dict_synonyms = {
            'segmentation': ['segmentation', 'seg', 'mask'],
            'classification': ['classification', 'class', 'diagnosis','identify','identification'],
            'localization': ['localization', 'locate', 'location', 'position'],
            'registration': ['registration', 'register', 'align', 'alignment'],
            'detection': ['detection', 'detect', 'find', 'locate'],
            'quantification': ['quantification', 'quantify', 'measure', 'measurement'],
        }
    elif dict_type == 'Modality':
        dict_synonyms = {
            'CT': ['CT', 'computed tomography'],
            'MRI': ['MRI', 'MR', 'magnetic resonance imaging'],
            'PET': ['PET', 'positron emission tomography'],
            'US': ['US', 'ultrasound'],
            'X-ray': ['X-ray', 'radiography'],
            'SPECT': ['SPECT', 'single-photon emission computed tomography'],
        }
    else:
        raise ValueError(f"dict_type {dict_type} is not valid")
    return dict_synonyms
|
|
def replace_synonyms(text, dict_synonyms):
    '''
    Replace synonyms in ``text`` with the standard terms of ``dict_synonyms``.

    Args:
        text: A string, a list, or a dict. Any other type is returned as-is.
        dict_synonyms: Mapping of standard term -> list of synonym strings.

    Returns:
        * str: the first standard term (in dict order) one of whose synonyms
          occurs, case-insensitively, as a substring of ``text``. If nothing
          matches, a warning is issued and ``text`` is returned unchanged.
        * list: a new list with every element processed recursively.
        * dict: the same dict object with every value processed recursively;
          keys are left untouched.
    '''
    if isinstance(text, str):
        lowered = text.lower()
        for key, value in dict_synonyms.items():
            if any(v.lower() in lowered for v in value):
                return key
        # warnings.warn actually emits the message; merely constructing a
        # Warning object (without raising or warn()) is a silent no-op.
        warnings.warn(f"Value {text} is not in the correct format", stacklevel=2)
        return text
    if isinstance(text, list):
        return [replace_synonyms(t, dict_synonyms) for t in text]
    if isinstance(text, dict):
        # Snapshot the keys so assigning into the dict while looping is safe.
        for key in list(text):
            text[key] = replace_synonyms(text[key], dict_synonyms)
    return text
|
|
| |
|
|
class meta_data(object):
    '''
    Holds and validates the metadata record of one dataset.

    The allowed keys are described by ``config_format.json``, loaded from the
    directory containing this module. Values are collected in ``self.config``
    through ``add_keyvalue`` (validated) or ``add_extra_keyvalue``
    (unvalidated) and retrieved with ``get_meta_data``.
    '''

    def __init__(self):
        self.config_format_path = os.path.join(os.path.dirname(__file__), 'config_format.json')
        with open(self.config_format_path, 'r') as file:
            self.config_format = json.load(file)

        # Every required key starts as an empty dict; req_check() treats an
        # empty dict as "not filled in yet".
        self.config = {}
        for key, spec in self.config_format.items():
            if spec['required']:
                self.config[key] = {}

        self.keytypes = self.find_all_keys_with_type()
        self.keytypes_flatten = self.flatten_json()

        # Keys whose values are normalised through the synonym tables. Their
        # allowed options are the standard terms of the matching table.
        # NOTE(review): type_check() reads ['items']['options'] for
        # 'Label_tissue' and 'Task', while this loop writes ['options'] -
        # confirm against config_format.json which location is intended.
        self.ambiguity_keys = ['ROI', 'Label_tissue', 'Task', 'Modality']
        for key in self.ambiguity_keys:
            ambiguity_dict = get_synonyms_dict(key)
            self.config_format[key]['options'] = list(ambiguity_dict.keys())

    def get_ketytypes(self):
        '''Return the ``{dotted key: type}`` mapping (historical, misspelled name).'''
        return self.keytypes

    def get_keytypes(self):
        '''Return the ``{dotted key: type}`` mapping; same as ``get_ketytypes``.'''
        return self.keytypes

    def get_keytypes_flatten(self):
        '''Return the flattened view of the format description.'''
        return self.keytypes_flatten

    def find_all_keys_with_type(self, data=None, parent_key=''):
        '''
        Collect the ``'type'`` entry of every nested dict that has one.

        Args:
            data: Structure to scan; ``None`` means ``self.config_format``.
            parent_key: Prefix for the generated keys.

        Returns:
            dict mapping a dotted path (``a.b``, list items as ``a[0]``) to
            the ``'type'`` value found at that path.
        '''
        if data is None:
            data = self.config_format
        keys_with_type = {}
        if isinstance(data, dict):
            for key, value in data.items():
                full_key = f"{parent_key}.{key}" if parent_key else key
                if isinstance(value, dict) and 'type' in value:
                    keys_with_type[full_key] = value['type']
                keys_with_type.update(self.find_all_keys_with_type(value, full_key))
        elif isinstance(data, list):
            for index, item in enumerate(data):
                full_key = f"{parent_key}[{index}]"
                keys_with_type.update(self.find_all_keys_with_type(item, full_key))
        return keys_with_type

    def flatten_json(self, data=None, parent_key='', sep='.'):
        '''
        Flatten nested dicts/lists into a single-level dict.

        Args:
            data: Structure to flatten; ``None`` means ``self.config_format``.
            parent_key: Prefix for the generated keys.
            sep: Separator placed between nested dict keys.

        Returns:
            dict mapping joined paths (list items as ``key[i]``) to the scalar
            values stored directly in dicts. List items that are neither dict
            nor list produce no entry.
        '''
        if data is None:
            data = self.config_format
        items = {}
        if isinstance(data, dict):
            for key, value in data.items():
                new_key = f"{parent_key}{sep}{key}" if parent_key else key
                if isinstance(value, dict):
                    items.update(self.flatten_json(value, new_key, sep=sep))
                elif isinstance(value, list):
                    for i, item in enumerate(value):
                        items.update(self.flatten_json(item, f"{new_key}[{i}]", sep=sep))
                else:
                    items[new_key] = value
        elif isinstance(data, list):
            for i, item in enumerate(data):
                items.update(self.flatten_json(item, f"{parent_key}[{i}]", sep=sep))
        return items

    def req_check(self):
        '''
        Check that no entry of ``self.config`` is still an empty dict.

        Side effect: stores the names of the empty entries in
        ``self.unfilled_keys``.

        Returns:
            True when nothing is unfilled, otherwise False.
        '''
        self.unfilled_keys = [key for key in self.config if self.config[key] == {}]
        return len(self.unfilled_keys) == 0

    def type_check(self, key, value):
        '''
        Validate ``value`` for ``key`` against the format description.

        Returns:
            True when the value is acceptable. False when ``key`` is unknown
            (a message is printed), when the value fails the check for its
            key, or when no check is defined for that key/value combination.
        '''
        if key not in self.config_format.keys():
            print(key, "is not a valid key")
            return False

        if key in ('Modality', 'ROI'):
            return value in self.config_format[key]['options']

        if key in ('OriImg_path', 'Dataset_name'):
            return isinstance(value, str)

        if key == 'Label_path' and isinstance(value, dict):
            # Sub-keys not listed in the format description are skipped; for
            # listed ones every entry must be a string.
            for skey in value.keys():
                if skey in self.config_format[key]['keys']:
                    for kk in value[skey]:
                        if not isinstance(value[skey][kk], str):
                            return False
            return True

        if key in ('Label_tissue', 'Task') and isinstance(value, list):
            # The options lookup stays inside the generator so an empty list
            # is accepted without touching the format description.
            return all(item in self.config_format[key]['items']['options'] for item in value)

        if key == 'Spacing_mm':
            return isinstance(value, float)

        if key == 'Size' and isinstance(value, list) and len(value) >= 3:
            return all(isinstance(item, int) for item in value)

        if key in ('Sub_modality', 'Label_Dict'):
            return isinstance(value, dict)

        # Explicit rejection instead of implicitly returning None.
        return False

    def add_extra_keyvalue(self, key, value):
        '''Store ``value`` under ``key`` without any validation; returns True.'''
        self.config[key] = value
        return True

    def add_keyvalue(self, key, value):
        '''
        Validate and store one metadata value.

        Values of the keys in ``self.ambiguity_keys`` are first normalised
        with ``replace_synonyms``.

        Returns:
            True when the value passed ``type_check`` and was stored; False
            (after issuing a warning) otherwise.
        '''
        if key in self.ambiguity_keys:
            value = replace_synonyms(value, get_synonyms_dict(key))

        if self.type_check(key, value):
            self.config[key] = value
            return True

        warnings.warn(f"Value {value} is not in the correct format for key {key}", stacklevel=2)
        return False

    def get_meta_data(self):
        '''
        Return ``self.config`` once all required keys are filled.

        Returns:
            The config dict, or False (after printing the unfilled keys) when
            ``req_check`` fails.
        '''
        if self.req_check():
            return self.config
        print("Not all required keys are filled", self.unfilled_keys)
        return False
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: fill a metadata record and print intermediate state.
    meta = meta_data()
    print(meta.get_keytypes_flatten())
    print(meta.get_ketytypes())
    meta.add_keyvalue('Modality', 'CT')
    meta.add_keyvalue('OriImg_path', 'C:/Users/jzheng/Desktop/CT')
    meta.add_keyvalue('Label_path', {'ROI': {'1': 'C:/Users/jzheng/Desktop/CT/1'}, 'Tissue': {'1': 'C:/Users/jzheng/Desktop/CT/1'}})
    meta.add_keyvalue('Spacing_mm', 1.5)
    meta.add_keyvalue('Size', [512, 512, 100])
    meta.add_keyvalue('Dataset_name', 'CT')
    meta.add_keyvalue('Label_tissue', ['1', '2', '3'])
    meta.add_keyvalue('Task', ['1', '2', '3'])
    print(meta.get_meta_data())
    # The unvalidated setter is named add_extra_keyvalue; there is no
    # add_extra_key method on meta_data.
    meta.add_extra_keyvalue('extra', 'extra')
    print(meta.get_meta_data())
    print(meta.get_ketytypes())
    # Call the getter so the mapping is printed, not the bound method.
    print(meta.get_keytypes_flatten())

    # Example directory scan; the path is specific to the author's machine.
    org_data_foler_path = '/home/jachin/data/Github/data/data_gen_def/DATASETS/TotalSegmentorCT_MRI/TS_CT'
    img_paths = get_img_path_from_folder(org_data_foler_path, img_type='.nii.gz', include_str='ct', exclude_str='segmentation')
    print(img_paths)