|
|
| dataset: |
| add_dim_keys: |
| test: !!python/tuple |
| - drift_at_observations |
| train: !!python/tuple |
| - drift_at_observations |
| validation: !!python/tuple |
| - drift_at_observations |
| add_paths_keys: |
| test: !!python/tuple |
| - drift_at_observations |
| train: !!python/tuple |
| - drift_at_observations |
| validation: !!python/tuple |
| - drift_at_observations |
| batch_size: |
| test: 32 |
| train: 64 |
| validation: 32 |
| data_dirs: |
| test: !!python/tuple |
| - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/test/test_deg_3 |
| - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/test/test_deg_2 |
| - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/test/test_deg_1 |
| train: !!python/tuple |
| - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/train/train_deg_3 |
| - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/train/train_deg_2 |
| - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/train/train_deg_1 |
| validation: !!python/tuple |
| - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/validation/val_deg_3 |
| - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/validation/val_deg_2 |
| - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/validation/val_deg_1 |
| dataset_name: |
| test: HeterogeneousFIMSDEDataset |
| train: StreamingFIMSDEDataset |
| validation: StreamingFIMSDEDataset |
| files_to_load: |
| drift_at_locations: drift_at_locations.h5 |
| drift_at_observations: drift_at_observations.h5 |
| locations: locations.h5 |
| obs_mask: obs_mask.h5 |
| obs_times: obs_times.h5 |
| obs_values: obs_values.h5 |
| max_dim: 3 |
| name: FIMSDEDataloaderIterableDataset |
| num_locations: |
| test: null |
| train: 2000 |
| validation: 10000 |
| num_observations: |
| test: null |
| train: !!python/tuple |
| - 0 |
| - 1801 |
| validation: !!python/tuple |
| - 1799 |
| - 1801 |
| num_workers: |
| test: 0 |
| train: 7 |
| validation: 5 |
| shard: |
| test: false |
| train: true |
| validation: true |
| shuffle_elements: true |
| shuffle_locations: |
| test: false |
| train: true |
| validation: true |
| shuffle_paths: true |
|
|
| distributed: |
| activation_chekpoint: false |
| checkpoint_type: full_state |
| enabled: true |
| min_num_params: 1e5 |
| sharding_strategy: NO_SHARD |
| wrap_policy: SIZE_BAZED |
|
|
| experiment: |
| device_map: cuda |
| name: big_model_l1_600k_examples |
| name_add_date: true |
| seed: 10 |
|
|
| model: |
| model_config: |
| attention_map: softmax |
| attention_method: linear |
| dim_embed: 256 |
| dim_feedforward: 1024 |
| dim_ffn_u_model: 1024 |
| dim_hidden_u_model: 256 |
| dim_max_trajectory: 3 |
| dropout: 0.1 |
| num_context_encoder_layers: 2 |
| num_heads: 8 |
| num_res_layer_u_model: 6 |
| num_res_layers_functional_decoder: 8 |
| use_bias_for_projection: true |
| use_bias_in_attention: true |
| use_query_residual_in_attention: true |
| model_type: TrainingWrapper |
| train_config: |
| corruption_model_type: odeformer |
| loss_filter_nans: true |
| loss_type: l1 |
| max_sigma_trajectory_noise: 0.06 |
| max_subsampling_ration: 0.5 |
| train_type: vector_field |
| train_with_normalized_head: true |
|
|
| optimizers: !!python/tuple |
| - optimizer_d: |
| gradient_norm_clipping: 10 |
| lr: 1.0e-05 |
| name: torch.optim.AdamW |
| weight_decay: 0.0001 |
|
|
| trainer: |
| best_metric: loss |
| debug_iterations: null |
| detect_anomaly: false |
| epochs: 2500 |
| experiment_dir: ./results/ |
| gradient_accumulation_steps: 1 |
| logging_format: RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s |
| name: Trainer |
| precision: bf16mixed |
| save_every: 1 |
| schedulers: !!python/tuple |
| - beta: 1.0 |
| label: drift_loss_scale |
| name: fim.utils.param_scheduler.ConstantScheduler |
|
|