---
# Training configuration with six top-level sections: dataset, distributed,
# experiment, model, optimizers and trainer. Keys inside every mapping are
# kept in alphabetical order.
#
# The file uses Python-specific `!!python/tuple` tags, so it cannot be read
# with a safe loader (e.g. PyYAML's `yaml.safe_load`); the tags are kept so
# that the tagged sequences still load as tuples rather than lists.

dataset:
  # Most per-split settings below are keyed by `test`, `train`, `validation`.
  add_dim_keys:
    test: !!python/tuple
      - drift_at_observations
    train: !!python/tuple
      - drift_at_observations
    validation: !!python/tuple
      - drift_at_observations
  add_paths_keys:
    test: !!python/tuple
      - drift_at_observations
    train: !!python/tuple
      - drift_at_observations
    validation: !!python/tuple
      - drift_at_observations
  batch_size:
    test: 32
    train: 64
    validation: 32
  # Absolute paths on the /lustre filesystem; each split lists three
  # directories (deg_3, deg_2, deg_1). The lines exceed the usual length
  # limit because a path cannot be wrapped without changing its value.
  data_dirs:
    test: !!python/tuple
      - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/test/test_deg_3
      - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/test/test_deg_2
      - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/test/test_deg_1
    train: !!python/tuple
      - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/train/train_deg_3
      - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/train/train_deg_2
      - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/train/train_deg_1
    validation: !!python/tuple
      - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/validation/val_deg_3
      - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/validation/val_deg_2
      - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/validation/val_deg_1
  # The test split names a different dataset class than train/validation.
  dataset_name:
    test: HeterogeneousFIMSDEDataset
    train: StreamingFIMSDEDataset
    validation: StreamingFIMSDEDataset
  files_to_load:
    drift_at_locations: drift_at_locations.h5
    drift_at_observations: drift_at_observations.h5
    locations: locations.h5
    obs_mask: obs_mask.h5
    obs_times: obs_times.h5
    obs_values: obs_values.h5
  max_dim: 3
  name: FIMSDEDataloaderIterableDataset
  num_locations:
    test: null
    train: 2000
    validation: 10000
  # NOTE(review): the two-element tuples are presumably (min, max) bounds;
  # confirm against the dataset class, including whether max is inclusive.
  num_observations:
    test: null
    train: !!python/tuple
      - 0
      - 1801
    validation: !!python/tuple
      - 1799
      - 1801
  num_workers:
    test: 0
    train: 7
    validation: 5
  shard:
    test: false
    train: true
    validation: true
  shuffle_elements: true
  shuffle_locations:
    test: false
    train: true
    validation: true
  shuffle_paths: true

distributed:
  # NOTE(review): key is spelled `activation_chekpoint` (sic). Kept verbatim
  # because the consumer presumably looks it up under this exact name;
  # verify before renaming.
  activation_chekpoint: false
  checkpoint_type: full_state
  enabled: true
  # NOTE(review): `1e5` has no decimal point and no exponent sign, so
  # YAML 1.1 loaders such as PyYAML read it as the string "1e5" while
  # YAML 1.2 loaders read a float. Left unchanged to preserve the loaded
  # type; confirm the consumer converts it explicitly.
  min_num_params: 1e5
  sharding_strategy: NO_SHARD
  # NOTE(review): value is spelled `SIZE_BAZED` (sic). Kept verbatim because
  # it is presumably matched against an identifier in the consumer; verify
  # before renaming.
  wrap_policy: SIZE_BAZED

experiment:
  device_map: cuda
  name: big_model_l1_600k_examples
  name_add_date: true
  seed: 10

model:
  model_config:
    attention_map: softmax
    attention_method: linear
    dim_embed: 256
    dim_feedforward: 1024
    dim_ffn_u_model: 1024
    dim_hidden_u_model: 256
    dim_max_trajectory: 3
    dropout: 0.1
    num_context_encoder_layers: 2
    num_heads: 8
    num_res_layer_u_model: 6
    num_res_layers_functional_decoder: 8
    use_bias_for_projection: true
    use_bias_in_attention: true
    use_query_residual_in_attention: true
  model_type: TrainingWrapper
  train_config:
    corruption_model_type: odeformer
    loss_filter_nans: true
    loss_type: l1
    max_sigma_trajectory_noise: 0.06
    # NOTE(review): key is spelled `max_subsampling_ration` (sic, likely
    # "ratio"). Kept verbatim; verify against the consumer before renaming.
    max_subsampling_ration: 0.5
    train_type: vector_field
    train_with_normalized_head: true

# A tuple holding a single mapping whose only key is `optimizer_d`.
optimizers: !!python/tuple
  - optimizer_d:
      gradient_norm_clipping: 10
      lr: 1.0e-05
      name: torch.optim.AdamW
      weight_decay: 0.0001

trainer:
  best_metric: loss
  debug_iterations: null
  detect_anomaly: false
  epochs: 2500
  experiment_dir: ./results/
  gradient_accumulation_steps: 1
  # Quoted so the `%(...)s` placeholders and ` - ` separators can never be
  # mistaken for YAML syntax; the string value itself is unchanged.
  logging_format: 'RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s'
  name: Trainer
  precision: bf16mixed
  save_every: 1
  schedulers: !!python/tuple
    - beta: 1.0
      label: drift_loss_scale
      name: fim.utils.param_scheduler.ConstantScheduler