checkpoint step 1600 epoch 1
0323_0747_checkpoint_step_1600_epoch_1/config.json
ADDED
@@ -0,0 +1,101 @@
+{
+  "epoch": 1,
+  "global_step": 1600,
+  "loss": 0.13015475869178772,
+  "training_config": {
+    "llm_backbone": "dasheng",
+    "qwen3_name": "Qwen/Qwen3-Embedding-0.6B",
+    "dasheng_name": "mispeech/midashenglm-7b-0804-fp32",
+    "dasheng_path": "/workspace/cache/huggingface/dasheng_lm",
+    "trainable_modules": [
+      "backbone",
+      "dasheng",
+      "dasheng_down",
+      "dasheng_proj",
+      "siglip_head"
+    ],
+    "use_lora": true,
+    "lora_r": 16,
+    "lora_alpha": 32,
+    "lora_dropout": 0.05,
+    "lora_target_modules": [
+      "q_proj",
+      "k_proj",
+      "v_proj"
+    ],
+    "use_dasheng_lora": false,
+    "dasheng_lora_r": 8,
+    "dasheng_lora_alpha": 16,
+    "dasheng_lora_dropout": 0.1,
+    "dasheng_lora_target_modules": null,
+    "train_layer_ratio": 1.0,
+    "train_layer_strategy": "last_n",
+    "output_dim": null,
+    "output_identity": false,
+    "use_logit_scale": true,
+    "loss_type": "infonce",
+    "use_checkpointing": true,
+    "checkpoint_reentrant": false,
+    "gather_negatives": true,
+    "use_loss_mask": true,
+    "duplicate_doc_threshold": 0.999,
+    "duplicate_query_threshold": 0.999,
+    "hard_negative_margin": 0.1,
+    "add_speaker_mask": false,
+    "s3_base_path": "https://d2j287p0ytux1o.cloudfront.net",
+    "dataset_config": "/workspace/SpeechRAG_exp/dataset_configs/setting_multi_task.json",
+    "aws_profile": "test_user",
+    "cache_dir": "/workspace/cache/huggingface",
+    "enable_audio_cache": true,
+    "audio_cache_dir": "/workspace/cache/huggingface/audio_cache",
+    "target_sr": 16000,
+    "mono": true,
+    "max_query_audio_length": 45.0,
+    "max_doc_audio_length": 45.0,
+    "max_query_text_length": 1000,
+    "max_doc_text_length": 1000,
+    "eval_max_query_audio_length": null,
+    "eval_max_doc_audio_length": null,
+    "eval_max_query_text_length": null,
+    "eval_max_doc_text_length": null,
+    "batch_size": 64,
+    "num_epochs": 10,
+    "learning_rate": 0.0001,
+    "gradient_accumulation_steps": 2,
+    "use_grad_cache": true,
+    "gc_query_chunk_size": 8,
+    "gc_doc_chunk_size": 8,
+    "gc_no_sync_except_last": true,
+    "ddp_find_unused_parameters": false,
+    "weight_decay": 0.001,
+    "optimizer_bits": "default",
+    "num_workers": 16,
+    "train_batch_task_mode": "single_task",
+    "task_batch_ratio": "{\"semantic\": 0.5, \"cross\": 0.5}",
+    "save_dir": "checkpoints",
+    "save_steps": 100,
+    "keep_checkpoints": 1,
+    "upload_steps": 100,
+    "upload_repo_id": "jdosjcd/embedding_checkpoint",
+    "log_dir": "logs",
+    "log_steps": 5,
+    "mixed_precision": "bf16",
+    "use_deepspeed": false,
+    "deepspeed_config": null,
+    "use_fsdp": false,
+    "fsdp_config": null,
+    "use_ema": false,
+    "ema_decay": 0.9999,
+    "ema_update_after": 0,
+    "ema_update_every": 1,
+    "scheduler_type": "warmup_cosine_decay",
+    "warmup_steps": 500,
+    "warmup_ratio": 0.1,
+    "min_lr": 1e-05,
+    "eval_steps": 100,
+    "eval_batch_size": 32,
+    "resume_from": "/workspace/SpeechRAG_exp/checkpoints/checkpoint_step_1500_epoch_1",
+    "test_mode": false,
+    "mock_dataset_size": 100
+  }
+}
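For quick inspection, the metadata above can be read back with the standard library. A minimal sketch, assuming the repo has been checked out so the committed path exists locally; the printed values mirror fields shown in the diff.

import json

# Read the checkpoint metadata added in this commit and pull out a few
# of the nested training_config fields (path as committed above).
with open("0323_0747_checkpoint_step_1600_epoch_1/config.json") as f:
    meta = json.load(f)

cfg = meta["training_config"]
print(meta["epoch"], meta["global_step"], meta["loss"])          # 1 1600 0.13015...
print(cfg["llm_backbone"], cfg["loss_type"], cfg["batch_size"])  # dasheng infonce 64
print(cfg["use_lora"], cfg["lora_r"], cfg["lora_alpha"])         # True 16 32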
0323_0747_checkpoint_step_1600_epoch_1/model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b45e438e621a02bc3880d7f36e7211c6d8a3cc8d9eb9da32738513237139c23
+size 31026342367
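The three lines above are a Git LFS pointer, not the weights themselves: oid is the SHA-256 of the real blob and size its byte count (roughly 31 GB here). A minimal sketch of verifying a downloaded model.pt against this pointer, using only the hash and size recorded above:

import hashlib, os

path = "0323_0747_checkpoint_step_1600_epoch_1/model.pt"
expected_oid = "8b45e438e621a02bc3880d7f36e7211c6d8a3cc8d9eb9da32738513237139c23"
expected_size = 31026342367

# Stream the file in 1 MiB chunks so the ~31 GB blob never sits in memory at once.
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch with LFS pointer"
assert h.hexdigest() == expected_oid, "sha256 mismatch with LFS pointer"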
0323_0747_checkpoint_step_1600_epoch_1/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51e9f0da27d770a1773e3dae1ebc282a066867f1bc6a1df521669036233c11c0
+size 5487310547
0323_0747_checkpoint_step_1600_epoch_1/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a791d89cf477ec8e5100abeb2621ba1aa008c24cc8ecc11550a5a22ae90a527f
+size 1401
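Taken together, the commit ships the model, optimizer, and scheduler state needed to continue training from step 1600 (the config's own resume_from points at the step-1500 directory in the same layout). A hypothetical resume sketch, assuming the three .pt files are plain torch.save() state dicts, which the commit itself does not state:

import torch

ckpt_dir = "0323_0747_checkpoint_step_1600_epoch_1"

# Assumption: each .pt file is a torch.save() state_dict for the matching object.
model_state = torch.load(f"{ckpt_dir}/model.pt", map_location="cpu")
optim_state = torch.load(f"{ckpt_dir}/optimizer.pt", map_location="cpu")
sched_state = torch.load(f"{ckpt_dir}/scheduler.pt", map_location="cpu")

# The model / optimizer / scheduler objects would come from the training script:
# model.load_state_dict(model_state)
# optimizer.load_state_dict(optim_state)
# scheduler.load_state_dict(sched_state)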