File size: 1,461 Bytes
73b7176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
{
  "checkpoint": "/tmp/checkpoints_block_d128/search_step_1000.pt",
  "step": 1000,
  "K": 128,
  "prefill_len": 1024,
  "local_window": 256,
  "num_batches": 8,
  "layers": {
    "4": {
      "dynamic_mass": 0.947265625,
      "static_mass": 0.9248046875,
      "static_teacher_available": 0.974609375,
      "dynamic_minus_static": 0.0224609375
    },
    "8": {
      "dynamic_mass": 0.9755859375,
      "static_mass": 0.91796875,
      "static_teacher_available": 0.94140625,
      "dynamic_minus_static": 0.0576171875
    },
    "12": {
      "dynamic_mass": 0.97607421875,
      "static_mass": 0.9267578125,
      "static_teacher_available": 0.94921875,
      "dynamic_minus_static": 0.04931640625
    },
    "16": {
      "dynamic_mass": 0.96875,
      "static_mass": 0.93115234375,
      "static_teacher_available": 0.9599609375,
      "dynamic_minus_static": 0.03759765625
    },
    "20": {
      "dynamic_mass": 0.98193359375,
      "static_mass": 0.94091796875,
      "static_teacher_available": 0.95654296875,
      "dynamic_minus_static": 0.041015625
    },
    "24": {
      "dynamic_mass": 0.98388671875,
      "static_mass": 0.92724609375,
      "static_teacher_available": 0.94189453125,
      "dynamic_minus_static": 0.056640625
    }
  },
  "aggregate": {
    "dynamic_mass": 0.9722493290901184,
    "static_mass": 0.9281412959098816,
    "static_teacher_available": 0.9539387822151184,
    "dynamic_minus_static": 0.0441080741584301
  }
}