program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] { func main(tensor attention_mask, tensor input_ids) { tensor encoder_layers_0_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589952))))[name = string("encoder_layers_0_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_0_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591552))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788224))))[name = string("encoder_layers_0_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_0_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788800))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(985472))))[name = string("encoder_layers_0_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_0_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(986048))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1870848))))[name = string("encoder_layers_0_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_0_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1873216))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2758016))))[name = string("encoder_layers_0_mlp_up_proj_weight_quantized")]; tensor encoder_layers_0_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2760384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3645184))))[name = string("encoder_layers_0_mlp_down_proj_weight_quantized")]; tensor encoder_layers_1_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3646784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4236672))))[name = string("encoder_layers_1_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_1_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4238272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4434944))))[name = string("encoder_layers_1_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_1_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4435520))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4632192))))[name = string("encoder_layers_1_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_1_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4632768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5517568))))[name = string("encoder_layers_1_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_1_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5519936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6404736))))[name = string("encoder_layers_1_mlp_up_proj_weight_quantized")]; tensor encoder_layers_1_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6407104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7291904))))[name = string("encoder_layers_1_mlp_down_proj_weight_quantized")]; tensor encoder_layers_2_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7293504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7883392))))[name = string("encoder_layers_2_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_2_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7884992))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8081664))))[name = string("encoder_layers_2_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_2_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8082240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8278912))))[name = string("encoder_layers_2_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_2_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8279488))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9164288))))[name = string("encoder_layers_2_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_2_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9166656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10051456))))[name = string("encoder_layers_2_mlp_up_proj_weight_quantized")]; tensor encoder_layers_2_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10053824))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10938624))))[name = string("encoder_layers_2_mlp_down_proj_weight_quantized")]; tensor encoder_layers_3_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10940224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11530112))))[name = string("encoder_layers_3_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_3_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11531712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11728384))))[name = string("encoder_layers_3_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_3_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11728960))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11925632))))[name = string("encoder_layers_3_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_3_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11926208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12811008))))[name = string("encoder_layers_3_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_3_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12813376))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13698176))))[name = string("encoder_layers_3_mlp_up_proj_weight_quantized")]; tensor encoder_layers_3_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13700544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14585344))))[name = string("encoder_layers_3_mlp_down_proj_weight_quantized")]; tensor encoder_layers_4_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14586944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15176832))))[name = string("encoder_layers_4_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_4_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15178432))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15375104))))[name = string("encoder_layers_4_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_4_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15375680))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15572352))))[name = string("encoder_layers_4_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_4_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15572928))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16457728))))[name = string("encoder_layers_4_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_4_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16460096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17344896))))[name = string("encoder_layers_4_mlp_up_proj_weight_quantized")]; tensor encoder_layers_4_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17347264))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18232064))))[name = string("encoder_layers_4_mlp_down_proj_weight_quantized")]; tensor encoder_layers_5_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18233664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18823552))))[name = string("encoder_layers_5_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_5_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18825152))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19021824))))[name = string("encoder_layers_5_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_5_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19022400))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19219072))))[name = string("encoder_layers_5_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_5_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19219648))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20104448))))[name = string("encoder_layers_5_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_5_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20106816))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20991616))))[name = string("encoder_layers_5_mlp_up_proj_weight_quantized")]; tensor encoder_layers_5_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20993984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21878784))))[name = string("encoder_layers_5_mlp_down_proj_weight_quantized")]; tensor encoder_layers_6_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21880384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22470272))))[name = string("encoder_layers_6_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_6_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22471872))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22668544))))[name = string("encoder_layers_6_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_6_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22669120))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22865792))))[name = string("encoder_layers_6_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_6_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22866368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23751168))))[name = string("encoder_layers_6_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_6_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23753536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24638336))))[name = string("encoder_layers_6_mlp_up_proj_weight_quantized")]; tensor encoder_layers_6_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24640704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25525504))))[name = string("encoder_layers_6_mlp_down_proj_weight_quantized")]; tensor encoder_layers_7_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25527104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26116992))))[name = string("encoder_layers_7_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_7_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26118592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26315264))))[name = string("encoder_layers_7_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_7_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26315840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26512512))))[name = string("encoder_layers_7_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_7_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26513088))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27397888))))[name = string("encoder_layers_7_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_7_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27400256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28285056))))[name = string("encoder_layers_7_mlp_up_proj_weight_quantized")]; tensor encoder_layers_7_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28287424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29172224))))[name = string("encoder_layers_7_mlp_down_proj_weight_quantized")]; tensor encoder_layers_8_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29173824))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29763712))))[name = string("encoder_layers_8_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_8_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29765312))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29961984))))[name = string("encoder_layers_8_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_8_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29962560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30159232))))[name = string("encoder_layers_8_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_8_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30159808))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31044608))))[name = string("encoder_layers_8_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_8_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31046976))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31931776))))[name = string("encoder_layers_8_mlp_up_proj_weight_quantized")]; tensor encoder_layers_8_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31934144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32818944))))[name = string("encoder_layers_8_mlp_down_proj_weight_quantized")]; tensor encoder_layers_9_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32820544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33410432))))[name = string("encoder_layers_9_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_9_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33412032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33608704))))[name = string("encoder_layers_9_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_9_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33609280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33805952))))[name = string("encoder_layers_9_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_9_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33806528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34691328))))[name = string("encoder_layers_9_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_9_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34693696))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35578496))))[name = string("encoder_layers_9_mlp_up_proj_weight_quantized")]; tensor encoder_layers_9_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35580864))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36465664))))[name = string("encoder_layers_9_mlp_down_proj_weight_quantized")]; tensor encoder_layers_10_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36467264))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37057152))))[name = string("encoder_layers_10_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_10_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37058752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37255424))))[name = string("encoder_layers_10_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_10_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37256000))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37452672))))[name = string("encoder_layers_10_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_10_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37453248))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38338048))))[name = string("encoder_layers_10_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_10_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38340416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39225216))))[name = string("encoder_layers_10_mlp_up_proj_weight_quantized")]; tensor encoder_layers_10_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39227584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40112384))))[name = string("encoder_layers_10_mlp_down_proj_weight_quantized")]; tensor encoder_layers_11_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40113984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40703872))))[name = string("encoder_layers_11_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_11_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40705472))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40902144))))[name = string("encoder_layers_11_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_11_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40902720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41099392))))[name = string("encoder_layers_11_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_11_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41099968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41984768))))[name = string("encoder_layers_11_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_11_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41987136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42871936))))[name = string("encoder_layers_11_mlp_up_proj_weight_quantized")]; tensor encoder_layers_11_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42874304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43759104))))[name = string("encoder_layers_11_mlp_down_proj_weight_quantized")]; tensor encoder_layers_12_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43760704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44350592))))[name = string("encoder_layers_12_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_12_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44352192))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44548864))))[name = string("encoder_layers_12_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_12_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44549440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44746112))))[name = string("encoder_layers_12_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_12_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44746688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45631488))))[name = string("encoder_layers_12_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_12_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45633856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46518656))))[name = string("encoder_layers_12_mlp_up_proj_weight_quantized")]; tensor encoder_layers_12_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46521024))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47405824))))[name = string("encoder_layers_12_mlp_down_proj_weight_quantized")]; tensor encoder_layers_13_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47407424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47997312))))[name = string("encoder_layers_13_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_13_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47998912))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48195584))))[name = string("encoder_layers_13_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_13_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48196160))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48392832))))[name = string("encoder_layers_13_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_13_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48393408))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49278208))))[name = string("encoder_layers_13_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_13_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49280576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50165376))))[name = string("encoder_layers_13_mlp_up_proj_weight_quantized")]; tensor encoder_layers_13_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50167744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51052544))))[name = string("encoder_layers_13_mlp_down_proj_weight_quantized")]; tensor encoder_layers_14_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51054144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51644032))))[name = string("encoder_layers_14_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_14_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51645632))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51842304))))[name = string("encoder_layers_14_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_14_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51842880))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52039552))))[name = string("encoder_layers_14_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_14_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52040128))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52924928))))[name = string("encoder_layers_14_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_14_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52927296))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53812096))))[name = string("encoder_layers_14_mlp_up_proj_weight_quantized")]; tensor encoder_layers_14_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53814464))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54699264))))[name = string("encoder_layers_14_mlp_down_proj_weight_quantized")]; tensor encoder_layers_15_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54700864))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55290752))))[name = string("encoder_layers_15_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_15_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55292352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55489024))))[name = string("encoder_layers_15_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_15_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55489600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55686272))))[name = string("encoder_layers_15_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_15_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55686848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56571648))))[name = string("encoder_layers_15_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_15_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56574016))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57458816))))[name = string("encoder_layers_15_mlp_up_proj_weight_quantized")]; tensor encoder_layers_15_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57461184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58345984))))[name = string("encoder_layers_15_mlp_down_proj_weight_quantized")]; tensor encoder_layers_16_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58347584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58937472))))[name = string("encoder_layers_16_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_16_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58939072))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59135744))))[name = string("encoder_layers_16_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_16_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59136320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59332992))))[name = string("encoder_layers_16_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_16_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59333568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60218368))))[name = string("encoder_layers_16_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_16_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60220736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61105536))))[name = string("encoder_layers_16_mlp_up_proj_weight_quantized")]; tensor encoder_layers_16_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61107904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61992704))))[name = string("encoder_layers_16_mlp_down_proj_weight_quantized")]; tensor encoder_layers_17_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61994304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62584192))))[name = string("encoder_layers_17_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_17_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62585792))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62782464))))[name = string("encoder_layers_17_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_17_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62783040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62979712))))[name = string("encoder_layers_17_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_17_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62980288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63865088))))[name = string("encoder_layers_17_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_17_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63867456))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64752256))))[name = string("encoder_layers_17_mlp_up_proj_weight_quantized")]; tensor encoder_layers_17_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64754624))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65639424))))[name = string("encoder_layers_17_mlp_down_proj_weight_quantized")]; tensor encoder_layers_18_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65641024))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66230912))))[name = string("encoder_layers_18_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_18_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66232512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66429184))))[name = string("encoder_layers_18_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_18_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66429760))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66626432))))[name = string("encoder_layers_18_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_18_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66627008))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67511808))))[name = string("encoder_layers_18_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_18_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67514176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68398976))))[name = string("encoder_layers_18_mlp_up_proj_weight_quantized")]; tensor encoder_layers_18_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68401344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69286144))))[name = string("encoder_layers_18_mlp_down_proj_weight_quantized")]; tensor encoder_layers_19_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69287744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69877632))))[name = string("encoder_layers_19_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_19_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69879232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70075904))))[name = string("encoder_layers_19_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_19_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70076480))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70273152))))[name = string("encoder_layers_19_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_19_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70273728))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71158528))))[name = string("encoder_layers_19_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_19_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71160896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72045696))))[name = string("encoder_layers_19_mlp_up_proj_weight_quantized")]; tensor encoder_layers_19_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72048064))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72932864))))[name = string("encoder_layers_19_mlp_down_proj_weight_quantized")]; tensor encoder_layers_20_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72934464))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73524352))))[name = string("encoder_layers_20_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_20_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73525952))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73722624))))[name = string("encoder_layers_20_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_20_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73723200))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73919872))))[name = string("encoder_layers_20_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_20_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73920448))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74805248))))[name = string("encoder_layers_20_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_20_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74807616))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75692416))))[name = string("encoder_layers_20_mlp_up_proj_weight_quantized")]; tensor encoder_layers_20_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75694784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76579584))))[name = string("encoder_layers_20_mlp_down_proj_weight_quantized")]; tensor encoder_layers_21_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76581184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77171072))))[name = string("encoder_layers_21_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_21_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77172672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77369344))))[name = string("encoder_layers_21_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_21_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77369920))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77566592))))[name = string("encoder_layers_21_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_21_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77567168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78451968))))[name = string("encoder_layers_21_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_21_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78454336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79339136))))[name = string("encoder_layers_21_mlp_up_proj_weight_quantized")]; tensor encoder_layers_21_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79341504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80226304))))[name = string("encoder_layers_21_mlp_down_proj_weight_quantized")]; tensor encoder_layers_22_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80227904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80817792))))[name = string("encoder_layers_22_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_22_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80819392))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81016064))))[name = string("encoder_layers_22_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_22_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81016640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81213312))))[name = string("encoder_layers_22_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_22_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81213888))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82098688))))[name = string("encoder_layers_22_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_22_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82101056))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82985856))))[name = string("encoder_layers_22_mlp_up_proj_weight_quantized")]; tensor encoder_layers_22_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82988224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83873024))))[name = string("encoder_layers_22_mlp_down_proj_weight_quantized")]; tensor encoder_layers_23_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83874624))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84464512))))[name = string("encoder_layers_23_self_attn_q_proj_weight_quantized")]; tensor encoder_layers_23_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84466112))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84662784))))[name = string("encoder_layers_23_self_attn_k_proj_weight_quantized")]; tensor encoder_layers_23_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84663360))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84860032))))[name = string("encoder_layers_23_self_attn_v_proj_weight_quantized")]; tensor encoder_layers_23_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84860608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85745408))))[name = string("encoder_layers_23_mlp_gate_proj_weight_quantized")]; tensor encoder_layers_23_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85747776))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86632576))))[name = string("encoder_layers_23_mlp_up_proj_weight_quantized")]; tensor encoder_layers_23_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86634944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87519744))))[name = string("encoder_layers_23_mlp_down_proj_weight_quantized")]; tensor dense1_bias = const()[name = string("dense1_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87521344)))]; tensor dense1_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87527552))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89886912))))[name = string("dense1_weight_quantized")]; tensor dense2_bias = const()[name = string("dense2_bias"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89893120)))]; tensor dense2_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89894720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92254080))))[name = string("dense2_weight_quantized")]; int32 var_23 = const()[name = string("op_23"), val = int32(-1)]; int32 var_81_batch_dims_0 = const()[name = string("op_81_batch_dims_0"), val = int32(0)]; bool var_81_validate_indices_0 = const()[name = string("op_81_validate_indices_0"), val = bool(false)]; tensor encoder_embed_tokens_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92255680))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293582336))))[name = string("encoder_embed_tokens_weight_to_fp16_quantized")]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(262144)]; tensor add_0 = add(x = input_ids, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; tensor greater_equal_0_1 = greater_equal(x = select_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(262144)]; tensor add_0_1 = add(x = select_0, y = slice_by_index_0_1)[name = string("add_0_1")]; tensor select_0_1 = select(a = select_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; int32 op_81_cast_fp16_axis_0 = const()[name = string("op_81_cast_fp16_axis_0"), val = int32(0)]; tensor op_81_cast_fp16 = gather(axis = op_81_cast_fp16_axis_0, batch_dims = var_81_batch_dims_0, indices = select_0_1, validate_indices = var_81_validate_indices_0, x = encoder_embed_tokens_weight_to_fp16_quantized)[name = string("op_81_cast_fp16")]; fp16 var_83_to_fp16 = const()[name = string("op_83_to_fp16"), val = fp16(0x1.bb8p+4)]; tensor x_1_cast_fp16 = mul(x = op_81_cast_fp16, y = var_83_to_fp16)[name = string("x_1_cast_fp16")]; tensor cos_1_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294106688))), scale = tensor([[[[0x1.02p-7]]]]))[name = string("cos_1_quantized")]; tensor sin_1_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294139520))), scale = tensor([[[[0x1.02p-7]]]]))[name = string("sin_1_quantized")]; tensor cos_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294172352))), scale = tensor([[[[0x1.02p-7]]]]))[name = string("cos_quantized")]; tensor sin_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294205184))), scale = tensor([[[[0x1.02p-7]]]]))[name = string("sin_quantized")]; fp16 var_17_to_fp16 = const()[name = string("op_17_to_fp16"), val = fp16(0x1p+0)]; tensor var_93_cast_fp16 = sub(x = var_17_to_fp16, y = attention_mask)[name = string("op_93_cast_fp16")]; fp16 var_95_to_fp16 = const()[name = string("op_95_to_fp16"), val = fp16(-0x1.388p+13)]; tensor key_pad_1_cast_fp16 = mul(x = var_93_cast_fp16, y = var_95_to_fp16)[name = string("key_pad_1_cast_fp16")]; tensor var_97 = const()[name = string("op_97"), val = tensor([1, 1, 1, 128])]; tensor key_pad_cast_fp16 = reshape(shape = var_97, x = key_pad_1_cast_fp16)[name = string("key_pad_cast_fp16")]; tensor full_mask_reps_0 = const()[name = string("full_mask_reps_0"), val = tensor([1, 1, 128, 1])]; tensor full_mask_cast_fp16 = tile(reps = full_mask_reps_0, x = key_pad_cast_fp16)[name = string("full_mask_cast_fp16")]; fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_128_cast_fp16 = mul(x = x_1_cast_fp16, y = const_0_promoted_to_fp16)[name = string("op_128_cast_fp16")]; bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; tensor input_1_cast_fp16 = concat(axis = var_23, interleave = input_1_interleave_0, values = (x_1_cast_fp16, var_128_cast_fp16))[name = string("input_1_cast_fp16")]; tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; fp16 var_8_to_fp16 = const()[name = string("op_8_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_8_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; tensor var_133_split_sizes_0 = const()[name = string("op_133_split_sizes_0"), val = tensor([768, 768])]; int32 var_133_axis_0 = const()[name = string("op_133_axis_0"), val = int32(-1)]; tensor var_133_cast_fp16_0, tensor var_133_cast_fp16_1 = split(axis = var_133_axis_0, split_sizes = var_133_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_133_cast_fp16")]; tensor var_137_to_fp16 = const()[name = string("op_137_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294238016)))]; tensor out_1_cast_fp16 = mul(x = var_133_cast_fp16_0, y = var_137_to_fp16)[name = string("out_1_cast_fp16")]; tensor var_143 = const()[name = string("op_143"), val = tensor([0, 2, 1])]; tensor var_145_axes_0 = const()[name = string("op_145_axes_0"), val = tensor([2])]; tensor var_144_cast_fp16 = transpose(perm = var_143, x = out_1_cast_fp16)[name = string("transpose_215")]; tensor var_145_cast_fp16 = expand_dims(axes = var_145_axes_0, x = var_144_cast_fp16)[name = string("op_145_cast_fp16")]; string var_152_pad_type_0 = const()[name = string("op_152_pad_type_0"), val = string("valid")]; tensor var_152_strides_0 = const()[name = string("op_152_strides_0"), val = tensor([1, 1])]; tensor var_152_pad_0 = const()[name = string("op_152_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_152_dilations_0 = const()[name = string("op_152_dilations_0"), val = tensor([1, 1])]; int32 var_152_groups_0 = const()[name = string("op_152_groups_0"), val = int32(1)]; tensor var_152 = conv(dilations = var_152_dilations_0, groups = var_152_groups_0, pad = var_152_pad_0, pad_type = var_152_pad_type_0, strides = var_152_strides_0, weight = encoder_layers_0_self_attn_q_proj_weight_quantized, x = var_145_cast_fp16)[name = string("op_152")]; tensor var_153 = const()[name = string("op_153"), val = tensor([1, 3, 256, 128])]; tensor var_154 = reshape(shape = var_153, x = var_152)[name = string("op_154")]; tensor var_155 = const()[name = string("op_155"), val = tensor([0, 1, 3, 2])]; string var_162_pad_type_0 = const()[name = string("op_162_pad_type_0"), val = string("valid")]; tensor var_162_strides_0 = const()[name = string("op_162_strides_0"), val = tensor([1, 1])]; tensor var_162_pad_0 = const()[name = string("op_162_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_162_dilations_0 = const()[name = string("op_162_dilations_0"), val = tensor([1, 1])]; int32 var_162_groups_0 = const()[name = string("op_162_groups_0"), val = int32(1)]; tensor var_162 = conv(dilations = var_162_dilations_0, groups = var_162_groups_0, pad = var_162_pad_0, pad_type = var_162_pad_type_0, strides = var_162_strides_0, weight = encoder_layers_0_self_attn_k_proj_weight_quantized, x = var_145_cast_fp16)[name = string("op_162")]; tensor var_163 = const()[name = string("op_163"), val = tensor([1, 1, 256, 128])]; tensor var_164 = reshape(shape = var_163, x = var_162)[name = string("op_164")]; tensor var_165 = const()[name = string("op_165"), val = tensor([0, 1, 3, 2])]; string var_172_pad_type_0 = const()[name = string("op_172_pad_type_0"), val = string("valid")]; tensor var_172_strides_0 = const()[name = string("op_172_strides_0"), val = tensor([1, 1])]; tensor var_172_pad_0 = const()[name = string("op_172_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_172_dilations_0 = const()[name = string("op_172_dilations_0"), val = tensor([1, 1])]; int32 var_172_groups_0 = const()[name = string("op_172_groups_0"), val = int32(1)]; tensor var_172 = conv(dilations = var_172_dilations_0, groups = var_172_groups_0, pad = var_172_pad_0, pad_type = var_172_pad_type_0, strides = var_172_strides_0, weight = encoder_layers_0_self_attn_v_proj_weight_quantized, x = var_145_cast_fp16)[name = string("op_172")]; tensor var_173 = const()[name = string("op_173"), val = tensor([1, 1, 256, 128])]; tensor var_174 = reshape(shape = var_173, x = var_172)[name = string("op_174")]; tensor var_175 = const()[name = string("op_175"), val = tensor([0, 1, 3, 2])]; fp16 const_2_promoted_to_fp16 = const()[name = string("const_2_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_1 = transpose(perm = var_155, x = var_154)[name = string("transpose_214")]; tensor var_181_cast_fp16 = mul(x = q_1, y = const_2_promoted_to_fp16)[name = string("op_181_cast_fp16")]; bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; tensor input_5_cast_fp16 = concat(axis = var_23, interleave = input_5_interleave_0, values = (q_1, var_181_cast_fp16))[name = string("input_5_cast_fp16")]; tensor normed_7_axes_0 = const()[name = string("normed_7_axes_0"), val = tensor([-1])]; tensor normed_7_cast_fp16 = layer_norm(axes = normed_7_axes_0, epsilon = var_8_to_fp16, x = input_5_cast_fp16)[name = string("normed_7_cast_fp16")]; tensor var_186_split_sizes_0 = const()[name = string("op_186_split_sizes_0"), val = tensor([256, 256])]; int32 var_186_axis_0 = const()[name = string("op_186_axis_0"), val = int32(-1)]; tensor var_186_cast_fp16_0, tensor var_186_cast_fp16_1 = split(axis = var_186_axis_0, split_sizes = var_186_split_sizes_0, x = normed_7_cast_fp16)[name = string("op_186_cast_fp16")]; tensor var_190_to_fp16 = const()[name = string("op_190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294239616)))]; tensor out_3_cast_fp16 = mul(x = var_186_cast_fp16_0, y = var_190_to_fp16)[name = string("out_3_cast_fp16")]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_1 = transpose(perm = var_165, x = var_164)[name = string("transpose_213")]; tensor var_197_cast_fp16 = mul(x = k_1, y = const_4_promoted_to_fp16)[name = string("op_197_cast_fp16")]; bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; tensor input_7_cast_fp16 = concat(axis = var_23, interleave = input_7_interleave_0, values = (k_1, var_197_cast_fp16))[name = string("input_7_cast_fp16")]; tensor normed_11_axes_0 = const()[name = string("normed_11_axes_0"), val = tensor([-1])]; tensor normed_11_cast_fp16 = layer_norm(axes = normed_11_axes_0, epsilon = var_8_to_fp16, x = input_7_cast_fp16)[name = string("normed_11_cast_fp16")]; tensor var_202_split_sizes_0 = const()[name = string("op_202_split_sizes_0"), val = tensor([256, 256])]; int32 var_202_axis_0 = const()[name = string("op_202_axis_0"), val = int32(-1)]; tensor var_202_cast_fp16_0, tensor var_202_cast_fp16_1 = split(axis = var_202_axis_0, split_sizes = var_202_split_sizes_0, x = normed_11_cast_fp16)[name = string("op_202_cast_fp16")]; tensor var_206_to_fp16 = const()[name = string("op_206_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294240192)))]; tensor out_5_cast_fp16 = mul(x = var_202_cast_fp16_0, y = var_206_to_fp16)[name = string("out_5_cast_fp16")]; tensor var_209 = mul(x = out_3_cast_fp16, y = cos_1_quantized)[name = string("op_209")]; tensor var_210_split_sizes_0 = const()[name = string("op_210_split_sizes_0"), val = tensor([128, 128])]; int32 var_210_axis_0 = const()[name = string("op_210_axis_0"), val = int32(-1)]; tensor var_210_0, tensor var_210_1 = split(axis = var_210_axis_0, split_sizes = var_210_split_sizes_0, x = out_3_cast_fp16)[name = string("op_210")]; fp16 const_6_promoted = const()[name = string("const_6_promoted"), val = fp16(-0x1p+0)]; tensor var_212 = mul(x = var_210_1, y = const_6_promoted)[name = string("op_212")]; bool var_214_interleave_0 = const()[name = string("op_214_interleave_0"), val = bool(false)]; tensor var_214 = concat(axis = var_23, interleave = var_214_interleave_0, values = (var_212, var_210_0))[name = string("op_214")]; tensor var_215 = mul(x = var_214, y = sin_1_quantized)[name = string("op_215")]; tensor q_5 = add(x = var_209, y = var_215)[name = string("q_5")]; tensor var_217 = mul(x = out_5_cast_fp16, y = cos_1_quantized)[name = string("op_217")]; tensor var_218_split_sizes_0 = const()[name = string("op_218_split_sizes_0"), val = tensor([128, 128])]; int32 var_218_axis_0 = const()[name = string("op_218_axis_0"), val = int32(-1)]; tensor var_218_0, tensor var_218_1 = split(axis = var_218_axis_0, split_sizes = var_218_split_sizes_0, x = out_5_cast_fp16)[name = string("op_218")]; fp16 const_7_promoted = const()[name = string("const_7_promoted"), val = fp16(-0x1p+0)]; tensor var_220 = mul(x = var_218_1, y = const_7_promoted)[name = string("op_220")]; bool var_222_interleave_0 = const()[name = string("op_222_interleave_0"), val = bool(false)]; tensor var_222 = concat(axis = var_23, interleave = var_222_interleave_0, values = (var_220, var_218_0))[name = string("op_222")]; tensor var_223 = mul(x = var_222, y = sin_1_quantized)[name = string("op_223")]; tensor hidden_states_1 = add(x = var_217, y = var_223)[name = string("hidden_states_1")]; tensor hidden_states_3_axes_0 = const()[name = string("hidden_states_3_axes_0"), val = tensor([2])]; tensor hidden_states_3 = expand_dims(axes = hidden_states_3_axes_0, x = hidden_states_1)[name = string("hidden_states_3")]; tensor var_226 = const()[name = string("op_226"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_5 = tile(reps = var_226, x = hidden_states_3)[name = string("hidden_states_5")]; tensor var_228 = const()[name = string("op_228"), val = tensor([1, 3, 128, 256])]; tensor k_5 = reshape(shape = var_228, x = hidden_states_5)[name = string("k_5")]; tensor hidden_states_9_axes_0 = const()[name = string("hidden_states_9_axes_0"), val = tensor([2])]; tensor hidden_states_7 = transpose(perm = var_175, x = var_174)[name = string("transpose_212")]; tensor hidden_states_9 = expand_dims(axes = hidden_states_9_axes_0, x = hidden_states_7)[name = string("hidden_states_9")]; tensor var_231 = const()[name = string("op_231"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_11 = tile(reps = var_231, x = hidden_states_9)[name = string("hidden_states_11")]; tensor var_233 = const()[name = string("op_233"), val = tensor([1, 3, 128, 256])]; tensor v_1 = reshape(shape = var_233, x = hidden_states_11)[name = string("v_1")]; bool var_238_transpose_x_1 = const()[name = string("op_238_transpose_x_1"), val = bool(false)]; bool var_238_transpose_y_1 = const()[name = string("op_238_transpose_y_1"), val = bool(true)]; tensor var_238_cast_fp16 = matmul(transpose_x = var_238_transpose_x_1, transpose_y = var_238_transpose_y_1, x = q_5, y = k_5)[name = string("op_238_cast_fp16")]; fp16 var_239_to_fp16 = const()[name = string("op_239_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_238_cast_fp16, y = var_239_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; tensor var_243_cast_fp16 = softmax(axis = var_23, x = attn_weights_3_cast_fp16)[name = string("op_243_cast_fp16")]; bool var_247_transpose_x_0 = const()[name = string("op_247_transpose_x_0"), val = bool(false)]; bool var_247_transpose_y_0 = const()[name = string("op_247_transpose_y_0"), val = bool(false)]; tensor var_247_cast_fp16 = matmul(transpose_x = var_247_transpose_x_0, transpose_y = var_247_transpose_y_0, x = var_243_cast_fp16, y = v_1)[name = string("op_247_cast_fp16")]; tensor var_249 = const()[name = string("op_249"), val = tensor([0, 2, 1, 3])]; tensor var_252 = const()[name = string("op_252"), val = tensor([1, 128, 768])]; tensor var_250 = transpose(perm = var_249, x = var_247_cast_fp16)[name = string("transpose_211")]; tensor attn_out_3 = reshape(shape = var_252, x = var_250)[name = string("attn_out_3")]; tensor var_254 = const()[name = string("op_254"), val = tensor([0, 2, 1])]; tensor squeeze_0_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294240768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294830656))))[name = string("squeeze_0_quantized")]; string var_263_pad_type_0 = const()[name = string("op_263_pad_type_0"), val = string("valid")]; int32 var_263_groups_0 = const()[name = string("op_263_groups_0"), val = int32(1)]; tensor var_263_strides_0 = const()[name = string("op_263_strides_0"), val = tensor([1])]; tensor var_263_pad_0 = const()[name = string("op_263_pad_0"), val = tensor([0, 0])]; tensor var_263_dilations_0 = const()[name = string("op_263_dilations_0"), val = tensor([1])]; tensor var_255 = transpose(perm = var_254, x = attn_out_3)[name = string("transpose_210")]; tensor var_263 = conv(dilations = var_263_dilations_0, groups = var_263_groups_0, pad = var_263_pad_0, pad_type = var_263_pad_type_0, strides = var_263_strides_0, weight = squeeze_0_quantized, x = var_255)[name = string("op_263")]; tensor var_264 = const()[name = string("op_264"), val = tensor([0, 2, 1])]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_9 = transpose(perm = var_264, x = var_263)[name = string("transpose_209")]; tensor var_268_cast_fp16 = mul(x = x_9, y = const_8_promoted_to_fp16)[name = string("op_268_cast_fp16")]; bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; tensor input_11_cast_fp16 = concat(axis = var_23, interleave = input_11_interleave_0, values = (x_9, var_268_cast_fp16))[name = string("input_11_cast_fp16")]; tensor normed_15_axes_0 = const()[name = string("normed_15_axes_0"), val = tensor([-1])]; tensor normed_15_cast_fp16 = layer_norm(axes = normed_15_axes_0, epsilon = var_8_to_fp16, x = input_11_cast_fp16)[name = string("normed_15_cast_fp16")]; tensor var_273_split_sizes_0 = const()[name = string("op_273_split_sizes_0"), val = tensor([768, 768])]; int32 var_273_axis_0 = const()[name = string("op_273_axis_0"), val = int32(-1)]; tensor var_273_cast_fp16_0, tensor var_273_cast_fp16_1 = split(axis = var_273_axis_0, split_sizes = var_273_split_sizes_0, x = normed_15_cast_fp16)[name = string("op_273_cast_fp16")]; tensor var_277_to_fp16 = const()[name = string("op_277_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294832256)))]; tensor out_7_cast_fp16 = mul(x = var_273_cast_fp16_0, y = var_277_to_fp16)[name = string("out_7_cast_fp16")]; tensor x_11_cast_fp16 = add(x = x_1_cast_fp16, y = out_7_cast_fp16)[name = string("x_11_cast_fp16")]; fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_284_cast_fp16 = mul(x = x_11_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_284_cast_fp16")]; bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; tensor input_13_cast_fp16 = concat(axis = var_23, interleave = input_13_interleave_0, values = (x_11_cast_fp16, var_284_cast_fp16))[name = string("input_13_cast_fp16")]; tensor normed_19_axes_0 = const()[name = string("normed_19_axes_0"), val = tensor([-1])]; tensor normed_19_cast_fp16 = layer_norm(axes = normed_19_axes_0, epsilon = var_8_to_fp16, x = input_13_cast_fp16)[name = string("normed_19_cast_fp16")]; tensor var_289_split_sizes_0 = const()[name = string("op_289_split_sizes_0"), val = tensor([768, 768])]; int32 var_289_axis_0 = const()[name = string("op_289_axis_0"), val = int32(-1)]; tensor var_289_cast_fp16_0, tensor var_289_cast_fp16_1 = split(axis = var_289_axis_0, split_sizes = var_289_split_sizes_0, x = normed_19_cast_fp16)[name = string("op_289_cast_fp16")]; tensor var_293_to_fp16 = const()[name = string("op_293_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294833856)))]; tensor out_9_cast_fp16 = mul(x = var_289_cast_fp16_0, y = var_293_to_fp16)[name = string("out_9_cast_fp16")]; tensor var_300 = const()[name = string("op_300"), val = tensor([0, 2, 1])]; tensor input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor([2])]; tensor var_301 = transpose(perm = var_300, x = out_9_cast_fp16)[name = string("transpose_208")]; tensor input_15 = expand_dims(axes = input_15_axes_0, x = var_301)[name = string("input_15")]; string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = encoder_layers_0_mlp_gate_proj_weight_quantized, x = input_15)[name = string("gate_1")]; string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = encoder_layers_0_mlp_up_proj_weight_quantized, x = input_15)[name = string("up_1")]; string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; tensor input_17 = mul(x = gate_3, y = up_1)[name = string("input_17")]; string var_322_pad_type_0 = const()[name = string("op_322_pad_type_0"), val = string("valid")]; tensor var_322_strides_0 = const()[name = string("op_322_strides_0"), val = tensor([1, 1])]; tensor var_322_pad_0 = const()[name = string("op_322_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_322_dilations_0 = const()[name = string("op_322_dilations_0"), val = tensor([1, 1])]; int32 var_322_groups_0 = const()[name = string("op_322_groups_0"), val = int32(1)]; tensor var_322 = conv(dilations = var_322_dilations_0, groups = var_322_groups_0, pad = var_322_pad_0, pad_type = var_322_pad_type_0, strides = var_322_strides_0, weight = encoder_layers_0_mlp_down_proj_weight_quantized, x = input_17)[name = string("op_322")]; tensor var_323_axes_0 = const()[name = string("op_323_axes_0"), val = tensor([2])]; tensor var_323 = squeeze(axes = var_323_axes_0, x = var_322)[name = string("op_323")]; tensor var_324 = const()[name = string("op_324"), val = tensor([0, 2, 1])]; fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_15 = transpose(perm = var_324, x = var_323)[name = string("transpose_207")]; tensor var_328_cast_fp16 = mul(x = x_15, y = const_12_promoted_to_fp16)[name = string("op_328_cast_fp16")]; bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; tensor input_19_cast_fp16 = concat(axis = var_23, interleave = input_19_interleave_0, values = (x_15, var_328_cast_fp16))[name = string("input_19_cast_fp16")]; tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_8_to_fp16, x = input_19_cast_fp16)[name = string("normed_25_cast_fp16")]; tensor var_333_split_sizes_0 = const()[name = string("op_333_split_sizes_0"), val = tensor([768, 768])]; int32 var_333_axis_0 = const()[name = string("op_333_axis_0"), val = int32(-1)]; tensor var_333_cast_fp16_0, tensor var_333_cast_fp16_1 = split(axis = var_333_axis_0, split_sizes = var_333_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_333_cast_fp16")]; tensor var_337_to_fp16 = const()[name = string("op_337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294835456)))]; tensor out_11_cast_fp16 = mul(x = var_333_cast_fp16_0, y = var_337_to_fp16)[name = string("out_11_cast_fp16")]; tensor x_17_cast_fp16 = add(x = x_11_cast_fp16, y = out_11_cast_fp16)[name = string("x_17_cast_fp16")]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_366_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_366_cast_fp16")]; bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; tensor input_21_cast_fp16 = concat(axis = var_23, interleave = input_21_interleave_0, values = (x_17_cast_fp16, var_366_cast_fp16))[name = string("input_21_cast_fp16")]; tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_8_to_fp16, x = input_21_cast_fp16)[name = string("normed_29_cast_fp16")]; tensor var_371_split_sizes_0 = const()[name = string("op_371_split_sizes_0"), val = tensor([768, 768])]; int32 var_371_axis_0 = const()[name = string("op_371_axis_0"), val = int32(-1)]; tensor var_371_cast_fp16_0, tensor var_371_cast_fp16_1 = split(axis = var_371_axis_0, split_sizes = var_371_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_371_cast_fp16")]; tensor var_375_to_fp16 = const()[name = string("op_375_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294837056)))]; tensor out_13_cast_fp16 = mul(x = var_371_cast_fp16_0, y = var_375_to_fp16)[name = string("out_13_cast_fp16")]; tensor var_381 = const()[name = string("op_381"), val = tensor([0, 2, 1])]; tensor var_383_axes_0 = const()[name = string("op_383_axes_0"), val = tensor([2])]; tensor var_382_cast_fp16 = transpose(perm = var_381, x = out_13_cast_fp16)[name = string("transpose_206")]; tensor var_383_cast_fp16 = expand_dims(axes = var_383_axes_0, x = var_382_cast_fp16)[name = string("op_383_cast_fp16")]; string var_390_pad_type_0 = const()[name = string("op_390_pad_type_0"), val = string("valid")]; tensor var_390_strides_0 = const()[name = string("op_390_strides_0"), val = tensor([1, 1])]; tensor var_390_pad_0 = const()[name = string("op_390_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_390_dilations_0 = const()[name = string("op_390_dilations_0"), val = tensor([1, 1])]; int32 var_390_groups_0 = const()[name = string("op_390_groups_0"), val = int32(1)]; tensor var_390 = conv(dilations = var_390_dilations_0, groups = var_390_groups_0, pad = var_390_pad_0, pad_type = var_390_pad_type_0, strides = var_390_strides_0, weight = encoder_layers_1_self_attn_q_proj_weight_quantized, x = var_383_cast_fp16)[name = string("op_390")]; tensor var_391 = const()[name = string("op_391"), val = tensor([1, 3, 256, 128])]; tensor var_392 = reshape(shape = var_391, x = var_390)[name = string("op_392")]; tensor var_393 = const()[name = string("op_393"), val = tensor([0, 1, 3, 2])]; string var_400_pad_type_0 = const()[name = string("op_400_pad_type_0"), val = string("valid")]; tensor var_400_strides_0 = const()[name = string("op_400_strides_0"), val = tensor([1, 1])]; tensor var_400_pad_0 = const()[name = string("op_400_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_400_dilations_0 = const()[name = string("op_400_dilations_0"), val = tensor([1, 1])]; int32 var_400_groups_0 = const()[name = string("op_400_groups_0"), val = int32(1)]; tensor var_400 = conv(dilations = var_400_dilations_0, groups = var_400_groups_0, pad = var_400_pad_0, pad_type = var_400_pad_type_0, strides = var_400_strides_0, weight = encoder_layers_1_self_attn_k_proj_weight_quantized, x = var_383_cast_fp16)[name = string("op_400")]; tensor var_401 = const()[name = string("op_401"), val = tensor([1, 1, 256, 128])]; tensor var_402 = reshape(shape = var_401, x = var_400)[name = string("op_402")]; tensor var_403 = const()[name = string("op_403"), val = tensor([0, 1, 3, 2])]; string var_410_pad_type_0 = const()[name = string("op_410_pad_type_0"), val = string("valid")]; tensor var_410_strides_0 = const()[name = string("op_410_strides_0"), val = tensor([1, 1])]; tensor var_410_pad_0 = const()[name = string("op_410_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_410_dilations_0 = const()[name = string("op_410_dilations_0"), val = tensor([1, 1])]; int32 var_410_groups_0 = const()[name = string("op_410_groups_0"), val = int32(1)]; tensor var_410 = conv(dilations = var_410_dilations_0, groups = var_410_groups_0, pad = var_410_pad_0, pad_type = var_410_pad_type_0, strides = var_410_strides_0, weight = encoder_layers_1_self_attn_v_proj_weight_quantized, x = var_383_cast_fp16)[name = string("op_410")]; tensor var_411 = const()[name = string("op_411"), val = tensor([1, 1, 256, 128])]; tensor var_412 = reshape(shape = var_411, x = var_410)[name = string("op_412")]; tensor var_413 = const()[name = string("op_413"), val = tensor([0, 1, 3, 2])]; fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_7 = transpose(perm = var_393, x = var_392)[name = string("transpose_205")]; tensor var_419_cast_fp16 = mul(x = q_7, y = const_16_promoted_to_fp16)[name = string("op_419_cast_fp16")]; bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; tensor input_25_cast_fp16 = concat(axis = var_23, interleave = input_25_interleave_0, values = (q_7, var_419_cast_fp16))[name = string("input_25_cast_fp16")]; tensor normed_35_axes_0 = const()[name = string("normed_35_axes_0"), val = tensor([-1])]; tensor normed_35_cast_fp16 = layer_norm(axes = normed_35_axes_0, epsilon = var_8_to_fp16, x = input_25_cast_fp16)[name = string("normed_35_cast_fp16")]; tensor var_424_split_sizes_0 = const()[name = string("op_424_split_sizes_0"), val = tensor([256, 256])]; int32 var_424_axis_0 = const()[name = string("op_424_axis_0"), val = int32(-1)]; tensor var_424_cast_fp16_0, tensor var_424_cast_fp16_1 = split(axis = var_424_axis_0, split_sizes = var_424_split_sizes_0, x = normed_35_cast_fp16)[name = string("op_424_cast_fp16")]; tensor var_428_to_fp16 = const()[name = string("op_428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294838656)))]; tensor out_15_cast_fp16 = mul(x = var_424_cast_fp16_0, y = var_428_to_fp16)[name = string("out_15_cast_fp16")]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_7 = transpose(perm = var_403, x = var_402)[name = string("transpose_204")]; tensor var_435_cast_fp16 = mul(x = k_7, y = const_18_promoted_to_fp16)[name = string("op_435_cast_fp16")]; bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; tensor input_27_cast_fp16 = concat(axis = var_23, interleave = input_27_interleave_0, values = (k_7, var_435_cast_fp16))[name = string("input_27_cast_fp16")]; tensor normed_39_axes_0 = const()[name = string("normed_39_axes_0"), val = tensor([-1])]; tensor normed_39_cast_fp16 = layer_norm(axes = normed_39_axes_0, epsilon = var_8_to_fp16, x = input_27_cast_fp16)[name = string("normed_39_cast_fp16")]; tensor var_440_split_sizes_0 = const()[name = string("op_440_split_sizes_0"), val = tensor([256, 256])]; int32 var_440_axis_0 = const()[name = string("op_440_axis_0"), val = int32(-1)]; tensor var_440_cast_fp16_0, tensor var_440_cast_fp16_1 = split(axis = var_440_axis_0, split_sizes = var_440_split_sizes_0, x = normed_39_cast_fp16)[name = string("op_440_cast_fp16")]; tensor var_444_to_fp16 = const()[name = string("op_444_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294839232)))]; tensor out_17_cast_fp16 = mul(x = var_440_cast_fp16_0, y = var_444_to_fp16)[name = string("out_17_cast_fp16")]; tensor var_447 = mul(x = out_15_cast_fp16, y = cos_1_quantized)[name = string("op_447")]; tensor var_448_split_sizes_0 = const()[name = string("op_448_split_sizes_0"), val = tensor([128, 128])]; int32 var_448_axis_0 = const()[name = string("op_448_axis_0"), val = int32(-1)]; tensor var_448_0, tensor var_448_1 = split(axis = var_448_axis_0, split_sizes = var_448_split_sizes_0, x = out_15_cast_fp16)[name = string("op_448")]; fp16 const_20_promoted = const()[name = string("const_20_promoted"), val = fp16(-0x1p+0)]; tensor var_450 = mul(x = var_448_1, y = const_20_promoted)[name = string("op_450")]; bool var_452_interleave_0 = const()[name = string("op_452_interleave_0"), val = bool(false)]; tensor var_452 = concat(axis = var_23, interleave = var_452_interleave_0, values = (var_450, var_448_0))[name = string("op_452")]; tensor var_453 = mul(x = var_452, y = sin_1_quantized)[name = string("op_453")]; tensor q_11 = add(x = var_447, y = var_453)[name = string("q_11")]; tensor var_455 = mul(x = out_17_cast_fp16, y = cos_1_quantized)[name = string("op_455")]; tensor var_456_split_sizes_0 = const()[name = string("op_456_split_sizes_0"), val = tensor([128, 128])]; int32 var_456_axis_0 = const()[name = string("op_456_axis_0"), val = int32(-1)]; tensor var_456_0, tensor var_456_1 = split(axis = var_456_axis_0, split_sizes = var_456_split_sizes_0, x = out_17_cast_fp16)[name = string("op_456")]; fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; tensor var_458 = mul(x = var_456_1, y = const_21_promoted)[name = string("op_458")]; bool var_460_interleave_0 = const()[name = string("op_460_interleave_0"), val = bool(false)]; tensor var_460 = concat(axis = var_23, interleave = var_460_interleave_0, values = (var_458, var_456_0))[name = string("op_460")]; tensor var_461 = mul(x = var_460, y = sin_1_quantized)[name = string("op_461")]; tensor hidden_states_13 = add(x = var_455, y = var_461)[name = string("hidden_states_13")]; tensor hidden_states_15_axes_0 = const()[name = string("hidden_states_15_axes_0"), val = tensor([2])]; tensor hidden_states_15 = expand_dims(axes = hidden_states_15_axes_0, x = hidden_states_13)[name = string("hidden_states_15")]; tensor var_464 = const()[name = string("op_464"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_17 = tile(reps = var_464, x = hidden_states_15)[name = string("hidden_states_17")]; tensor var_466 = const()[name = string("op_466"), val = tensor([1, 3, 128, 256])]; tensor k_11 = reshape(shape = var_466, x = hidden_states_17)[name = string("k_11")]; tensor hidden_states_21_axes_0 = const()[name = string("hidden_states_21_axes_0"), val = tensor([2])]; tensor hidden_states_19 = transpose(perm = var_413, x = var_412)[name = string("transpose_203")]; tensor hidden_states_21 = expand_dims(axes = hidden_states_21_axes_0, x = hidden_states_19)[name = string("hidden_states_21")]; tensor var_469 = const()[name = string("op_469"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_23 = tile(reps = var_469, x = hidden_states_21)[name = string("hidden_states_23")]; tensor var_471 = const()[name = string("op_471"), val = tensor([1, 3, 128, 256])]; tensor v_3 = reshape(shape = var_471, x = hidden_states_23)[name = string("v_3")]; bool var_476_transpose_x_1 = const()[name = string("op_476_transpose_x_1"), val = bool(false)]; bool var_476_transpose_y_1 = const()[name = string("op_476_transpose_y_1"), val = bool(true)]; tensor var_476_cast_fp16 = matmul(transpose_x = var_476_transpose_x_1, transpose_y = var_476_transpose_y_1, x = q_11, y = k_11)[name = string("op_476_cast_fp16")]; fp16 var_477_to_fp16 = const()[name = string("op_477_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_7_cast_fp16 = mul(x = var_476_cast_fp16, y = var_477_to_fp16)[name = string("attn_weights_7_cast_fp16")]; tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor var_481_cast_fp16 = softmax(axis = var_23, x = attn_weights_9_cast_fp16)[name = string("op_481_cast_fp16")]; bool var_485_transpose_x_0 = const()[name = string("op_485_transpose_x_0"), val = bool(false)]; bool var_485_transpose_y_0 = const()[name = string("op_485_transpose_y_0"), val = bool(false)]; tensor var_485_cast_fp16 = matmul(transpose_x = var_485_transpose_x_0, transpose_y = var_485_transpose_y_0, x = var_481_cast_fp16, y = v_3)[name = string("op_485_cast_fp16")]; tensor var_487 = const()[name = string("op_487"), val = tensor([0, 2, 1, 3])]; tensor var_490 = const()[name = string("op_490"), val = tensor([1, 128, 768])]; tensor var_488 = transpose(perm = var_487, x = var_485_cast_fp16)[name = string("transpose_202")]; tensor attn_out_9 = reshape(shape = var_490, x = var_488)[name = string("attn_out_9")]; tensor var_492 = const()[name = string("op_492"), val = tensor([0, 2, 1])]; tensor squeeze_1_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294839808))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295429696))))[name = string("squeeze_1_quantized")]; string var_501_pad_type_0 = const()[name = string("op_501_pad_type_0"), val = string("valid")]; int32 var_501_groups_0 = const()[name = string("op_501_groups_0"), val = int32(1)]; tensor var_501_strides_0 = const()[name = string("op_501_strides_0"), val = tensor([1])]; tensor var_501_pad_0 = const()[name = string("op_501_pad_0"), val = tensor([0, 0])]; tensor var_501_dilations_0 = const()[name = string("op_501_dilations_0"), val = tensor([1])]; tensor var_493 = transpose(perm = var_492, x = attn_out_9)[name = string("transpose_201")]; tensor var_501 = conv(dilations = var_501_dilations_0, groups = var_501_groups_0, pad = var_501_pad_0, pad_type = var_501_pad_type_0, strides = var_501_strides_0, weight = squeeze_1_quantized, x = var_493)[name = string("op_501")]; tensor var_502 = const()[name = string("op_502"), val = tensor([0, 2, 1])]; fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_25 = transpose(perm = var_502, x = var_501)[name = string("transpose_200")]; tensor var_506_cast_fp16 = mul(x = x_25, y = const_22_promoted_to_fp16)[name = string("op_506_cast_fp16")]; bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; tensor input_31_cast_fp16 = concat(axis = var_23, interleave = input_31_interleave_0, values = (x_25, var_506_cast_fp16))[name = string("input_31_cast_fp16")]; tensor normed_43_axes_0 = const()[name = string("normed_43_axes_0"), val = tensor([-1])]; tensor normed_43_cast_fp16 = layer_norm(axes = normed_43_axes_0, epsilon = var_8_to_fp16, x = input_31_cast_fp16)[name = string("normed_43_cast_fp16")]; tensor var_511_split_sizes_0 = const()[name = string("op_511_split_sizes_0"), val = tensor([768, 768])]; int32 var_511_axis_0 = const()[name = string("op_511_axis_0"), val = int32(-1)]; tensor var_511_cast_fp16_0, tensor var_511_cast_fp16_1 = split(axis = var_511_axis_0, split_sizes = var_511_split_sizes_0, x = normed_43_cast_fp16)[name = string("op_511_cast_fp16")]; tensor var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295431296)))]; tensor out_19_cast_fp16 = mul(x = var_511_cast_fp16_0, y = var_515_to_fp16)[name = string("out_19_cast_fp16")]; tensor x_27_cast_fp16 = add(x = x_17_cast_fp16, y = out_19_cast_fp16)[name = string("x_27_cast_fp16")]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_522_cast_fp16 = mul(x = x_27_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_522_cast_fp16")]; bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; tensor input_33_cast_fp16 = concat(axis = var_23, interleave = input_33_interleave_0, values = (x_27_cast_fp16, var_522_cast_fp16))[name = string("input_33_cast_fp16")]; tensor normed_47_axes_0 = const()[name = string("normed_47_axes_0"), val = tensor([-1])]; tensor normed_47_cast_fp16 = layer_norm(axes = normed_47_axes_0, epsilon = var_8_to_fp16, x = input_33_cast_fp16)[name = string("normed_47_cast_fp16")]; tensor var_527_split_sizes_0 = const()[name = string("op_527_split_sizes_0"), val = tensor([768, 768])]; int32 var_527_axis_0 = const()[name = string("op_527_axis_0"), val = int32(-1)]; tensor var_527_cast_fp16_0, tensor var_527_cast_fp16_1 = split(axis = var_527_axis_0, split_sizes = var_527_split_sizes_0, x = normed_47_cast_fp16)[name = string("op_527_cast_fp16")]; tensor var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295432896)))]; tensor out_21_cast_fp16 = mul(x = var_527_cast_fp16_0, y = var_531_to_fp16)[name = string("out_21_cast_fp16")]; tensor var_538 = const()[name = string("op_538"), val = tensor([0, 2, 1])]; tensor input_35_axes_0 = const()[name = string("input_35_axes_0"), val = tensor([2])]; tensor var_539 = transpose(perm = var_538, x = out_21_cast_fp16)[name = string("transpose_199")]; tensor input_35 = expand_dims(axes = input_35_axes_0, x = var_539)[name = string("input_35")]; string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = encoder_layers_1_mlp_gate_proj_weight_quantized, x = input_35)[name = string("gate_5")]; string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = encoder_layers_1_mlp_up_proj_weight_quantized, x = input_35)[name = string("up_3")]; string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; tensor input_37 = mul(x = gate_7, y = up_3)[name = string("input_37")]; string var_560_pad_type_0 = const()[name = string("op_560_pad_type_0"), val = string("valid")]; tensor var_560_strides_0 = const()[name = string("op_560_strides_0"), val = tensor([1, 1])]; tensor var_560_pad_0 = const()[name = string("op_560_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_560_dilations_0 = const()[name = string("op_560_dilations_0"), val = tensor([1, 1])]; int32 var_560_groups_0 = const()[name = string("op_560_groups_0"), val = int32(1)]; tensor var_560 = conv(dilations = var_560_dilations_0, groups = var_560_groups_0, pad = var_560_pad_0, pad_type = var_560_pad_type_0, strides = var_560_strides_0, weight = encoder_layers_1_mlp_down_proj_weight_quantized, x = input_37)[name = string("op_560")]; tensor var_561_axes_0 = const()[name = string("op_561_axes_0"), val = tensor([2])]; tensor var_561 = squeeze(axes = var_561_axes_0, x = var_560)[name = string("op_561")]; tensor var_562 = const()[name = string("op_562"), val = tensor([0, 2, 1])]; fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_31 = transpose(perm = var_562, x = var_561)[name = string("transpose_198")]; tensor var_566_cast_fp16 = mul(x = x_31, y = const_26_promoted_to_fp16)[name = string("op_566_cast_fp16")]; bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; tensor input_39_cast_fp16 = concat(axis = var_23, interleave = input_39_interleave_0, values = (x_31, var_566_cast_fp16))[name = string("input_39_cast_fp16")]; tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_8_to_fp16, x = input_39_cast_fp16)[name = string("normed_53_cast_fp16")]; tensor var_571_split_sizes_0 = const()[name = string("op_571_split_sizes_0"), val = tensor([768, 768])]; int32 var_571_axis_0 = const()[name = string("op_571_axis_0"), val = int32(-1)]; tensor var_571_cast_fp16_0, tensor var_571_cast_fp16_1 = split(axis = var_571_axis_0, split_sizes = var_571_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_571_cast_fp16")]; tensor var_575_to_fp16 = const()[name = string("op_575_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295434496)))]; tensor out_23_cast_fp16 = mul(x = var_571_cast_fp16_0, y = var_575_to_fp16)[name = string("out_23_cast_fp16")]; tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = out_23_cast_fp16)[name = string("x_33_cast_fp16")]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_604_cast_fp16 = mul(x = x_33_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_604_cast_fp16")]; bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; tensor input_41_cast_fp16 = concat(axis = var_23, interleave = input_41_interleave_0, values = (x_33_cast_fp16, var_604_cast_fp16))[name = string("input_41_cast_fp16")]; tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_8_to_fp16, x = input_41_cast_fp16)[name = string("normed_57_cast_fp16")]; tensor var_609_split_sizes_0 = const()[name = string("op_609_split_sizes_0"), val = tensor([768, 768])]; int32 var_609_axis_0 = const()[name = string("op_609_axis_0"), val = int32(-1)]; tensor var_609_cast_fp16_0, tensor var_609_cast_fp16_1 = split(axis = var_609_axis_0, split_sizes = var_609_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_609_cast_fp16")]; tensor var_613_to_fp16 = const()[name = string("op_613_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295436096)))]; tensor out_25_cast_fp16 = mul(x = var_609_cast_fp16_0, y = var_613_to_fp16)[name = string("out_25_cast_fp16")]; tensor var_619 = const()[name = string("op_619"), val = tensor([0, 2, 1])]; tensor var_621_axes_0 = const()[name = string("op_621_axes_0"), val = tensor([2])]; tensor var_620_cast_fp16 = transpose(perm = var_619, x = out_25_cast_fp16)[name = string("transpose_197")]; tensor var_621_cast_fp16 = expand_dims(axes = var_621_axes_0, x = var_620_cast_fp16)[name = string("op_621_cast_fp16")]; string var_628_pad_type_0 = const()[name = string("op_628_pad_type_0"), val = string("valid")]; tensor var_628_strides_0 = const()[name = string("op_628_strides_0"), val = tensor([1, 1])]; tensor var_628_pad_0 = const()[name = string("op_628_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_628_dilations_0 = const()[name = string("op_628_dilations_0"), val = tensor([1, 1])]; int32 var_628_groups_0 = const()[name = string("op_628_groups_0"), val = int32(1)]; tensor var_628 = conv(dilations = var_628_dilations_0, groups = var_628_groups_0, pad = var_628_pad_0, pad_type = var_628_pad_type_0, strides = var_628_strides_0, weight = encoder_layers_2_self_attn_q_proj_weight_quantized, x = var_621_cast_fp16)[name = string("op_628")]; tensor var_629 = const()[name = string("op_629"), val = tensor([1, 3, 256, 128])]; tensor var_630 = reshape(shape = var_629, x = var_628)[name = string("op_630")]; tensor var_631 = const()[name = string("op_631"), val = tensor([0, 1, 3, 2])]; string var_638_pad_type_0 = const()[name = string("op_638_pad_type_0"), val = string("valid")]; tensor var_638_strides_0 = const()[name = string("op_638_strides_0"), val = tensor([1, 1])]; tensor var_638_pad_0 = const()[name = string("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_638_dilations_0 = const()[name = string("op_638_dilations_0"), val = tensor([1, 1])]; int32 var_638_groups_0 = const()[name = string("op_638_groups_0"), val = int32(1)]; tensor var_638 = conv(dilations = var_638_dilations_0, groups = var_638_groups_0, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_638_strides_0, weight = encoder_layers_2_self_attn_k_proj_weight_quantized, x = var_621_cast_fp16)[name = string("op_638")]; tensor var_639 = const()[name = string("op_639"), val = tensor([1, 1, 256, 128])]; tensor var_640 = reshape(shape = var_639, x = var_638)[name = string("op_640")]; tensor var_641 = const()[name = string("op_641"), val = tensor([0, 1, 3, 2])]; string var_648_pad_type_0 = const()[name = string("op_648_pad_type_0"), val = string("valid")]; tensor var_648_strides_0 = const()[name = string("op_648_strides_0"), val = tensor([1, 1])]; tensor var_648_pad_0 = const()[name = string("op_648_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_648_dilations_0 = const()[name = string("op_648_dilations_0"), val = tensor([1, 1])]; int32 var_648_groups_0 = const()[name = string("op_648_groups_0"), val = int32(1)]; tensor var_648 = conv(dilations = var_648_dilations_0, groups = var_648_groups_0, pad = var_648_pad_0, pad_type = var_648_pad_type_0, strides = var_648_strides_0, weight = encoder_layers_2_self_attn_v_proj_weight_quantized, x = var_621_cast_fp16)[name = string("op_648")]; tensor var_649 = const()[name = string("op_649"), val = tensor([1, 1, 256, 128])]; tensor var_650 = reshape(shape = var_649, x = var_648)[name = string("op_650")]; tensor var_651 = const()[name = string("op_651"), val = tensor([0, 1, 3, 2])]; fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_13 = transpose(perm = var_631, x = var_630)[name = string("transpose_196")]; tensor var_657_cast_fp16 = mul(x = q_13, y = const_30_promoted_to_fp16)[name = string("op_657_cast_fp16")]; bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; tensor input_45_cast_fp16 = concat(axis = var_23, interleave = input_45_interleave_0, values = (q_13, var_657_cast_fp16))[name = string("input_45_cast_fp16")]; tensor normed_63_axes_0 = const()[name = string("normed_63_axes_0"), val = tensor([-1])]; tensor normed_63_cast_fp16 = layer_norm(axes = normed_63_axes_0, epsilon = var_8_to_fp16, x = input_45_cast_fp16)[name = string("normed_63_cast_fp16")]; tensor var_662_split_sizes_0 = const()[name = string("op_662_split_sizes_0"), val = tensor([256, 256])]; int32 var_662_axis_0 = const()[name = string("op_662_axis_0"), val = int32(-1)]; tensor var_662_cast_fp16_0, tensor var_662_cast_fp16_1 = split(axis = var_662_axis_0, split_sizes = var_662_split_sizes_0, x = normed_63_cast_fp16)[name = string("op_662_cast_fp16")]; tensor var_666_to_fp16 = const()[name = string("op_666_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295437696)))]; tensor out_27_cast_fp16 = mul(x = var_662_cast_fp16_0, y = var_666_to_fp16)[name = string("out_27_cast_fp16")]; fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_13 = transpose(perm = var_641, x = var_640)[name = string("transpose_195")]; tensor var_673_cast_fp16 = mul(x = k_13, y = const_32_promoted_to_fp16)[name = string("op_673_cast_fp16")]; bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; tensor input_47_cast_fp16 = concat(axis = var_23, interleave = input_47_interleave_0, values = (k_13, var_673_cast_fp16))[name = string("input_47_cast_fp16")]; tensor normed_67_axes_0 = const()[name = string("normed_67_axes_0"), val = tensor([-1])]; tensor normed_67_cast_fp16 = layer_norm(axes = normed_67_axes_0, epsilon = var_8_to_fp16, x = input_47_cast_fp16)[name = string("normed_67_cast_fp16")]; tensor var_678_split_sizes_0 = const()[name = string("op_678_split_sizes_0"), val = tensor([256, 256])]; int32 var_678_axis_0 = const()[name = string("op_678_axis_0"), val = int32(-1)]; tensor var_678_cast_fp16_0, tensor var_678_cast_fp16_1 = split(axis = var_678_axis_0, split_sizes = var_678_split_sizes_0, x = normed_67_cast_fp16)[name = string("op_678_cast_fp16")]; tensor var_682_to_fp16 = const()[name = string("op_682_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295438272)))]; tensor out_29_cast_fp16 = mul(x = var_678_cast_fp16_0, y = var_682_to_fp16)[name = string("out_29_cast_fp16")]; tensor var_685 = mul(x = out_27_cast_fp16, y = cos_1_quantized)[name = string("op_685")]; tensor var_686_split_sizes_0 = const()[name = string("op_686_split_sizes_0"), val = tensor([128, 128])]; int32 var_686_axis_0 = const()[name = string("op_686_axis_0"), val = int32(-1)]; tensor var_686_0, tensor var_686_1 = split(axis = var_686_axis_0, split_sizes = var_686_split_sizes_0, x = out_27_cast_fp16)[name = string("op_686")]; fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; tensor var_688 = mul(x = var_686_1, y = const_34_promoted)[name = string("op_688")]; bool var_690_interleave_0 = const()[name = string("op_690_interleave_0"), val = bool(false)]; tensor var_690 = concat(axis = var_23, interleave = var_690_interleave_0, values = (var_688, var_686_0))[name = string("op_690")]; tensor var_691 = mul(x = var_690, y = sin_1_quantized)[name = string("op_691")]; tensor q_17 = add(x = var_685, y = var_691)[name = string("q_17")]; tensor var_693 = mul(x = out_29_cast_fp16, y = cos_1_quantized)[name = string("op_693")]; tensor var_694_split_sizes_0 = const()[name = string("op_694_split_sizes_0"), val = tensor([128, 128])]; int32 var_694_axis_0 = const()[name = string("op_694_axis_0"), val = int32(-1)]; tensor var_694_0, tensor var_694_1 = split(axis = var_694_axis_0, split_sizes = var_694_split_sizes_0, x = out_29_cast_fp16)[name = string("op_694")]; fp16 const_35_promoted = const()[name = string("const_35_promoted"), val = fp16(-0x1p+0)]; tensor var_696 = mul(x = var_694_1, y = const_35_promoted)[name = string("op_696")]; bool var_698_interleave_0 = const()[name = string("op_698_interleave_0"), val = bool(false)]; tensor var_698 = concat(axis = var_23, interleave = var_698_interleave_0, values = (var_696, var_694_0))[name = string("op_698")]; tensor var_699 = mul(x = var_698, y = sin_1_quantized)[name = string("op_699")]; tensor hidden_states_25 = add(x = var_693, y = var_699)[name = string("hidden_states_25")]; tensor hidden_states_27_axes_0 = const()[name = string("hidden_states_27_axes_0"), val = tensor([2])]; tensor hidden_states_27 = expand_dims(axes = hidden_states_27_axes_0, x = hidden_states_25)[name = string("hidden_states_27")]; tensor var_702 = const()[name = string("op_702"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_29 = tile(reps = var_702, x = hidden_states_27)[name = string("hidden_states_29")]; tensor var_704 = const()[name = string("op_704"), val = tensor([1, 3, 128, 256])]; tensor k_17 = reshape(shape = var_704, x = hidden_states_29)[name = string("k_17")]; tensor hidden_states_33_axes_0 = const()[name = string("hidden_states_33_axes_0"), val = tensor([2])]; tensor hidden_states_31 = transpose(perm = var_651, x = var_650)[name = string("transpose_194")]; tensor hidden_states_33 = expand_dims(axes = hidden_states_33_axes_0, x = hidden_states_31)[name = string("hidden_states_33")]; tensor var_707 = const()[name = string("op_707"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_35 = tile(reps = var_707, x = hidden_states_33)[name = string("hidden_states_35")]; tensor var_709 = const()[name = string("op_709"), val = tensor([1, 3, 128, 256])]; tensor v_5 = reshape(shape = var_709, x = hidden_states_35)[name = string("v_5")]; bool var_714_transpose_x_1 = const()[name = string("op_714_transpose_x_1"), val = bool(false)]; bool var_714_transpose_y_1 = const()[name = string("op_714_transpose_y_1"), val = bool(true)]; tensor var_714_cast_fp16 = matmul(transpose_x = var_714_transpose_x_1, transpose_y = var_714_transpose_y_1, x = q_17, y = k_17)[name = string("op_714_cast_fp16")]; fp16 var_715_to_fp16 = const()[name = string("op_715_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_13_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; tensor var_719_cast_fp16 = softmax(axis = var_23, x = attn_weights_15_cast_fp16)[name = string("op_719_cast_fp16")]; bool var_723_transpose_x_0 = const()[name = string("op_723_transpose_x_0"), val = bool(false)]; bool var_723_transpose_y_0 = const()[name = string("op_723_transpose_y_0"), val = bool(false)]; tensor var_723_cast_fp16 = matmul(transpose_x = var_723_transpose_x_0, transpose_y = var_723_transpose_y_0, x = var_719_cast_fp16, y = v_5)[name = string("op_723_cast_fp16")]; tensor var_725 = const()[name = string("op_725"), val = tensor([0, 2, 1, 3])]; tensor var_728 = const()[name = string("op_728"), val = tensor([1, 128, 768])]; tensor var_726 = transpose(perm = var_725, x = var_723_cast_fp16)[name = string("transpose_193")]; tensor attn_out_15 = reshape(shape = var_728, x = var_726)[name = string("attn_out_15")]; tensor var_730 = const()[name = string("op_730"), val = tensor([0, 2, 1])]; tensor squeeze_2_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295438848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296028736))))[name = string("squeeze_2_quantized")]; string var_739_pad_type_0 = const()[name = string("op_739_pad_type_0"), val = string("valid")]; int32 var_739_groups_0 = const()[name = string("op_739_groups_0"), val = int32(1)]; tensor var_739_strides_0 = const()[name = string("op_739_strides_0"), val = tensor([1])]; tensor var_739_pad_0 = const()[name = string("op_739_pad_0"), val = tensor([0, 0])]; tensor var_739_dilations_0 = const()[name = string("op_739_dilations_0"), val = tensor([1])]; tensor var_731 = transpose(perm = var_730, x = attn_out_15)[name = string("transpose_192")]; tensor var_739 = conv(dilations = var_739_dilations_0, groups = var_739_groups_0, pad = var_739_pad_0, pad_type = var_739_pad_type_0, strides = var_739_strides_0, weight = squeeze_2_quantized, x = var_731)[name = string("op_739")]; tensor var_740 = const()[name = string("op_740"), val = tensor([0, 2, 1])]; fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_41 = transpose(perm = var_740, x = var_739)[name = string("transpose_191")]; tensor var_744_cast_fp16 = mul(x = x_41, y = const_36_promoted_to_fp16)[name = string("op_744_cast_fp16")]; bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; tensor input_51_cast_fp16 = concat(axis = var_23, interleave = input_51_interleave_0, values = (x_41, var_744_cast_fp16))[name = string("input_51_cast_fp16")]; tensor normed_71_axes_0 = const()[name = string("normed_71_axes_0"), val = tensor([-1])]; tensor normed_71_cast_fp16 = layer_norm(axes = normed_71_axes_0, epsilon = var_8_to_fp16, x = input_51_cast_fp16)[name = string("normed_71_cast_fp16")]; tensor var_749_split_sizes_0 = const()[name = string("op_749_split_sizes_0"), val = tensor([768, 768])]; int32 var_749_axis_0 = const()[name = string("op_749_axis_0"), val = int32(-1)]; tensor var_749_cast_fp16_0, tensor var_749_cast_fp16_1 = split(axis = var_749_axis_0, split_sizes = var_749_split_sizes_0, x = normed_71_cast_fp16)[name = string("op_749_cast_fp16")]; tensor var_753_to_fp16 = const()[name = string("op_753_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296030336)))]; tensor out_31_cast_fp16 = mul(x = var_749_cast_fp16_0, y = var_753_to_fp16)[name = string("out_31_cast_fp16")]; tensor x_43_cast_fp16 = add(x = x_33_cast_fp16, y = out_31_cast_fp16)[name = string("x_43_cast_fp16")]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_760_cast_fp16 = mul(x = x_43_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_760_cast_fp16")]; bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; tensor input_53_cast_fp16 = concat(axis = var_23, interleave = input_53_interleave_0, values = (x_43_cast_fp16, var_760_cast_fp16))[name = string("input_53_cast_fp16")]; tensor normed_75_axes_0 = const()[name = string("normed_75_axes_0"), val = tensor([-1])]; tensor normed_75_cast_fp16 = layer_norm(axes = normed_75_axes_0, epsilon = var_8_to_fp16, x = input_53_cast_fp16)[name = string("normed_75_cast_fp16")]; tensor var_765_split_sizes_0 = const()[name = string("op_765_split_sizes_0"), val = tensor([768, 768])]; int32 var_765_axis_0 = const()[name = string("op_765_axis_0"), val = int32(-1)]; tensor var_765_cast_fp16_0, tensor var_765_cast_fp16_1 = split(axis = var_765_axis_0, split_sizes = var_765_split_sizes_0, x = normed_75_cast_fp16)[name = string("op_765_cast_fp16")]; tensor var_769_to_fp16 = const()[name = string("op_769_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296031936)))]; tensor out_33_cast_fp16 = mul(x = var_765_cast_fp16_0, y = var_769_to_fp16)[name = string("out_33_cast_fp16")]; tensor var_776 = const()[name = string("op_776"), val = tensor([0, 2, 1])]; tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; tensor var_777 = transpose(perm = var_776, x = out_33_cast_fp16)[name = string("transpose_190")]; tensor input_55 = expand_dims(axes = input_55_axes_0, x = var_777)[name = string("input_55")]; string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor([1, 1])]; int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = encoder_layers_2_mlp_gate_proj_weight_quantized, x = input_55)[name = string("gate_9")]; string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = encoder_layers_2_mlp_up_proj_weight_quantized, x = input_55)[name = string("up_5")]; string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; tensor input_57 = mul(x = gate_11, y = up_5)[name = string("input_57")]; string var_798_pad_type_0 = const()[name = string("op_798_pad_type_0"), val = string("valid")]; tensor var_798_strides_0 = const()[name = string("op_798_strides_0"), val = tensor([1, 1])]; tensor var_798_pad_0 = const()[name = string("op_798_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_798_dilations_0 = const()[name = string("op_798_dilations_0"), val = tensor([1, 1])]; int32 var_798_groups_0 = const()[name = string("op_798_groups_0"), val = int32(1)]; tensor var_798 = conv(dilations = var_798_dilations_0, groups = var_798_groups_0, pad = var_798_pad_0, pad_type = var_798_pad_type_0, strides = var_798_strides_0, weight = encoder_layers_2_mlp_down_proj_weight_quantized, x = input_57)[name = string("op_798")]; tensor var_799_axes_0 = const()[name = string("op_799_axes_0"), val = tensor([2])]; tensor var_799 = squeeze(axes = var_799_axes_0, x = var_798)[name = string("op_799")]; tensor var_800 = const()[name = string("op_800"), val = tensor([0, 2, 1])]; fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_47 = transpose(perm = var_800, x = var_799)[name = string("transpose_189")]; tensor var_804_cast_fp16 = mul(x = x_47, y = const_40_promoted_to_fp16)[name = string("op_804_cast_fp16")]; bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; tensor input_59_cast_fp16 = concat(axis = var_23, interleave = input_59_interleave_0, values = (x_47, var_804_cast_fp16))[name = string("input_59_cast_fp16")]; tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_8_to_fp16, x = input_59_cast_fp16)[name = string("normed_81_cast_fp16")]; tensor var_809_split_sizes_0 = const()[name = string("op_809_split_sizes_0"), val = tensor([768, 768])]; int32 var_809_axis_0 = const()[name = string("op_809_axis_0"), val = int32(-1)]; tensor var_809_cast_fp16_0, tensor var_809_cast_fp16_1 = split(axis = var_809_axis_0, split_sizes = var_809_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_809_cast_fp16")]; tensor var_813_to_fp16 = const()[name = string("op_813_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296033536)))]; tensor out_35_cast_fp16 = mul(x = var_809_cast_fp16_0, y = var_813_to_fp16)[name = string("out_35_cast_fp16")]; tensor x_49_cast_fp16 = add(x = x_43_cast_fp16, y = out_35_cast_fp16)[name = string("x_49_cast_fp16")]; fp16 const_42_promoted_to_fp16 = const()[name = string("const_42_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_842_cast_fp16 = mul(x = x_49_cast_fp16, y = const_42_promoted_to_fp16)[name = string("op_842_cast_fp16")]; bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; tensor input_61_cast_fp16 = concat(axis = var_23, interleave = input_61_interleave_0, values = (x_49_cast_fp16, var_842_cast_fp16))[name = string("input_61_cast_fp16")]; tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_8_to_fp16, x = input_61_cast_fp16)[name = string("normed_85_cast_fp16")]; tensor var_847_split_sizes_0 = const()[name = string("op_847_split_sizes_0"), val = tensor([768, 768])]; int32 var_847_axis_0 = const()[name = string("op_847_axis_0"), val = int32(-1)]; tensor var_847_cast_fp16_0, tensor var_847_cast_fp16_1 = split(axis = var_847_axis_0, split_sizes = var_847_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_847_cast_fp16")]; tensor var_851_to_fp16 = const()[name = string("op_851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296035136)))]; tensor out_37_cast_fp16 = mul(x = var_847_cast_fp16_0, y = var_851_to_fp16)[name = string("out_37_cast_fp16")]; tensor var_857 = const()[name = string("op_857"), val = tensor([0, 2, 1])]; tensor var_859_axes_0 = const()[name = string("op_859_axes_0"), val = tensor([2])]; tensor var_858_cast_fp16 = transpose(perm = var_857, x = out_37_cast_fp16)[name = string("transpose_188")]; tensor var_859_cast_fp16 = expand_dims(axes = var_859_axes_0, x = var_858_cast_fp16)[name = string("op_859_cast_fp16")]; string var_866_pad_type_0 = const()[name = string("op_866_pad_type_0"), val = string("valid")]; tensor var_866_strides_0 = const()[name = string("op_866_strides_0"), val = tensor([1, 1])]; tensor var_866_pad_0 = const()[name = string("op_866_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_866_dilations_0 = const()[name = string("op_866_dilations_0"), val = tensor([1, 1])]; int32 var_866_groups_0 = const()[name = string("op_866_groups_0"), val = int32(1)]; tensor var_866 = conv(dilations = var_866_dilations_0, groups = var_866_groups_0, pad = var_866_pad_0, pad_type = var_866_pad_type_0, strides = var_866_strides_0, weight = encoder_layers_3_self_attn_q_proj_weight_quantized, x = var_859_cast_fp16)[name = string("op_866")]; tensor var_867 = const()[name = string("op_867"), val = tensor([1, 3, 256, 128])]; tensor var_868 = reshape(shape = var_867, x = var_866)[name = string("op_868")]; tensor var_869 = const()[name = string("op_869"), val = tensor([0, 1, 3, 2])]; string var_876_pad_type_0 = const()[name = string("op_876_pad_type_0"), val = string("valid")]; tensor var_876_strides_0 = const()[name = string("op_876_strides_0"), val = tensor([1, 1])]; tensor var_876_pad_0 = const()[name = string("op_876_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_876_dilations_0 = const()[name = string("op_876_dilations_0"), val = tensor([1, 1])]; int32 var_876_groups_0 = const()[name = string("op_876_groups_0"), val = int32(1)]; tensor var_876 = conv(dilations = var_876_dilations_0, groups = var_876_groups_0, pad = var_876_pad_0, pad_type = var_876_pad_type_0, strides = var_876_strides_0, weight = encoder_layers_3_self_attn_k_proj_weight_quantized, x = var_859_cast_fp16)[name = string("op_876")]; tensor var_877 = const()[name = string("op_877"), val = tensor([1, 1, 256, 128])]; tensor var_878 = reshape(shape = var_877, x = var_876)[name = string("op_878")]; tensor var_879 = const()[name = string("op_879"), val = tensor([0, 1, 3, 2])]; string var_886_pad_type_0 = const()[name = string("op_886_pad_type_0"), val = string("valid")]; tensor var_886_strides_0 = const()[name = string("op_886_strides_0"), val = tensor([1, 1])]; tensor var_886_pad_0 = const()[name = string("op_886_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_886_dilations_0 = const()[name = string("op_886_dilations_0"), val = tensor([1, 1])]; int32 var_886_groups_0 = const()[name = string("op_886_groups_0"), val = int32(1)]; tensor var_886 = conv(dilations = var_886_dilations_0, groups = var_886_groups_0, pad = var_886_pad_0, pad_type = var_886_pad_type_0, strides = var_886_strides_0, weight = encoder_layers_3_self_attn_v_proj_weight_quantized, x = var_859_cast_fp16)[name = string("op_886")]; tensor var_887 = const()[name = string("op_887"), val = tensor([1, 1, 256, 128])]; tensor var_888 = reshape(shape = var_887, x = var_886)[name = string("op_888")]; tensor var_889 = const()[name = string("op_889"), val = tensor([0, 1, 3, 2])]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_19 = transpose(perm = var_869, x = var_868)[name = string("transpose_187")]; tensor var_895_cast_fp16 = mul(x = q_19, y = const_44_promoted_to_fp16)[name = string("op_895_cast_fp16")]; bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; tensor input_65_cast_fp16 = concat(axis = var_23, interleave = input_65_interleave_0, values = (q_19, var_895_cast_fp16))[name = string("input_65_cast_fp16")]; tensor normed_91_axes_0 = const()[name = string("normed_91_axes_0"), val = tensor([-1])]; tensor normed_91_cast_fp16 = layer_norm(axes = normed_91_axes_0, epsilon = var_8_to_fp16, x = input_65_cast_fp16)[name = string("normed_91_cast_fp16")]; tensor var_900_split_sizes_0 = const()[name = string("op_900_split_sizes_0"), val = tensor([256, 256])]; int32 var_900_axis_0 = const()[name = string("op_900_axis_0"), val = int32(-1)]; tensor var_900_cast_fp16_0, tensor var_900_cast_fp16_1 = split(axis = var_900_axis_0, split_sizes = var_900_split_sizes_0, x = normed_91_cast_fp16)[name = string("op_900_cast_fp16")]; tensor var_904_to_fp16 = const()[name = string("op_904_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296036736)))]; tensor out_39_cast_fp16 = mul(x = var_900_cast_fp16_0, y = var_904_to_fp16)[name = string("out_39_cast_fp16")]; fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_19 = transpose(perm = var_879, x = var_878)[name = string("transpose_186")]; tensor var_911_cast_fp16 = mul(x = k_19, y = const_46_promoted_to_fp16)[name = string("op_911_cast_fp16")]; bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; tensor input_67_cast_fp16 = concat(axis = var_23, interleave = input_67_interleave_0, values = (k_19, var_911_cast_fp16))[name = string("input_67_cast_fp16")]; tensor normed_95_axes_0 = const()[name = string("normed_95_axes_0"), val = tensor([-1])]; tensor normed_95_cast_fp16 = layer_norm(axes = normed_95_axes_0, epsilon = var_8_to_fp16, x = input_67_cast_fp16)[name = string("normed_95_cast_fp16")]; tensor var_916_split_sizes_0 = const()[name = string("op_916_split_sizes_0"), val = tensor([256, 256])]; int32 var_916_axis_0 = const()[name = string("op_916_axis_0"), val = int32(-1)]; tensor var_916_cast_fp16_0, tensor var_916_cast_fp16_1 = split(axis = var_916_axis_0, split_sizes = var_916_split_sizes_0, x = normed_95_cast_fp16)[name = string("op_916_cast_fp16")]; tensor var_920_to_fp16 = const()[name = string("op_920_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296037312)))]; tensor out_41_cast_fp16 = mul(x = var_916_cast_fp16_0, y = var_920_to_fp16)[name = string("out_41_cast_fp16")]; tensor var_923 = mul(x = out_39_cast_fp16, y = cos_1_quantized)[name = string("op_923")]; tensor var_924_split_sizes_0 = const()[name = string("op_924_split_sizes_0"), val = tensor([128, 128])]; int32 var_924_axis_0 = const()[name = string("op_924_axis_0"), val = int32(-1)]; tensor var_924_0, tensor var_924_1 = split(axis = var_924_axis_0, split_sizes = var_924_split_sizes_0, x = out_39_cast_fp16)[name = string("op_924")]; fp16 const_48_promoted = const()[name = string("const_48_promoted"), val = fp16(-0x1p+0)]; tensor var_926 = mul(x = var_924_1, y = const_48_promoted)[name = string("op_926")]; bool var_928_interleave_0 = const()[name = string("op_928_interleave_0"), val = bool(false)]; tensor var_928 = concat(axis = var_23, interleave = var_928_interleave_0, values = (var_926, var_924_0))[name = string("op_928")]; tensor var_929 = mul(x = var_928, y = sin_1_quantized)[name = string("op_929")]; tensor q_23 = add(x = var_923, y = var_929)[name = string("q_23")]; tensor var_931 = mul(x = out_41_cast_fp16, y = cos_1_quantized)[name = string("op_931")]; tensor var_932_split_sizes_0 = const()[name = string("op_932_split_sizes_0"), val = tensor([128, 128])]; int32 var_932_axis_0 = const()[name = string("op_932_axis_0"), val = int32(-1)]; tensor var_932_0, tensor var_932_1 = split(axis = var_932_axis_0, split_sizes = var_932_split_sizes_0, x = out_41_cast_fp16)[name = string("op_932")]; fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; tensor var_934 = mul(x = var_932_1, y = const_49_promoted)[name = string("op_934")]; bool var_936_interleave_0 = const()[name = string("op_936_interleave_0"), val = bool(false)]; tensor var_936 = concat(axis = var_23, interleave = var_936_interleave_0, values = (var_934, var_932_0))[name = string("op_936")]; tensor var_937 = mul(x = var_936, y = sin_1_quantized)[name = string("op_937")]; tensor hidden_states_37 = add(x = var_931, y = var_937)[name = string("hidden_states_37")]; tensor hidden_states_39_axes_0 = const()[name = string("hidden_states_39_axes_0"), val = tensor([2])]; tensor hidden_states_39 = expand_dims(axes = hidden_states_39_axes_0, x = hidden_states_37)[name = string("hidden_states_39")]; tensor var_940 = const()[name = string("op_940"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_41 = tile(reps = var_940, x = hidden_states_39)[name = string("hidden_states_41")]; tensor var_942 = const()[name = string("op_942"), val = tensor([1, 3, 128, 256])]; tensor k_23 = reshape(shape = var_942, x = hidden_states_41)[name = string("k_23")]; tensor hidden_states_45_axes_0 = const()[name = string("hidden_states_45_axes_0"), val = tensor([2])]; tensor hidden_states_43 = transpose(perm = var_889, x = var_888)[name = string("transpose_185")]; tensor hidden_states_45 = expand_dims(axes = hidden_states_45_axes_0, x = hidden_states_43)[name = string("hidden_states_45")]; tensor var_945 = const()[name = string("op_945"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_47 = tile(reps = var_945, x = hidden_states_45)[name = string("hidden_states_47")]; tensor var_947 = const()[name = string("op_947"), val = tensor([1, 3, 128, 256])]; tensor v_7 = reshape(shape = var_947, x = hidden_states_47)[name = string("v_7")]; bool var_952_transpose_x_1 = const()[name = string("op_952_transpose_x_1"), val = bool(false)]; bool var_952_transpose_y_1 = const()[name = string("op_952_transpose_y_1"), val = bool(true)]; tensor var_952_cast_fp16 = matmul(transpose_x = var_952_transpose_x_1, transpose_y = var_952_transpose_y_1, x = q_23, y = k_23)[name = string("op_952_cast_fp16")]; fp16 var_953_to_fp16 = const()[name = string("op_953_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_19_cast_fp16 = mul(x = var_952_cast_fp16, y = var_953_to_fp16)[name = string("attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; tensor var_957_cast_fp16 = softmax(axis = var_23, x = attn_weights_21_cast_fp16)[name = string("op_957_cast_fp16")]; bool var_961_transpose_x_0 = const()[name = string("op_961_transpose_x_0"), val = bool(false)]; bool var_961_transpose_y_0 = const()[name = string("op_961_transpose_y_0"), val = bool(false)]; tensor var_961_cast_fp16 = matmul(transpose_x = var_961_transpose_x_0, transpose_y = var_961_transpose_y_0, x = var_957_cast_fp16, y = v_7)[name = string("op_961_cast_fp16")]; tensor var_963 = const()[name = string("op_963"), val = tensor([0, 2, 1, 3])]; tensor var_966 = const()[name = string("op_966"), val = tensor([1, 128, 768])]; tensor var_964 = transpose(perm = var_963, x = var_961_cast_fp16)[name = string("transpose_184")]; tensor attn_out_21 = reshape(shape = var_966, x = var_964)[name = string("attn_out_21")]; tensor var_968 = const()[name = string("op_968"), val = tensor([0, 2, 1])]; tensor squeeze_3_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296037888))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296627776))))[name = string("squeeze_3_quantized")]; string var_977_pad_type_0 = const()[name = string("op_977_pad_type_0"), val = string("valid")]; int32 var_977_groups_0 = const()[name = string("op_977_groups_0"), val = int32(1)]; tensor var_977_strides_0 = const()[name = string("op_977_strides_0"), val = tensor([1])]; tensor var_977_pad_0 = const()[name = string("op_977_pad_0"), val = tensor([0, 0])]; tensor var_977_dilations_0 = const()[name = string("op_977_dilations_0"), val = tensor([1])]; tensor var_969 = transpose(perm = var_968, x = attn_out_21)[name = string("transpose_183")]; tensor var_977 = conv(dilations = var_977_dilations_0, groups = var_977_groups_0, pad = var_977_pad_0, pad_type = var_977_pad_type_0, strides = var_977_strides_0, weight = squeeze_3_quantized, x = var_969)[name = string("op_977")]; tensor var_978 = const()[name = string("op_978"), val = tensor([0, 2, 1])]; fp16 const_50_promoted_to_fp16 = const()[name = string("const_50_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_57 = transpose(perm = var_978, x = var_977)[name = string("transpose_182")]; tensor var_982_cast_fp16 = mul(x = x_57, y = const_50_promoted_to_fp16)[name = string("op_982_cast_fp16")]; bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; tensor input_71_cast_fp16 = concat(axis = var_23, interleave = input_71_interleave_0, values = (x_57, var_982_cast_fp16))[name = string("input_71_cast_fp16")]; tensor normed_99_axes_0 = const()[name = string("normed_99_axes_0"), val = tensor([-1])]; tensor normed_99_cast_fp16 = layer_norm(axes = normed_99_axes_0, epsilon = var_8_to_fp16, x = input_71_cast_fp16)[name = string("normed_99_cast_fp16")]; tensor var_987_split_sizes_0 = const()[name = string("op_987_split_sizes_0"), val = tensor([768, 768])]; int32 var_987_axis_0 = const()[name = string("op_987_axis_0"), val = int32(-1)]; tensor var_987_cast_fp16_0, tensor var_987_cast_fp16_1 = split(axis = var_987_axis_0, split_sizes = var_987_split_sizes_0, x = normed_99_cast_fp16)[name = string("op_987_cast_fp16")]; tensor var_991_to_fp16 = const()[name = string("op_991_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296629376)))]; tensor out_43_cast_fp16 = mul(x = var_987_cast_fp16_0, y = var_991_to_fp16)[name = string("out_43_cast_fp16")]; tensor x_59_cast_fp16 = add(x = x_49_cast_fp16, y = out_43_cast_fp16)[name = string("x_59_cast_fp16")]; fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_998_cast_fp16 = mul(x = x_59_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_998_cast_fp16")]; bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; tensor input_73_cast_fp16 = concat(axis = var_23, interleave = input_73_interleave_0, values = (x_59_cast_fp16, var_998_cast_fp16))[name = string("input_73_cast_fp16")]; tensor normed_103_axes_0 = const()[name = string("normed_103_axes_0"), val = tensor([-1])]; tensor normed_103_cast_fp16 = layer_norm(axes = normed_103_axes_0, epsilon = var_8_to_fp16, x = input_73_cast_fp16)[name = string("normed_103_cast_fp16")]; tensor var_1003_split_sizes_0 = const()[name = string("op_1003_split_sizes_0"), val = tensor([768, 768])]; int32 var_1003_axis_0 = const()[name = string("op_1003_axis_0"), val = int32(-1)]; tensor var_1003_cast_fp16_0, tensor var_1003_cast_fp16_1 = split(axis = var_1003_axis_0, split_sizes = var_1003_split_sizes_0, x = normed_103_cast_fp16)[name = string("op_1003_cast_fp16")]; tensor var_1007_to_fp16 = const()[name = string("op_1007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296630976)))]; tensor out_45_cast_fp16 = mul(x = var_1003_cast_fp16_0, y = var_1007_to_fp16)[name = string("out_45_cast_fp16")]; tensor var_1014 = const()[name = string("op_1014"), val = tensor([0, 2, 1])]; tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([2])]; tensor var_1015 = transpose(perm = var_1014, x = out_45_cast_fp16)[name = string("transpose_181")]; tensor input_75 = expand_dims(axes = input_75_axes_0, x = var_1015)[name = string("input_75")]; string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = encoder_layers_3_mlp_gate_proj_weight_quantized, x = input_75)[name = string("gate_13")]; string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = encoder_layers_3_mlp_up_proj_weight_quantized, x = input_75)[name = string("up_7")]; string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; tensor input_77 = mul(x = gate_15, y = up_7)[name = string("input_77")]; string var_1036_pad_type_0 = const()[name = string("op_1036_pad_type_0"), val = string("valid")]; tensor var_1036_strides_0 = const()[name = string("op_1036_strides_0"), val = tensor([1, 1])]; tensor var_1036_pad_0 = const()[name = string("op_1036_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1036_dilations_0 = const()[name = string("op_1036_dilations_0"), val = tensor([1, 1])]; int32 var_1036_groups_0 = const()[name = string("op_1036_groups_0"), val = int32(1)]; tensor var_1036 = conv(dilations = var_1036_dilations_0, groups = var_1036_groups_0, pad = var_1036_pad_0, pad_type = var_1036_pad_type_0, strides = var_1036_strides_0, weight = encoder_layers_3_mlp_down_proj_weight_quantized, x = input_77)[name = string("op_1036")]; tensor var_1037_axes_0 = const()[name = string("op_1037_axes_0"), val = tensor([2])]; tensor var_1037 = squeeze(axes = var_1037_axes_0, x = var_1036)[name = string("op_1037")]; tensor var_1038 = const()[name = string("op_1038"), val = tensor([0, 2, 1])]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_63 = transpose(perm = var_1038, x = var_1037)[name = string("transpose_180")]; tensor var_1042_cast_fp16 = mul(x = x_63, y = const_54_promoted_to_fp16)[name = string("op_1042_cast_fp16")]; bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; tensor input_79_cast_fp16 = concat(axis = var_23, interleave = input_79_interleave_0, values = (x_63, var_1042_cast_fp16))[name = string("input_79_cast_fp16")]; tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_8_to_fp16, x = input_79_cast_fp16)[name = string("normed_109_cast_fp16")]; tensor var_1047_split_sizes_0 = const()[name = string("op_1047_split_sizes_0"), val = tensor([768, 768])]; int32 var_1047_axis_0 = const()[name = string("op_1047_axis_0"), val = int32(-1)]; tensor var_1047_cast_fp16_0, tensor var_1047_cast_fp16_1 = split(axis = var_1047_axis_0, split_sizes = var_1047_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_1047_cast_fp16")]; tensor var_1051_to_fp16 = const()[name = string("op_1051_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296632576)))]; tensor out_47_cast_fp16 = mul(x = var_1047_cast_fp16_0, y = var_1051_to_fp16)[name = string("out_47_cast_fp16")]; tensor x_65_cast_fp16 = add(x = x_59_cast_fp16, y = out_47_cast_fp16)[name = string("x_65_cast_fp16")]; fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1080_cast_fp16 = mul(x = x_65_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_1080_cast_fp16")]; bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; tensor input_81_cast_fp16 = concat(axis = var_23, interleave = input_81_interleave_0, values = (x_65_cast_fp16, var_1080_cast_fp16))[name = string("input_81_cast_fp16")]; tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_8_to_fp16, x = input_81_cast_fp16)[name = string("normed_113_cast_fp16")]; tensor var_1085_split_sizes_0 = const()[name = string("op_1085_split_sizes_0"), val = tensor([768, 768])]; int32 var_1085_axis_0 = const()[name = string("op_1085_axis_0"), val = int32(-1)]; tensor var_1085_cast_fp16_0, tensor var_1085_cast_fp16_1 = split(axis = var_1085_axis_0, split_sizes = var_1085_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_1085_cast_fp16")]; tensor var_1089_to_fp16 = const()[name = string("op_1089_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296634176)))]; tensor out_49_cast_fp16 = mul(x = var_1085_cast_fp16_0, y = var_1089_to_fp16)[name = string("out_49_cast_fp16")]; tensor var_1095 = const()[name = string("op_1095"), val = tensor([0, 2, 1])]; tensor var_1097_axes_0 = const()[name = string("op_1097_axes_0"), val = tensor([2])]; tensor var_1096_cast_fp16 = transpose(perm = var_1095, x = out_49_cast_fp16)[name = string("transpose_179")]; tensor var_1097_cast_fp16 = expand_dims(axes = var_1097_axes_0, x = var_1096_cast_fp16)[name = string("op_1097_cast_fp16")]; string var_1104_pad_type_0 = const()[name = string("op_1104_pad_type_0"), val = string("valid")]; tensor var_1104_strides_0 = const()[name = string("op_1104_strides_0"), val = tensor([1, 1])]; tensor var_1104_pad_0 = const()[name = string("op_1104_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1104_dilations_0 = const()[name = string("op_1104_dilations_0"), val = tensor([1, 1])]; int32 var_1104_groups_0 = const()[name = string("op_1104_groups_0"), val = int32(1)]; tensor var_1104 = conv(dilations = var_1104_dilations_0, groups = var_1104_groups_0, pad = var_1104_pad_0, pad_type = var_1104_pad_type_0, strides = var_1104_strides_0, weight = encoder_layers_4_self_attn_q_proj_weight_quantized, x = var_1097_cast_fp16)[name = string("op_1104")]; tensor var_1105 = const()[name = string("op_1105"), val = tensor([1, 3, 256, 128])]; tensor var_1106 = reshape(shape = var_1105, x = var_1104)[name = string("op_1106")]; tensor var_1107 = const()[name = string("op_1107"), val = tensor([0, 1, 3, 2])]; string var_1114_pad_type_0 = const()[name = string("op_1114_pad_type_0"), val = string("valid")]; tensor var_1114_strides_0 = const()[name = string("op_1114_strides_0"), val = tensor([1, 1])]; tensor var_1114_pad_0 = const()[name = string("op_1114_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1114_dilations_0 = const()[name = string("op_1114_dilations_0"), val = tensor([1, 1])]; int32 var_1114_groups_0 = const()[name = string("op_1114_groups_0"), val = int32(1)]; tensor var_1114 = conv(dilations = var_1114_dilations_0, groups = var_1114_groups_0, pad = var_1114_pad_0, pad_type = var_1114_pad_type_0, strides = var_1114_strides_0, weight = encoder_layers_4_self_attn_k_proj_weight_quantized, x = var_1097_cast_fp16)[name = string("op_1114")]; tensor var_1115 = const()[name = string("op_1115"), val = tensor([1, 1, 256, 128])]; tensor var_1116 = reshape(shape = var_1115, x = var_1114)[name = string("op_1116")]; tensor var_1117 = const()[name = string("op_1117"), val = tensor([0, 1, 3, 2])]; string var_1124_pad_type_0 = const()[name = string("op_1124_pad_type_0"), val = string("valid")]; tensor var_1124_strides_0 = const()[name = string("op_1124_strides_0"), val = tensor([1, 1])]; tensor var_1124_pad_0 = const()[name = string("op_1124_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1124_dilations_0 = const()[name = string("op_1124_dilations_0"), val = tensor([1, 1])]; int32 var_1124_groups_0 = const()[name = string("op_1124_groups_0"), val = int32(1)]; tensor var_1124 = conv(dilations = var_1124_dilations_0, groups = var_1124_groups_0, pad = var_1124_pad_0, pad_type = var_1124_pad_type_0, strides = var_1124_strides_0, weight = encoder_layers_4_self_attn_v_proj_weight_quantized, x = var_1097_cast_fp16)[name = string("op_1124")]; tensor var_1125 = const()[name = string("op_1125"), val = tensor([1, 1, 256, 128])]; tensor var_1126 = reshape(shape = var_1125, x = var_1124)[name = string("op_1126")]; tensor var_1127 = const()[name = string("op_1127"), val = tensor([0, 1, 3, 2])]; fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_25 = transpose(perm = var_1107, x = var_1106)[name = string("transpose_178")]; tensor var_1133_cast_fp16 = mul(x = q_25, y = const_58_promoted_to_fp16)[name = string("op_1133_cast_fp16")]; bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; tensor input_85_cast_fp16 = concat(axis = var_23, interleave = input_85_interleave_0, values = (q_25, var_1133_cast_fp16))[name = string("input_85_cast_fp16")]; tensor normed_119_axes_0 = const()[name = string("normed_119_axes_0"), val = tensor([-1])]; tensor normed_119_cast_fp16 = layer_norm(axes = normed_119_axes_0, epsilon = var_8_to_fp16, x = input_85_cast_fp16)[name = string("normed_119_cast_fp16")]; tensor var_1138_split_sizes_0 = const()[name = string("op_1138_split_sizes_0"), val = tensor([256, 256])]; int32 var_1138_axis_0 = const()[name = string("op_1138_axis_0"), val = int32(-1)]; tensor var_1138_cast_fp16_0, tensor var_1138_cast_fp16_1 = split(axis = var_1138_axis_0, split_sizes = var_1138_split_sizes_0, x = normed_119_cast_fp16)[name = string("op_1138_cast_fp16")]; tensor var_1142_to_fp16 = const()[name = string("op_1142_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296635776)))]; tensor out_51_cast_fp16 = mul(x = var_1138_cast_fp16_0, y = var_1142_to_fp16)[name = string("out_51_cast_fp16")]; fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_25 = transpose(perm = var_1117, x = var_1116)[name = string("transpose_177")]; tensor var_1149_cast_fp16 = mul(x = k_25, y = const_60_promoted_to_fp16)[name = string("op_1149_cast_fp16")]; bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; tensor input_87_cast_fp16 = concat(axis = var_23, interleave = input_87_interleave_0, values = (k_25, var_1149_cast_fp16))[name = string("input_87_cast_fp16")]; tensor normed_123_axes_0 = const()[name = string("normed_123_axes_0"), val = tensor([-1])]; tensor normed_123_cast_fp16 = layer_norm(axes = normed_123_axes_0, epsilon = var_8_to_fp16, x = input_87_cast_fp16)[name = string("normed_123_cast_fp16")]; tensor var_1154_split_sizes_0 = const()[name = string("op_1154_split_sizes_0"), val = tensor([256, 256])]; int32 var_1154_axis_0 = const()[name = string("op_1154_axis_0"), val = int32(-1)]; tensor var_1154_cast_fp16_0, tensor var_1154_cast_fp16_1 = split(axis = var_1154_axis_0, split_sizes = var_1154_split_sizes_0, x = normed_123_cast_fp16)[name = string("op_1154_cast_fp16")]; tensor var_1158_to_fp16 = const()[name = string("op_1158_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296636352)))]; tensor out_53_cast_fp16 = mul(x = var_1154_cast_fp16_0, y = var_1158_to_fp16)[name = string("out_53_cast_fp16")]; tensor var_1161 = mul(x = out_51_cast_fp16, y = cos_1_quantized)[name = string("op_1161")]; tensor var_1162_split_sizes_0 = const()[name = string("op_1162_split_sizes_0"), val = tensor([128, 128])]; int32 var_1162_axis_0 = const()[name = string("op_1162_axis_0"), val = int32(-1)]; tensor var_1162_0, tensor var_1162_1 = split(axis = var_1162_axis_0, split_sizes = var_1162_split_sizes_0, x = out_51_cast_fp16)[name = string("op_1162")]; fp16 const_62_promoted = const()[name = string("const_62_promoted"), val = fp16(-0x1p+0)]; tensor var_1164 = mul(x = var_1162_1, y = const_62_promoted)[name = string("op_1164")]; bool var_1166_interleave_0 = const()[name = string("op_1166_interleave_0"), val = bool(false)]; tensor var_1166 = concat(axis = var_23, interleave = var_1166_interleave_0, values = (var_1164, var_1162_0))[name = string("op_1166")]; tensor var_1167 = mul(x = var_1166, y = sin_1_quantized)[name = string("op_1167")]; tensor q_29 = add(x = var_1161, y = var_1167)[name = string("q_29")]; tensor var_1169 = mul(x = out_53_cast_fp16, y = cos_1_quantized)[name = string("op_1169")]; tensor var_1170_split_sizes_0 = const()[name = string("op_1170_split_sizes_0"), val = tensor([128, 128])]; int32 var_1170_axis_0 = const()[name = string("op_1170_axis_0"), val = int32(-1)]; tensor var_1170_0, tensor var_1170_1 = split(axis = var_1170_axis_0, split_sizes = var_1170_split_sizes_0, x = out_53_cast_fp16)[name = string("op_1170")]; fp16 const_63_promoted = const()[name = string("const_63_promoted"), val = fp16(-0x1p+0)]; tensor var_1172 = mul(x = var_1170_1, y = const_63_promoted)[name = string("op_1172")]; bool var_1174_interleave_0 = const()[name = string("op_1174_interleave_0"), val = bool(false)]; tensor var_1174 = concat(axis = var_23, interleave = var_1174_interleave_0, values = (var_1172, var_1170_0))[name = string("op_1174")]; tensor var_1175 = mul(x = var_1174, y = sin_1_quantized)[name = string("op_1175")]; tensor hidden_states_49 = add(x = var_1169, y = var_1175)[name = string("hidden_states_49")]; tensor hidden_states_51_axes_0 = const()[name = string("hidden_states_51_axes_0"), val = tensor([2])]; tensor hidden_states_51 = expand_dims(axes = hidden_states_51_axes_0, x = hidden_states_49)[name = string("hidden_states_51")]; tensor var_1178 = const()[name = string("op_1178"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_53 = tile(reps = var_1178, x = hidden_states_51)[name = string("hidden_states_53")]; tensor var_1180 = const()[name = string("op_1180"), val = tensor([1, 3, 128, 256])]; tensor k_29 = reshape(shape = var_1180, x = hidden_states_53)[name = string("k_29")]; tensor hidden_states_57_axes_0 = const()[name = string("hidden_states_57_axes_0"), val = tensor([2])]; tensor hidden_states_55 = transpose(perm = var_1127, x = var_1126)[name = string("transpose_176")]; tensor hidden_states_57 = expand_dims(axes = hidden_states_57_axes_0, x = hidden_states_55)[name = string("hidden_states_57")]; tensor var_1183 = const()[name = string("op_1183"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_59 = tile(reps = var_1183, x = hidden_states_57)[name = string("hidden_states_59")]; tensor var_1185 = const()[name = string("op_1185"), val = tensor([1, 3, 128, 256])]; tensor v_9 = reshape(shape = var_1185, x = hidden_states_59)[name = string("v_9")]; bool var_1190_transpose_x_1 = const()[name = string("op_1190_transpose_x_1"), val = bool(false)]; bool var_1190_transpose_y_1 = const()[name = string("op_1190_transpose_y_1"), val = bool(true)]; tensor var_1190_cast_fp16 = matmul(transpose_x = var_1190_transpose_x_1, transpose_y = var_1190_transpose_y_1, x = q_29, y = k_29)[name = string("op_1190_cast_fp16")]; fp16 var_1191_to_fp16 = const()[name = string("op_1191_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_25_cast_fp16 = mul(x = var_1190_cast_fp16, y = var_1191_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_27_cast_fp16")]; tensor var_1195_cast_fp16 = softmax(axis = var_23, x = attn_weights_27_cast_fp16)[name = string("op_1195_cast_fp16")]; bool var_1199_transpose_x_0 = const()[name = string("op_1199_transpose_x_0"), val = bool(false)]; bool var_1199_transpose_y_0 = const()[name = string("op_1199_transpose_y_0"), val = bool(false)]; tensor var_1199_cast_fp16 = matmul(transpose_x = var_1199_transpose_x_0, transpose_y = var_1199_transpose_y_0, x = var_1195_cast_fp16, y = v_9)[name = string("op_1199_cast_fp16")]; tensor var_1201 = const()[name = string("op_1201"), val = tensor([0, 2, 1, 3])]; tensor var_1204 = const()[name = string("op_1204"), val = tensor([1, 128, 768])]; tensor var_1202 = transpose(perm = var_1201, x = var_1199_cast_fp16)[name = string("transpose_175")]; tensor attn_out_27 = reshape(shape = var_1204, x = var_1202)[name = string("attn_out_27")]; tensor var_1206 = const()[name = string("op_1206"), val = tensor([0, 2, 1])]; tensor squeeze_4_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296636928))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297226816))))[name = string("squeeze_4_quantized")]; string var_1215_pad_type_0 = const()[name = string("op_1215_pad_type_0"), val = string("valid")]; int32 var_1215_groups_0 = const()[name = string("op_1215_groups_0"), val = int32(1)]; tensor var_1215_strides_0 = const()[name = string("op_1215_strides_0"), val = tensor([1])]; tensor var_1215_pad_0 = const()[name = string("op_1215_pad_0"), val = tensor([0, 0])]; tensor var_1215_dilations_0 = const()[name = string("op_1215_dilations_0"), val = tensor([1])]; tensor var_1207 = transpose(perm = var_1206, x = attn_out_27)[name = string("transpose_174")]; tensor var_1215 = conv(dilations = var_1215_dilations_0, groups = var_1215_groups_0, pad = var_1215_pad_0, pad_type = var_1215_pad_type_0, strides = var_1215_strides_0, weight = squeeze_4_quantized, x = var_1207)[name = string("op_1215")]; tensor var_1216 = const()[name = string("op_1216"), val = tensor([0, 2, 1])]; fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_73 = transpose(perm = var_1216, x = var_1215)[name = string("transpose_173")]; tensor var_1220_cast_fp16 = mul(x = x_73, y = const_64_promoted_to_fp16)[name = string("op_1220_cast_fp16")]; bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; tensor input_91_cast_fp16 = concat(axis = var_23, interleave = input_91_interleave_0, values = (x_73, var_1220_cast_fp16))[name = string("input_91_cast_fp16")]; tensor normed_127_axes_0 = const()[name = string("normed_127_axes_0"), val = tensor([-1])]; tensor normed_127_cast_fp16 = layer_norm(axes = normed_127_axes_0, epsilon = var_8_to_fp16, x = input_91_cast_fp16)[name = string("normed_127_cast_fp16")]; tensor var_1225_split_sizes_0 = const()[name = string("op_1225_split_sizes_0"), val = tensor([768, 768])]; int32 var_1225_axis_0 = const()[name = string("op_1225_axis_0"), val = int32(-1)]; tensor var_1225_cast_fp16_0, tensor var_1225_cast_fp16_1 = split(axis = var_1225_axis_0, split_sizes = var_1225_split_sizes_0, x = normed_127_cast_fp16)[name = string("op_1225_cast_fp16")]; tensor var_1229_to_fp16 = const()[name = string("op_1229_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297228416)))]; tensor out_55_cast_fp16 = mul(x = var_1225_cast_fp16_0, y = var_1229_to_fp16)[name = string("out_55_cast_fp16")]; tensor x_75_cast_fp16 = add(x = x_65_cast_fp16, y = out_55_cast_fp16)[name = string("x_75_cast_fp16")]; fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1236_cast_fp16 = mul(x = x_75_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_1236_cast_fp16")]; bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; tensor input_93_cast_fp16 = concat(axis = var_23, interleave = input_93_interleave_0, values = (x_75_cast_fp16, var_1236_cast_fp16))[name = string("input_93_cast_fp16")]; tensor normed_131_axes_0 = const()[name = string("normed_131_axes_0"), val = tensor([-1])]; tensor normed_131_cast_fp16 = layer_norm(axes = normed_131_axes_0, epsilon = var_8_to_fp16, x = input_93_cast_fp16)[name = string("normed_131_cast_fp16")]; tensor var_1241_split_sizes_0 = const()[name = string("op_1241_split_sizes_0"), val = tensor([768, 768])]; int32 var_1241_axis_0 = const()[name = string("op_1241_axis_0"), val = int32(-1)]; tensor var_1241_cast_fp16_0, tensor var_1241_cast_fp16_1 = split(axis = var_1241_axis_0, split_sizes = var_1241_split_sizes_0, x = normed_131_cast_fp16)[name = string("op_1241_cast_fp16")]; tensor var_1245_to_fp16 = const()[name = string("op_1245_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297230016)))]; tensor out_57_cast_fp16 = mul(x = var_1241_cast_fp16_0, y = var_1245_to_fp16)[name = string("out_57_cast_fp16")]; tensor var_1252 = const()[name = string("op_1252"), val = tensor([0, 2, 1])]; tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([2])]; tensor var_1253 = transpose(perm = var_1252, x = out_57_cast_fp16)[name = string("transpose_172")]; tensor input_95 = expand_dims(axes = input_95_axes_0, x = var_1253)[name = string("input_95")]; string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = encoder_layers_4_mlp_gate_proj_weight_quantized, x = input_95)[name = string("gate_17")]; string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = encoder_layers_4_mlp_up_proj_weight_quantized, x = input_95)[name = string("up_9")]; string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; tensor input_97 = mul(x = gate_19, y = up_9)[name = string("input_97")]; string var_1274_pad_type_0 = const()[name = string("op_1274_pad_type_0"), val = string("valid")]; tensor var_1274_strides_0 = const()[name = string("op_1274_strides_0"), val = tensor([1, 1])]; tensor var_1274_pad_0 = const()[name = string("op_1274_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1274_dilations_0 = const()[name = string("op_1274_dilations_0"), val = tensor([1, 1])]; int32 var_1274_groups_0 = const()[name = string("op_1274_groups_0"), val = int32(1)]; tensor var_1274 = conv(dilations = var_1274_dilations_0, groups = var_1274_groups_0, pad = var_1274_pad_0, pad_type = var_1274_pad_type_0, strides = var_1274_strides_0, weight = encoder_layers_4_mlp_down_proj_weight_quantized, x = input_97)[name = string("op_1274")]; tensor var_1275_axes_0 = const()[name = string("op_1275_axes_0"), val = tensor([2])]; tensor var_1275 = squeeze(axes = var_1275_axes_0, x = var_1274)[name = string("op_1275")]; tensor var_1276 = const()[name = string("op_1276"), val = tensor([0, 2, 1])]; fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_79 = transpose(perm = var_1276, x = var_1275)[name = string("transpose_171")]; tensor var_1280_cast_fp16 = mul(x = x_79, y = const_68_promoted_to_fp16)[name = string("op_1280_cast_fp16")]; bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; tensor input_99_cast_fp16 = concat(axis = var_23, interleave = input_99_interleave_0, values = (x_79, var_1280_cast_fp16))[name = string("input_99_cast_fp16")]; tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_8_to_fp16, x = input_99_cast_fp16)[name = string("normed_137_cast_fp16")]; tensor var_1285_split_sizes_0 = const()[name = string("op_1285_split_sizes_0"), val = tensor([768, 768])]; int32 var_1285_axis_0 = const()[name = string("op_1285_axis_0"), val = int32(-1)]; tensor var_1285_cast_fp16_0, tensor var_1285_cast_fp16_1 = split(axis = var_1285_axis_0, split_sizes = var_1285_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_1285_cast_fp16")]; tensor var_1289_to_fp16 = const()[name = string("op_1289_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297231616)))]; tensor out_59_cast_fp16 = mul(x = var_1285_cast_fp16_0, y = var_1289_to_fp16)[name = string("out_59_cast_fp16")]; tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = out_59_cast_fp16)[name = string("x_81_cast_fp16")]; fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1318_cast_fp16 = mul(x = x_81_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_1318_cast_fp16")]; bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; tensor input_101_cast_fp16 = concat(axis = var_23, interleave = input_101_interleave_0, values = (x_81_cast_fp16, var_1318_cast_fp16))[name = string("input_101_cast_fp16")]; tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_8_to_fp16, x = input_101_cast_fp16)[name = string("normed_141_cast_fp16")]; tensor var_1323_split_sizes_0 = const()[name = string("op_1323_split_sizes_0"), val = tensor([768, 768])]; int32 var_1323_axis_0 = const()[name = string("op_1323_axis_0"), val = int32(-1)]; tensor var_1323_cast_fp16_0, tensor var_1323_cast_fp16_1 = split(axis = var_1323_axis_0, split_sizes = var_1323_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_1323_cast_fp16")]; tensor var_1327_to_fp16 = const()[name = string("op_1327_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297233216)))]; tensor out_61_cast_fp16 = mul(x = var_1323_cast_fp16_0, y = var_1327_to_fp16)[name = string("out_61_cast_fp16")]; tensor var_1333 = const()[name = string("op_1333"), val = tensor([0, 2, 1])]; tensor var_1335_axes_0 = const()[name = string("op_1335_axes_0"), val = tensor([2])]; tensor var_1334_cast_fp16 = transpose(perm = var_1333, x = out_61_cast_fp16)[name = string("transpose_170")]; tensor var_1335_cast_fp16 = expand_dims(axes = var_1335_axes_0, x = var_1334_cast_fp16)[name = string("op_1335_cast_fp16")]; string var_1342_pad_type_0 = const()[name = string("op_1342_pad_type_0"), val = string("valid")]; tensor var_1342_strides_0 = const()[name = string("op_1342_strides_0"), val = tensor([1, 1])]; tensor var_1342_pad_0 = const()[name = string("op_1342_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1342_dilations_0 = const()[name = string("op_1342_dilations_0"), val = tensor([1, 1])]; int32 var_1342_groups_0 = const()[name = string("op_1342_groups_0"), val = int32(1)]; tensor var_1342 = conv(dilations = var_1342_dilations_0, groups = var_1342_groups_0, pad = var_1342_pad_0, pad_type = var_1342_pad_type_0, strides = var_1342_strides_0, weight = encoder_layers_5_self_attn_q_proj_weight_quantized, x = var_1335_cast_fp16)[name = string("op_1342")]; tensor var_1343 = const()[name = string("op_1343"), val = tensor([1, 3, 256, 128])]; tensor var_1344 = reshape(shape = var_1343, x = var_1342)[name = string("op_1344")]; tensor var_1345 = const()[name = string("op_1345"), val = tensor([0, 1, 3, 2])]; string var_1352_pad_type_0 = const()[name = string("op_1352_pad_type_0"), val = string("valid")]; tensor var_1352_strides_0 = const()[name = string("op_1352_strides_0"), val = tensor([1, 1])]; tensor var_1352_pad_0 = const()[name = string("op_1352_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1352_dilations_0 = const()[name = string("op_1352_dilations_0"), val = tensor([1, 1])]; int32 var_1352_groups_0 = const()[name = string("op_1352_groups_0"), val = int32(1)]; tensor var_1352 = conv(dilations = var_1352_dilations_0, groups = var_1352_groups_0, pad = var_1352_pad_0, pad_type = var_1352_pad_type_0, strides = var_1352_strides_0, weight = encoder_layers_5_self_attn_k_proj_weight_quantized, x = var_1335_cast_fp16)[name = string("op_1352")]; tensor var_1353 = const()[name = string("op_1353"), val = tensor([1, 1, 256, 128])]; tensor var_1354 = reshape(shape = var_1353, x = var_1352)[name = string("op_1354")]; tensor var_1355 = const()[name = string("op_1355"), val = tensor([0, 1, 3, 2])]; string var_1362_pad_type_0 = const()[name = string("op_1362_pad_type_0"), val = string("valid")]; tensor var_1362_strides_0 = const()[name = string("op_1362_strides_0"), val = tensor([1, 1])]; tensor var_1362_pad_0 = const()[name = string("op_1362_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1362_dilations_0 = const()[name = string("op_1362_dilations_0"), val = tensor([1, 1])]; int32 var_1362_groups_0 = const()[name = string("op_1362_groups_0"), val = int32(1)]; tensor var_1362 = conv(dilations = var_1362_dilations_0, groups = var_1362_groups_0, pad = var_1362_pad_0, pad_type = var_1362_pad_type_0, strides = var_1362_strides_0, weight = encoder_layers_5_self_attn_v_proj_weight_quantized, x = var_1335_cast_fp16)[name = string("op_1362")]; tensor var_1363 = const()[name = string("op_1363"), val = tensor([1, 1, 256, 128])]; tensor var_1364 = reshape(shape = var_1363, x = var_1362)[name = string("op_1364")]; tensor var_1365 = const()[name = string("op_1365"), val = tensor([0, 1, 3, 2])]; fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_31 = transpose(perm = var_1345, x = var_1344)[name = string("transpose_169")]; tensor var_1371_cast_fp16 = mul(x = q_31, y = const_72_promoted_to_fp16)[name = string("op_1371_cast_fp16")]; bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; tensor input_105_cast_fp16 = concat(axis = var_23, interleave = input_105_interleave_0, values = (q_31, var_1371_cast_fp16))[name = string("input_105_cast_fp16")]; tensor normed_147_axes_0 = const()[name = string("normed_147_axes_0"), val = tensor([-1])]; tensor normed_147_cast_fp16 = layer_norm(axes = normed_147_axes_0, epsilon = var_8_to_fp16, x = input_105_cast_fp16)[name = string("normed_147_cast_fp16")]; tensor var_1376_split_sizes_0 = const()[name = string("op_1376_split_sizes_0"), val = tensor([256, 256])]; int32 var_1376_axis_0 = const()[name = string("op_1376_axis_0"), val = int32(-1)]; tensor var_1376_cast_fp16_0, tensor var_1376_cast_fp16_1 = split(axis = var_1376_axis_0, split_sizes = var_1376_split_sizes_0, x = normed_147_cast_fp16)[name = string("op_1376_cast_fp16")]; tensor var_1380_to_fp16 = const()[name = string("op_1380_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297234816)))]; tensor out_63_cast_fp16 = mul(x = var_1376_cast_fp16_0, y = var_1380_to_fp16)[name = string("out_63_cast_fp16")]; fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_31 = transpose(perm = var_1355, x = var_1354)[name = string("transpose_168")]; tensor var_1387_cast_fp16 = mul(x = k_31, y = const_74_promoted_to_fp16)[name = string("op_1387_cast_fp16")]; bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; tensor input_107_cast_fp16 = concat(axis = var_23, interleave = input_107_interleave_0, values = (k_31, var_1387_cast_fp16))[name = string("input_107_cast_fp16")]; tensor normed_151_axes_0 = const()[name = string("normed_151_axes_0"), val = tensor([-1])]; tensor normed_151_cast_fp16 = layer_norm(axes = normed_151_axes_0, epsilon = var_8_to_fp16, x = input_107_cast_fp16)[name = string("normed_151_cast_fp16")]; tensor var_1392_split_sizes_0 = const()[name = string("op_1392_split_sizes_0"), val = tensor([256, 256])]; int32 var_1392_axis_0 = const()[name = string("op_1392_axis_0"), val = int32(-1)]; tensor var_1392_cast_fp16_0, tensor var_1392_cast_fp16_1 = split(axis = var_1392_axis_0, split_sizes = var_1392_split_sizes_0, x = normed_151_cast_fp16)[name = string("op_1392_cast_fp16")]; tensor var_1396_to_fp16 = const()[name = string("op_1396_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297235392)))]; tensor out_65_cast_fp16 = mul(x = var_1392_cast_fp16_0, y = var_1396_to_fp16)[name = string("out_65_cast_fp16")]; tensor var_1399 = mul(x = out_63_cast_fp16, y = cos_quantized)[name = string("op_1399")]; tensor var_1400_split_sizes_0 = const()[name = string("op_1400_split_sizes_0"), val = tensor([128, 128])]; int32 var_1400_axis_0 = const()[name = string("op_1400_axis_0"), val = int32(-1)]; tensor var_1400_0, tensor var_1400_1 = split(axis = var_1400_axis_0, split_sizes = var_1400_split_sizes_0, x = out_63_cast_fp16)[name = string("op_1400")]; fp16 const_76_promoted = const()[name = string("const_76_promoted"), val = fp16(-0x1p+0)]; tensor var_1402 = mul(x = var_1400_1, y = const_76_promoted)[name = string("op_1402")]; bool var_1404_interleave_0 = const()[name = string("op_1404_interleave_0"), val = bool(false)]; tensor var_1404 = concat(axis = var_23, interleave = var_1404_interleave_0, values = (var_1402, var_1400_0))[name = string("op_1404")]; tensor var_1405 = mul(x = var_1404, y = sin_quantized)[name = string("op_1405")]; tensor q_35 = add(x = var_1399, y = var_1405)[name = string("q_35")]; tensor var_1407 = mul(x = out_65_cast_fp16, y = cos_quantized)[name = string("op_1407")]; tensor var_1408_split_sizes_0 = const()[name = string("op_1408_split_sizes_0"), val = tensor([128, 128])]; int32 var_1408_axis_0 = const()[name = string("op_1408_axis_0"), val = int32(-1)]; tensor var_1408_0, tensor var_1408_1 = split(axis = var_1408_axis_0, split_sizes = var_1408_split_sizes_0, x = out_65_cast_fp16)[name = string("op_1408")]; fp16 const_77_promoted = const()[name = string("const_77_promoted"), val = fp16(-0x1p+0)]; tensor var_1410 = mul(x = var_1408_1, y = const_77_promoted)[name = string("op_1410")]; bool var_1412_interleave_0 = const()[name = string("op_1412_interleave_0"), val = bool(false)]; tensor var_1412 = concat(axis = var_23, interleave = var_1412_interleave_0, values = (var_1410, var_1408_0))[name = string("op_1412")]; tensor var_1413 = mul(x = var_1412, y = sin_quantized)[name = string("op_1413")]; tensor hidden_states_61 = add(x = var_1407, y = var_1413)[name = string("hidden_states_61")]; tensor hidden_states_63_axes_0 = const()[name = string("hidden_states_63_axes_0"), val = tensor([2])]; tensor hidden_states_63 = expand_dims(axes = hidden_states_63_axes_0, x = hidden_states_61)[name = string("hidden_states_63")]; tensor var_1416 = const()[name = string("op_1416"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_65 = tile(reps = var_1416, x = hidden_states_63)[name = string("hidden_states_65")]; tensor var_1418 = const()[name = string("op_1418"), val = tensor([1, 3, 128, 256])]; tensor k_35 = reshape(shape = var_1418, x = hidden_states_65)[name = string("k_35")]; tensor hidden_states_69_axes_0 = const()[name = string("hidden_states_69_axes_0"), val = tensor([2])]; tensor hidden_states_67 = transpose(perm = var_1365, x = var_1364)[name = string("transpose_167")]; tensor hidden_states_69 = expand_dims(axes = hidden_states_69_axes_0, x = hidden_states_67)[name = string("hidden_states_69")]; tensor var_1421 = const()[name = string("op_1421"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_71 = tile(reps = var_1421, x = hidden_states_69)[name = string("hidden_states_71")]; tensor var_1423 = const()[name = string("op_1423"), val = tensor([1, 3, 128, 256])]; tensor v_11 = reshape(shape = var_1423, x = hidden_states_71)[name = string("v_11")]; bool var_1428_transpose_x_1 = const()[name = string("op_1428_transpose_x_1"), val = bool(false)]; bool var_1428_transpose_y_1 = const()[name = string("op_1428_transpose_y_1"), val = bool(true)]; tensor var_1428_cast_fp16 = matmul(transpose_x = var_1428_transpose_x_1, transpose_y = var_1428_transpose_y_1, x = q_35, y = k_35)[name = string("op_1428_cast_fp16")]; fp16 var_1429_to_fp16 = const()[name = string("op_1429_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_31_cast_fp16 = mul(x = var_1428_cast_fp16, y = var_1429_to_fp16)[name = string("attn_weights_31_cast_fp16")]; tensor attn_weights_33_cast_fp16 = add(x = attn_weights_31_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; tensor var_1433_cast_fp16 = softmax(axis = var_23, x = attn_weights_33_cast_fp16)[name = string("op_1433_cast_fp16")]; bool var_1437_transpose_x_0 = const()[name = string("op_1437_transpose_x_0"), val = bool(false)]; bool var_1437_transpose_y_0 = const()[name = string("op_1437_transpose_y_0"), val = bool(false)]; tensor var_1437_cast_fp16 = matmul(transpose_x = var_1437_transpose_x_0, transpose_y = var_1437_transpose_y_0, x = var_1433_cast_fp16, y = v_11)[name = string("op_1437_cast_fp16")]; tensor var_1439 = const()[name = string("op_1439"), val = tensor([0, 2, 1, 3])]; tensor var_1442 = const()[name = string("op_1442"), val = tensor([1, 128, 768])]; tensor var_1440 = transpose(perm = var_1439, x = var_1437_cast_fp16)[name = string("transpose_166")]; tensor attn_out_33 = reshape(shape = var_1442, x = var_1440)[name = string("attn_out_33")]; tensor var_1444 = const()[name = string("op_1444"), val = tensor([0, 2, 1])]; tensor squeeze_5_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297235968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297825856))))[name = string("squeeze_5_quantized")]; string var_1453_pad_type_0 = const()[name = string("op_1453_pad_type_0"), val = string("valid")]; int32 var_1453_groups_0 = const()[name = string("op_1453_groups_0"), val = int32(1)]; tensor var_1453_strides_0 = const()[name = string("op_1453_strides_0"), val = tensor([1])]; tensor var_1453_pad_0 = const()[name = string("op_1453_pad_0"), val = tensor([0, 0])]; tensor var_1453_dilations_0 = const()[name = string("op_1453_dilations_0"), val = tensor([1])]; tensor var_1445 = transpose(perm = var_1444, x = attn_out_33)[name = string("transpose_165")]; tensor var_1453 = conv(dilations = var_1453_dilations_0, groups = var_1453_groups_0, pad = var_1453_pad_0, pad_type = var_1453_pad_type_0, strides = var_1453_strides_0, weight = squeeze_5_quantized, x = var_1445)[name = string("op_1453")]; tensor var_1454 = const()[name = string("op_1454"), val = tensor([0, 2, 1])]; fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_89 = transpose(perm = var_1454, x = var_1453)[name = string("transpose_164")]; tensor var_1458_cast_fp16 = mul(x = x_89, y = const_78_promoted_to_fp16)[name = string("op_1458_cast_fp16")]; bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; tensor input_111_cast_fp16 = concat(axis = var_23, interleave = input_111_interleave_0, values = (x_89, var_1458_cast_fp16))[name = string("input_111_cast_fp16")]; tensor normed_155_axes_0 = const()[name = string("normed_155_axes_0"), val = tensor([-1])]; tensor normed_155_cast_fp16 = layer_norm(axes = normed_155_axes_0, epsilon = var_8_to_fp16, x = input_111_cast_fp16)[name = string("normed_155_cast_fp16")]; tensor var_1463_split_sizes_0 = const()[name = string("op_1463_split_sizes_0"), val = tensor([768, 768])]; int32 var_1463_axis_0 = const()[name = string("op_1463_axis_0"), val = int32(-1)]; tensor var_1463_cast_fp16_0, tensor var_1463_cast_fp16_1 = split(axis = var_1463_axis_0, split_sizes = var_1463_split_sizes_0, x = normed_155_cast_fp16)[name = string("op_1463_cast_fp16")]; tensor var_1467_to_fp16 = const()[name = string("op_1467_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297827456)))]; tensor out_67_cast_fp16 = mul(x = var_1463_cast_fp16_0, y = var_1467_to_fp16)[name = string("out_67_cast_fp16")]; tensor x_91_cast_fp16 = add(x = x_81_cast_fp16, y = out_67_cast_fp16)[name = string("x_91_cast_fp16")]; fp16 const_80_promoted_to_fp16 = const()[name = string("const_80_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1474_cast_fp16 = mul(x = x_91_cast_fp16, y = const_80_promoted_to_fp16)[name = string("op_1474_cast_fp16")]; bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; tensor input_113_cast_fp16 = concat(axis = var_23, interleave = input_113_interleave_0, values = (x_91_cast_fp16, var_1474_cast_fp16))[name = string("input_113_cast_fp16")]; tensor normed_159_axes_0 = const()[name = string("normed_159_axes_0"), val = tensor([-1])]; tensor normed_159_cast_fp16 = layer_norm(axes = normed_159_axes_0, epsilon = var_8_to_fp16, x = input_113_cast_fp16)[name = string("normed_159_cast_fp16")]; tensor var_1479_split_sizes_0 = const()[name = string("op_1479_split_sizes_0"), val = tensor([768, 768])]; int32 var_1479_axis_0 = const()[name = string("op_1479_axis_0"), val = int32(-1)]; tensor var_1479_cast_fp16_0, tensor var_1479_cast_fp16_1 = split(axis = var_1479_axis_0, split_sizes = var_1479_split_sizes_0, x = normed_159_cast_fp16)[name = string("op_1479_cast_fp16")]; tensor var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297829056)))]; tensor out_69_cast_fp16 = mul(x = var_1479_cast_fp16_0, y = var_1483_to_fp16)[name = string("out_69_cast_fp16")]; tensor var_1490 = const()[name = string("op_1490"), val = tensor([0, 2, 1])]; tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; tensor var_1491 = transpose(perm = var_1490, x = out_69_cast_fp16)[name = string("transpose_163")]; tensor input_115 = expand_dims(axes = input_115_axes_0, x = var_1491)[name = string("input_115")]; string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = encoder_layers_5_mlp_gate_proj_weight_quantized, x = input_115)[name = string("gate_21")]; string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = encoder_layers_5_mlp_up_proj_weight_quantized, x = input_115)[name = string("up_11")]; string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; tensor input_117 = mul(x = gate_23, y = up_11)[name = string("input_117")]; string var_1512_pad_type_0 = const()[name = string("op_1512_pad_type_0"), val = string("valid")]; tensor var_1512_strides_0 = const()[name = string("op_1512_strides_0"), val = tensor([1, 1])]; tensor var_1512_pad_0 = const()[name = string("op_1512_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1512_dilations_0 = const()[name = string("op_1512_dilations_0"), val = tensor([1, 1])]; int32 var_1512_groups_0 = const()[name = string("op_1512_groups_0"), val = int32(1)]; tensor var_1512 = conv(dilations = var_1512_dilations_0, groups = var_1512_groups_0, pad = var_1512_pad_0, pad_type = var_1512_pad_type_0, strides = var_1512_strides_0, weight = encoder_layers_5_mlp_down_proj_weight_quantized, x = input_117)[name = string("op_1512")]; tensor var_1513_axes_0 = const()[name = string("op_1513_axes_0"), val = tensor([2])]; tensor var_1513 = squeeze(axes = var_1513_axes_0, x = var_1512)[name = string("op_1513")]; tensor var_1514 = const()[name = string("op_1514"), val = tensor([0, 2, 1])]; fp16 const_82_promoted_to_fp16 = const()[name = string("const_82_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_95 = transpose(perm = var_1514, x = var_1513)[name = string("transpose_162")]; tensor var_1518_cast_fp16 = mul(x = x_95, y = const_82_promoted_to_fp16)[name = string("op_1518_cast_fp16")]; bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; tensor input_119_cast_fp16 = concat(axis = var_23, interleave = input_119_interleave_0, values = (x_95, var_1518_cast_fp16))[name = string("input_119_cast_fp16")]; tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_8_to_fp16, x = input_119_cast_fp16)[name = string("normed_165_cast_fp16")]; tensor var_1523_split_sizes_0 = const()[name = string("op_1523_split_sizes_0"), val = tensor([768, 768])]; int32 var_1523_axis_0 = const()[name = string("op_1523_axis_0"), val = int32(-1)]; tensor var_1523_cast_fp16_0, tensor var_1523_cast_fp16_1 = split(axis = var_1523_axis_0, split_sizes = var_1523_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_1523_cast_fp16")]; tensor var_1527_to_fp16 = const()[name = string("op_1527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297830656)))]; tensor out_71_cast_fp16 = mul(x = var_1523_cast_fp16_0, y = var_1527_to_fp16)[name = string("out_71_cast_fp16")]; tensor x_97_cast_fp16 = add(x = x_91_cast_fp16, y = out_71_cast_fp16)[name = string("x_97_cast_fp16")]; fp16 const_84_promoted_to_fp16 = const()[name = string("const_84_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1556_cast_fp16 = mul(x = x_97_cast_fp16, y = const_84_promoted_to_fp16)[name = string("op_1556_cast_fp16")]; bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; tensor input_121_cast_fp16 = concat(axis = var_23, interleave = input_121_interleave_0, values = (x_97_cast_fp16, var_1556_cast_fp16))[name = string("input_121_cast_fp16")]; tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_8_to_fp16, x = input_121_cast_fp16)[name = string("normed_169_cast_fp16")]; tensor var_1561_split_sizes_0 = const()[name = string("op_1561_split_sizes_0"), val = tensor([768, 768])]; int32 var_1561_axis_0 = const()[name = string("op_1561_axis_0"), val = int32(-1)]; tensor var_1561_cast_fp16_0, tensor var_1561_cast_fp16_1 = split(axis = var_1561_axis_0, split_sizes = var_1561_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_1561_cast_fp16")]; tensor var_1565_to_fp16 = const()[name = string("op_1565_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297832256)))]; tensor out_73_cast_fp16 = mul(x = var_1561_cast_fp16_0, y = var_1565_to_fp16)[name = string("out_73_cast_fp16")]; tensor var_1571 = const()[name = string("op_1571"), val = tensor([0, 2, 1])]; tensor var_1573_axes_0 = const()[name = string("op_1573_axes_0"), val = tensor([2])]; tensor var_1572_cast_fp16 = transpose(perm = var_1571, x = out_73_cast_fp16)[name = string("transpose_161")]; tensor var_1573_cast_fp16 = expand_dims(axes = var_1573_axes_0, x = var_1572_cast_fp16)[name = string("op_1573_cast_fp16")]; string var_1580_pad_type_0 = const()[name = string("op_1580_pad_type_0"), val = string("valid")]; tensor var_1580_strides_0 = const()[name = string("op_1580_strides_0"), val = tensor([1, 1])]; tensor var_1580_pad_0 = const()[name = string("op_1580_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1580_dilations_0 = const()[name = string("op_1580_dilations_0"), val = tensor([1, 1])]; int32 var_1580_groups_0 = const()[name = string("op_1580_groups_0"), val = int32(1)]; tensor var_1580 = conv(dilations = var_1580_dilations_0, groups = var_1580_groups_0, pad = var_1580_pad_0, pad_type = var_1580_pad_type_0, strides = var_1580_strides_0, weight = encoder_layers_6_self_attn_q_proj_weight_quantized, x = var_1573_cast_fp16)[name = string("op_1580")]; tensor var_1581 = const()[name = string("op_1581"), val = tensor([1, 3, 256, 128])]; tensor var_1582 = reshape(shape = var_1581, x = var_1580)[name = string("op_1582")]; tensor var_1583 = const()[name = string("op_1583"), val = tensor([0, 1, 3, 2])]; string var_1590_pad_type_0 = const()[name = string("op_1590_pad_type_0"), val = string("valid")]; tensor var_1590_strides_0 = const()[name = string("op_1590_strides_0"), val = tensor([1, 1])]; tensor var_1590_pad_0 = const()[name = string("op_1590_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1590_dilations_0 = const()[name = string("op_1590_dilations_0"), val = tensor([1, 1])]; int32 var_1590_groups_0 = const()[name = string("op_1590_groups_0"), val = int32(1)]; tensor var_1590 = conv(dilations = var_1590_dilations_0, groups = var_1590_groups_0, pad = var_1590_pad_0, pad_type = var_1590_pad_type_0, strides = var_1590_strides_0, weight = encoder_layers_6_self_attn_k_proj_weight_quantized, x = var_1573_cast_fp16)[name = string("op_1590")]; tensor var_1591 = const()[name = string("op_1591"), val = tensor([1, 1, 256, 128])]; tensor var_1592 = reshape(shape = var_1591, x = var_1590)[name = string("op_1592")]; tensor var_1593 = const()[name = string("op_1593"), val = tensor([0, 1, 3, 2])]; string var_1600_pad_type_0 = const()[name = string("op_1600_pad_type_0"), val = string("valid")]; tensor var_1600_strides_0 = const()[name = string("op_1600_strides_0"), val = tensor([1, 1])]; tensor var_1600_pad_0 = const()[name = string("op_1600_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1600_dilations_0 = const()[name = string("op_1600_dilations_0"), val = tensor([1, 1])]; int32 var_1600_groups_0 = const()[name = string("op_1600_groups_0"), val = int32(1)]; tensor var_1600 = conv(dilations = var_1600_dilations_0, groups = var_1600_groups_0, pad = var_1600_pad_0, pad_type = var_1600_pad_type_0, strides = var_1600_strides_0, weight = encoder_layers_6_self_attn_v_proj_weight_quantized, x = var_1573_cast_fp16)[name = string("op_1600")]; tensor var_1601 = const()[name = string("op_1601"), val = tensor([1, 1, 256, 128])]; tensor var_1602 = reshape(shape = var_1601, x = var_1600)[name = string("op_1602")]; tensor var_1603 = const()[name = string("op_1603"), val = tensor([0, 1, 3, 2])]; fp16 const_86_promoted_to_fp16 = const()[name = string("const_86_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_37 = transpose(perm = var_1583, x = var_1582)[name = string("transpose_160")]; tensor var_1609_cast_fp16 = mul(x = q_37, y = const_86_promoted_to_fp16)[name = string("op_1609_cast_fp16")]; bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; tensor input_125_cast_fp16 = concat(axis = var_23, interleave = input_125_interleave_0, values = (q_37, var_1609_cast_fp16))[name = string("input_125_cast_fp16")]; tensor normed_175_axes_0 = const()[name = string("normed_175_axes_0"), val = tensor([-1])]; tensor normed_175_cast_fp16 = layer_norm(axes = normed_175_axes_0, epsilon = var_8_to_fp16, x = input_125_cast_fp16)[name = string("normed_175_cast_fp16")]; tensor var_1614_split_sizes_0 = const()[name = string("op_1614_split_sizes_0"), val = tensor([256, 256])]; int32 var_1614_axis_0 = const()[name = string("op_1614_axis_0"), val = int32(-1)]; tensor var_1614_cast_fp16_0, tensor var_1614_cast_fp16_1 = split(axis = var_1614_axis_0, split_sizes = var_1614_split_sizes_0, x = normed_175_cast_fp16)[name = string("op_1614_cast_fp16")]; tensor var_1618_to_fp16 = const()[name = string("op_1618_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297833856)))]; tensor out_75_cast_fp16 = mul(x = var_1614_cast_fp16_0, y = var_1618_to_fp16)[name = string("out_75_cast_fp16")]; fp16 const_88_promoted_to_fp16 = const()[name = string("const_88_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_37 = transpose(perm = var_1593, x = var_1592)[name = string("transpose_159")]; tensor var_1625_cast_fp16 = mul(x = k_37, y = const_88_promoted_to_fp16)[name = string("op_1625_cast_fp16")]; bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; tensor input_127_cast_fp16 = concat(axis = var_23, interleave = input_127_interleave_0, values = (k_37, var_1625_cast_fp16))[name = string("input_127_cast_fp16")]; tensor normed_179_axes_0 = const()[name = string("normed_179_axes_0"), val = tensor([-1])]; tensor normed_179_cast_fp16 = layer_norm(axes = normed_179_axes_0, epsilon = var_8_to_fp16, x = input_127_cast_fp16)[name = string("normed_179_cast_fp16")]; tensor var_1630_split_sizes_0 = const()[name = string("op_1630_split_sizes_0"), val = tensor([256, 256])]; int32 var_1630_axis_0 = const()[name = string("op_1630_axis_0"), val = int32(-1)]; tensor var_1630_cast_fp16_0, tensor var_1630_cast_fp16_1 = split(axis = var_1630_axis_0, split_sizes = var_1630_split_sizes_0, x = normed_179_cast_fp16)[name = string("op_1630_cast_fp16")]; tensor var_1634_to_fp16 = const()[name = string("op_1634_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297834432)))]; tensor out_77_cast_fp16 = mul(x = var_1630_cast_fp16_0, y = var_1634_to_fp16)[name = string("out_77_cast_fp16")]; tensor var_1637 = mul(x = out_75_cast_fp16, y = cos_1_quantized)[name = string("op_1637")]; tensor var_1638_split_sizes_0 = const()[name = string("op_1638_split_sizes_0"), val = tensor([128, 128])]; int32 var_1638_axis_0 = const()[name = string("op_1638_axis_0"), val = int32(-1)]; tensor var_1638_0, tensor var_1638_1 = split(axis = var_1638_axis_0, split_sizes = var_1638_split_sizes_0, x = out_75_cast_fp16)[name = string("op_1638")]; fp16 const_90_promoted = const()[name = string("const_90_promoted"), val = fp16(-0x1p+0)]; tensor var_1640 = mul(x = var_1638_1, y = const_90_promoted)[name = string("op_1640")]; bool var_1642_interleave_0 = const()[name = string("op_1642_interleave_0"), val = bool(false)]; tensor var_1642 = concat(axis = var_23, interleave = var_1642_interleave_0, values = (var_1640, var_1638_0))[name = string("op_1642")]; tensor var_1643 = mul(x = var_1642, y = sin_1_quantized)[name = string("op_1643")]; tensor q_41 = add(x = var_1637, y = var_1643)[name = string("q_41")]; tensor var_1645 = mul(x = out_77_cast_fp16, y = cos_1_quantized)[name = string("op_1645")]; tensor var_1646_split_sizes_0 = const()[name = string("op_1646_split_sizes_0"), val = tensor([128, 128])]; int32 var_1646_axis_0 = const()[name = string("op_1646_axis_0"), val = int32(-1)]; tensor var_1646_0, tensor var_1646_1 = split(axis = var_1646_axis_0, split_sizes = var_1646_split_sizes_0, x = out_77_cast_fp16)[name = string("op_1646")]; fp16 const_91_promoted = const()[name = string("const_91_promoted"), val = fp16(-0x1p+0)]; tensor var_1648 = mul(x = var_1646_1, y = const_91_promoted)[name = string("op_1648")]; bool var_1650_interleave_0 = const()[name = string("op_1650_interleave_0"), val = bool(false)]; tensor var_1650 = concat(axis = var_23, interleave = var_1650_interleave_0, values = (var_1648, var_1646_0))[name = string("op_1650")]; tensor var_1651 = mul(x = var_1650, y = sin_1_quantized)[name = string("op_1651")]; tensor hidden_states_73 = add(x = var_1645, y = var_1651)[name = string("hidden_states_73")]; tensor hidden_states_75_axes_0 = const()[name = string("hidden_states_75_axes_0"), val = tensor([2])]; tensor hidden_states_75 = expand_dims(axes = hidden_states_75_axes_0, x = hidden_states_73)[name = string("hidden_states_75")]; tensor var_1654 = const()[name = string("op_1654"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_77 = tile(reps = var_1654, x = hidden_states_75)[name = string("hidden_states_77")]; tensor var_1656 = const()[name = string("op_1656"), val = tensor([1, 3, 128, 256])]; tensor k_41 = reshape(shape = var_1656, x = hidden_states_77)[name = string("k_41")]; tensor hidden_states_81_axes_0 = const()[name = string("hidden_states_81_axes_0"), val = tensor([2])]; tensor hidden_states_79 = transpose(perm = var_1603, x = var_1602)[name = string("transpose_158")]; tensor hidden_states_81 = expand_dims(axes = hidden_states_81_axes_0, x = hidden_states_79)[name = string("hidden_states_81")]; tensor var_1659 = const()[name = string("op_1659"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_83 = tile(reps = var_1659, x = hidden_states_81)[name = string("hidden_states_83")]; tensor var_1661 = const()[name = string("op_1661"), val = tensor([1, 3, 128, 256])]; tensor v_13 = reshape(shape = var_1661, x = hidden_states_83)[name = string("v_13")]; bool var_1666_transpose_x_1 = const()[name = string("op_1666_transpose_x_1"), val = bool(false)]; bool var_1666_transpose_y_1 = const()[name = string("op_1666_transpose_y_1"), val = bool(true)]; tensor var_1666_cast_fp16 = matmul(transpose_x = var_1666_transpose_x_1, transpose_y = var_1666_transpose_y_1, x = q_41, y = k_41)[name = string("op_1666_cast_fp16")]; fp16 var_1667_to_fp16 = const()[name = string("op_1667_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_37_cast_fp16 = mul(x = var_1666_cast_fp16, y = var_1667_to_fp16)[name = string("attn_weights_37_cast_fp16")]; tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_39_cast_fp16")]; tensor var_1671_cast_fp16 = softmax(axis = var_23, x = attn_weights_39_cast_fp16)[name = string("op_1671_cast_fp16")]; bool var_1675_transpose_x_0 = const()[name = string("op_1675_transpose_x_0"), val = bool(false)]; bool var_1675_transpose_y_0 = const()[name = string("op_1675_transpose_y_0"), val = bool(false)]; tensor var_1675_cast_fp16 = matmul(transpose_x = var_1675_transpose_x_0, transpose_y = var_1675_transpose_y_0, x = var_1671_cast_fp16, y = v_13)[name = string("op_1675_cast_fp16")]; tensor var_1677 = const()[name = string("op_1677"), val = tensor([0, 2, 1, 3])]; tensor var_1680 = const()[name = string("op_1680"), val = tensor([1, 128, 768])]; tensor var_1678 = transpose(perm = var_1677, x = var_1675_cast_fp16)[name = string("transpose_157")]; tensor attn_out_39 = reshape(shape = var_1680, x = var_1678)[name = string("attn_out_39")]; tensor var_1682 = const()[name = string("op_1682"), val = tensor([0, 2, 1])]; tensor squeeze_6_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297835008))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298424896))))[name = string("squeeze_6_quantized")]; string var_1691_pad_type_0 = const()[name = string("op_1691_pad_type_0"), val = string("valid")]; int32 var_1691_groups_0 = const()[name = string("op_1691_groups_0"), val = int32(1)]; tensor var_1691_strides_0 = const()[name = string("op_1691_strides_0"), val = tensor([1])]; tensor var_1691_pad_0 = const()[name = string("op_1691_pad_0"), val = tensor([0, 0])]; tensor var_1691_dilations_0 = const()[name = string("op_1691_dilations_0"), val = tensor([1])]; tensor var_1683 = transpose(perm = var_1682, x = attn_out_39)[name = string("transpose_156")]; tensor var_1691 = conv(dilations = var_1691_dilations_0, groups = var_1691_groups_0, pad = var_1691_pad_0, pad_type = var_1691_pad_type_0, strides = var_1691_strides_0, weight = squeeze_6_quantized, x = var_1683)[name = string("op_1691")]; tensor var_1692 = const()[name = string("op_1692"), val = tensor([0, 2, 1])]; fp16 const_92_promoted_to_fp16 = const()[name = string("const_92_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_105 = transpose(perm = var_1692, x = var_1691)[name = string("transpose_155")]; tensor var_1696_cast_fp16 = mul(x = x_105, y = const_92_promoted_to_fp16)[name = string("op_1696_cast_fp16")]; bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; tensor input_131_cast_fp16 = concat(axis = var_23, interleave = input_131_interleave_0, values = (x_105, var_1696_cast_fp16))[name = string("input_131_cast_fp16")]; tensor normed_183_axes_0 = const()[name = string("normed_183_axes_0"), val = tensor([-1])]; tensor normed_183_cast_fp16 = layer_norm(axes = normed_183_axes_0, epsilon = var_8_to_fp16, x = input_131_cast_fp16)[name = string("normed_183_cast_fp16")]; tensor var_1701_split_sizes_0 = const()[name = string("op_1701_split_sizes_0"), val = tensor([768, 768])]; int32 var_1701_axis_0 = const()[name = string("op_1701_axis_0"), val = int32(-1)]; tensor var_1701_cast_fp16_0, tensor var_1701_cast_fp16_1 = split(axis = var_1701_axis_0, split_sizes = var_1701_split_sizes_0, x = normed_183_cast_fp16)[name = string("op_1701_cast_fp16")]; tensor var_1705_to_fp16 = const()[name = string("op_1705_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298426496)))]; tensor out_79_cast_fp16 = mul(x = var_1701_cast_fp16_0, y = var_1705_to_fp16)[name = string("out_79_cast_fp16")]; tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = out_79_cast_fp16)[name = string("x_107_cast_fp16")]; fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1712_cast_fp16 = mul(x = x_107_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_1712_cast_fp16")]; bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; tensor input_133_cast_fp16 = concat(axis = var_23, interleave = input_133_interleave_0, values = (x_107_cast_fp16, var_1712_cast_fp16))[name = string("input_133_cast_fp16")]; tensor normed_187_axes_0 = const()[name = string("normed_187_axes_0"), val = tensor([-1])]; tensor normed_187_cast_fp16 = layer_norm(axes = normed_187_axes_0, epsilon = var_8_to_fp16, x = input_133_cast_fp16)[name = string("normed_187_cast_fp16")]; tensor var_1717_split_sizes_0 = const()[name = string("op_1717_split_sizes_0"), val = tensor([768, 768])]; int32 var_1717_axis_0 = const()[name = string("op_1717_axis_0"), val = int32(-1)]; tensor var_1717_cast_fp16_0, tensor var_1717_cast_fp16_1 = split(axis = var_1717_axis_0, split_sizes = var_1717_split_sizes_0, x = normed_187_cast_fp16)[name = string("op_1717_cast_fp16")]; tensor var_1721_to_fp16 = const()[name = string("op_1721_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298428096)))]; tensor out_81_cast_fp16 = mul(x = var_1717_cast_fp16_0, y = var_1721_to_fp16)[name = string("out_81_cast_fp16")]; tensor var_1728 = const()[name = string("op_1728"), val = tensor([0, 2, 1])]; tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; tensor var_1729 = transpose(perm = var_1728, x = out_81_cast_fp16)[name = string("transpose_154")]; tensor input_135 = expand_dims(axes = input_135_axes_0, x = var_1729)[name = string("input_135")]; string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = encoder_layers_6_mlp_gate_proj_weight_quantized, x = input_135)[name = string("gate_25")]; string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = encoder_layers_6_mlp_up_proj_weight_quantized, x = input_135)[name = string("up_13")]; string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; tensor input_137 = mul(x = gate_27, y = up_13)[name = string("input_137")]; string var_1750_pad_type_0 = const()[name = string("op_1750_pad_type_0"), val = string("valid")]; tensor var_1750_strides_0 = const()[name = string("op_1750_strides_0"), val = tensor([1, 1])]; tensor var_1750_pad_0 = const()[name = string("op_1750_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1750_dilations_0 = const()[name = string("op_1750_dilations_0"), val = tensor([1, 1])]; int32 var_1750_groups_0 = const()[name = string("op_1750_groups_0"), val = int32(1)]; tensor var_1750 = conv(dilations = var_1750_dilations_0, groups = var_1750_groups_0, pad = var_1750_pad_0, pad_type = var_1750_pad_type_0, strides = var_1750_strides_0, weight = encoder_layers_6_mlp_down_proj_weight_quantized, x = input_137)[name = string("op_1750")]; tensor var_1751_axes_0 = const()[name = string("op_1751_axes_0"), val = tensor([2])]; tensor var_1751 = squeeze(axes = var_1751_axes_0, x = var_1750)[name = string("op_1751")]; tensor var_1752 = const()[name = string("op_1752"), val = tensor([0, 2, 1])]; fp16 const_96_promoted_to_fp16 = const()[name = string("const_96_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_111 = transpose(perm = var_1752, x = var_1751)[name = string("transpose_153")]; tensor var_1756_cast_fp16 = mul(x = x_111, y = const_96_promoted_to_fp16)[name = string("op_1756_cast_fp16")]; bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; tensor input_139_cast_fp16 = concat(axis = var_23, interleave = input_139_interleave_0, values = (x_111, var_1756_cast_fp16))[name = string("input_139_cast_fp16")]; tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_8_to_fp16, x = input_139_cast_fp16)[name = string("normed_193_cast_fp16")]; tensor var_1761_split_sizes_0 = const()[name = string("op_1761_split_sizes_0"), val = tensor([768, 768])]; int32 var_1761_axis_0 = const()[name = string("op_1761_axis_0"), val = int32(-1)]; tensor var_1761_cast_fp16_0, tensor var_1761_cast_fp16_1 = split(axis = var_1761_axis_0, split_sizes = var_1761_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_1761_cast_fp16")]; tensor var_1765_to_fp16 = const()[name = string("op_1765_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298429696)))]; tensor out_83_cast_fp16 = mul(x = var_1761_cast_fp16_0, y = var_1765_to_fp16)[name = string("out_83_cast_fp16")]; tensor x_113_cast_fp16 = add(x = x_107_cast_fp16, y = out_83_cast_fp16)[name = string("x_113_cast_fp16")]; fp16 const_98_promoted_to_fp16 = const()[name = string("const_98_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1794_cast_fp16 = mul(x = x_113_cast_fp16, y = const_98_promoted_to_fp16)[name = string("op_1794_cast_fp16")]; bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; tensor input_141_cast_fp16 = concat(axis = var_23, interleave = input_141_interleave_0, values = (x_113_cast_fp16, var_1794_cast_fp16))[name = string("input_141_cast_fp16")]; tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_8_to_fp16, x = input_141_cast_fp16)[name = string("normed_197_cast_fp16")]; tensor var_1799_split_sizes_0 = const()[name = string("op_1799_split_sizes_0"), val = tensor([768, 768])]; int32 var_1799_axis_0 = const()[name = string("op_1799_axis_0"), val = int32(-1)]; tensor var_1799_cast_fp16_0, tensor var_1799_cast_fp16_1 = split(axis = var_1799_axis_0, split_sizes = var_1799_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_1799_cast_fp16")]; tensor var_1803_to_fp16 = const()[name = string("op_1803_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298431296)))]; tensor out_85_cast_fp16 = mul(x = var_1799_cast_fp16_0, y = var_1803_to_fp16)[name = string("out_85_cast_fp16")]; tensor var_1809 = const()[name = string("op_1809"), val = tensor([0, 2, 1])]; tensor var_1811_axes_0 = const()[name = string("op_1811_axes_0"), val = tensor([2])]; tensor var_1810_cast_fp16 = transpose(perm = var_1809, x = out_85_cast_fp16)[name = string("transpose_152")]; tensor var_1811_cast_fp16 = expand_dims(axes = var_1811_axes_0, x = var_1810_cast_fp16)[name = string("op_1811_cast_fp16")]; string var_1818_pad_type_0 = const()[name = string("op_1818_pad_type_0"), val = string("valid")]; tensor var_1818_strides_0 = const()[name = string("op_1818_strides_0"), val = tensor([1, 1])]; tensor var_1818_pad_0 = const()[name = string("op_1818_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1818_dilations_0 = const()[name = string("op_1818_dilations_0"), val = tensor([1, 1])]; int32 var_1818_groups_0 = const()[name = string("op_1818_groups_0"), val = int32(1)]; tensor var_1818 = conv(dilations = var_1818_dilations_0, groups = var_1818_groups_0, pad = var_1818_pad_0, pad_type = var_1818_pad_type_0, strides = var_1818_strides_0, weight = encoder_layers_7_self_attn_q_proj_weight_quantized, x = var_1811_cast_fp16)[name = string("op_1818")]; tensor var_1819 = const()[name = string("op_1819"), val = tensor([1, 3, 256, 128])]; tensor var_1820 = reshape(shape = var_1819, x = var_1818)[name = string("op_1820")]; tensor var_1821 = const()[name = string("op_1821"), val = tensor([0, 1, 3, 2])]; string var_1828_pad_type_0 = const()[name = string("op_1828_pad_type_0"), val = string("valid")]; tensor var_1828_strides_0 = const()[name = string("op_1828_strides_0"), val = tensor([1, 1])]; tensor var_1828_pad_0 = const()[name = string("op_1828_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1828_dilations_0 = const()[name = string("op_1828_dilations_0"), val = tensor([1, 1])]; int32 var_1828_groups_0 = const()[name = string("op_1828_groups_0"), val = int32(1)]; tensor var_1828 = conv(dilations = var_1828_dilations_0, groups = var_1828_groups_0, pad = var_1828_pad_0, pad_type = var_1828_pad_type_0, strides = var_1828_strides_0, weight = encoder_layers_7_self_attn_k_proj_weight_quantized, x = var_1811_cast_fp16)[name = string("op_1828")]; tensor var_1829 = const()[name = string("op_1829"), val = tensor([1, 1, 256, 128])]; tensor var_1830 = reshape(shape = var_1829, x = var_1828)[name = string("op_1830")]; tensor var_1831 = const()[name = string("op_1831"), val = tensor([0, 1, 3, 2])]; string var_1838_pad_type_0 = const()[name = string("op_1838_pad_type_0"), val = string("valid")]; tensor var_1838_strides_0 = const()[name = string("op_1838_strides_0"), val = tensor([1, 1])]; tensor var_1838_pad_0 = const()[name = string("op_1838_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1838_dilations_0 = const()[name = string("op_1838_dilations_0"), val = tensor([1, 1])]; int32 var_1838_groups_0 = const()[name = string("op_1838_groups_0"), val = int32(1)]; tensor var_1838 = conv(dilations = var_1838_dilations_0, groups = var_1838_groups_0, pad = var_1838_pad_0, pad_type = var_1838_pad_type_0, strides = var_1838_strides_0, weight = encoder_layers_7_self_attn_v_proj_weight_quantized, x = var_1811_cast_fp16)[name = string("op_1838")]; tensor var_1839 = const()[name = string("op_1839"), val = tensor([1, 1, 256, 128])]; tensor var_1840 = reshape(shape = var_1839, x = var_1838)[name = string("op_1840")]; tensor var_1841 = const()[name = string("op_1841"), val = tensor([0, 1, 3, 2])]; fp16 const_100_promoted_to_fp16 = const()[name = string("const_100_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_43 = transpose(perm = var_1821, x = var_1820)[name = string("transpose_151")]; tensor var_1847_cast_fp16 = mul(x = q_43, y = const_100_promoted_to_fp16)[name = string("op_1847_cast_fp16")]; bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; tensor input_145_cast_fp16 = concat(axis = var_23, interleave = input_145_interleave_0, values = (q_43, var_1847_cast_fp16))[name = string("input_145_cast_fp16")]; tensor normed_203_axes_0 = const()[name = string("normed_203_axes_0"), val = tensor([-1])]; tensor normed_203_cast_fp16 = layer_norm(axes = normed_203_axes_0, epsilon = var_8_to_fp16, x = input_145_cast_fp16)[name = string("normed_203_cast_fp16")]; tensor var_1852_split_sizes_0 = const()[name = string("op_1852_split_sizes_0"), val = tensor([256, 256])]; int32 var_1852_axis_0 = const()[name = string("op_1852_axis_0"), val = int32(-1)]; tensor var_1852_cast_fp16_0, tensor var_1852_cast_fp16_1 = split(axis = var_1852_axis_0, split_sizes = var_1852_split_sizes_0, x = normed_203_cast_fp16)[name = string("op_1852_cast_fp16")]; tensor var_1856_to_fp16 = const()[name = string("op_1856_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298432896)))]; tensor out_87_cast_fp16 = mul(x = var_1852_cast_fp16_0, y = var_1856_to_fp16)[name = string("out_87_cast_fp16")]; fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_43 = transpose(perm = var_1831, x = var_1830)[name = string("transpose_150")]; tensor var_1863_cast_fp16 = mul(x = k_43, y = const_102_promoted_to_fp16)[name = string("op_1863_cast_fp16")]; bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; tensor input_147_cast_fp16 = concat(axis = var_23, interleave = input_147_interleave_0, values = (k_43, var_1863_cast_fp16))[name = string("input_147_cast_fp16")]; tensor normed_207_axes_0 = const()[name = string("normed_207_axes_0"), val = tensor([-1])]; tensor normed_207_cast_fp16 = layer_norm(axes = normed_207_axes_0, epsilon = var_8_to_fp16, x = input_147_cast_fp16)[name = string("normed_207_cast_fp16")]; tensor var_1868_split_sizes_0 = const()[name = string("op_1868_split_sizes_0"), val = tensor([256, 256])]; int32 var_1868_axis_0 = const()[name = string("op_1868_axis_0"), val = int32(-1)]; tensor var_1868_cast_fp16_0, tensor var_1868_cast_fp16_1 = split(axis = var_1868_axis_0, split_sizes = var_1868_split_sizes_0, x = normed_207_cast_fp16)[name = string("op_1868_cast_fp16")]; tensor var_1872_to_fp16 = const()[name = string("op_1872_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298433472)))]; tensor out_89_cast_fp16 = mul(x = var_1868_cast_fp16_0, y = var_1872_to_fp16)[name = string("out_89_cast_fp16")]; tensor var_1875 = mul(x = out_87_cast_fp16, y = cos_1_quantized)[name = string("op_1875")]; tensor var_1876_split_sizes_0 = const()[name = string("op_1876_split_sizes_0"), val = tensor([128, 128])]; int32 var_1876_axis_0 = const()[name = string("op_1876_axis_0"), val = int32(-1)]; tensor var_1876_0, tensor var_1876_1 = split(axis = var_1876_axis_0, split_sizes = var_1876_split_sizes_0, x = out_87_cast_fp16)[name = string("op_1876")]; fp16 const_104_promoted = const()[name = string("const_104_promoted"), val = fp16(-0x1p+0)]; tensor var_1878 = mul(x = var_1876_1, y = const_104_promoted)[name = string("op_1878")]; bool var_1880_interleave_0 = const()[name = string("op_1880_interleave_0"), val = bool(false)]; tensor var_1880 = concat(axis = var_23, interleave = var_1880_interleave_0, values = (var_1878, var_1876_0))[name = string("op_1880")]; tensor var_1881 = mul(x = var_1880, y = sin_1_quantized)[name = string("op_1881")]; tensor q_47 = add(x = var_1875, y = var_1881)[name = string("q_47")]; tensor var_1883 = mul(x = out_89_cast_fp16, y = cos_1_quantized)[name = string("op_1883")]; tensor var_1884_split_sizes_0 = const()[name = string("op_1884_split_sizes_0"), val = tensor([128, 128])]; int32 var_1884_axis_0 = const()[name = string("op_1884_axis_0"), val = int32(-1)]; tensor var_1884_0, tensor var_1884_1 = split(axis = var_1884_axis_0, split_sizes = var_1884_split_sizes_0, x = out_89_cast_fp16)[name = string("op_1884")]; fp16 const_105_promoted = const()[name = string("const_105_promoted"), val = fp16(-0x1p+0)]; tensor var_1886 = mul(x = var_1884_1, y = const_105_promoted)[name = string("op_1886")]; bool var_1888_interleave_0 = const()[name = string("op_1888_interleave_0"), val = bool(false)]; tensor var_1888 = concat(axis = var_23, interleave = var_1888_interleave_0, values = (var_1886, var_1884_0))[name = string("op_1888")]; tensor var_1889 = mul(x = var_1888, y = sin_1_quantized)[name = string("op_1889")]; tensor hidden_states_85 = add(x = var_1883, y = var_1889)[name = string("hidden_states_85")]; tensor hidden_states_87_axes_0 = const()[name = string("hidden_states_87_axes_0"), val = tensor([2])]; tensor hidden_states_87 = expand_dims(axes = hidden_states_87_axes_0, x = hidden_states_85)[name = string("hidden_states_87")]; tensor var_1892 = const()[name = string("op_1892"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_89 = tile(reps = var_1892, x = hidden_states_87)[name = string("hidden_states_89")]; tensor var_1894 = const()[name = string("op_1894"), val = tensor([1, 3, 128, 256])]; tensor k_47 = reshape(shape = var_1894, x = hidden_states_89)[name = string("k_47")]; tensor hidden_states_93_axes_0 = const()[name = string("hidden_states_93_axes_0"), val = tensor([2])]; tensor hidden_states_91 = transpose(perm = var_1841, x = var_1840)[name = string("transpose_149")]; tensor hidden_states_93 = expand_dims(axes = hidden_states_93_axes_0, x = hidden_states_91)[name = string("hidden_states_93")]; tensor var_1897 = const()[name = string("op_1897"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_95 = tile(reps = var_1897, x = hidden_states_93)[name = string("hidden_states_95")]; tensor var_1899 = const()[name = string("op_1899"), val = tensor([1, 3, 128, 256])]; tensor v_15 = reshape(shape = var_1899, x = hidden_states_95)[name = string("v_15")]; bool var_1904_transpose_x_1 = const()[name = string("op_1904_transpose_x_1"), val = bool(false)]; bool var_1904_transpose_y_1 = const()[name = string("op_1904_transpose_y_1"), val = bool(true)]; tensor var_1904_cast_fp16 = matmul(transpose_x = var_1904_transpose_x_1, transpose_y = var_1904_transpose_y_1, x = q_47, y = k_47)[name = string("op_1904_cast_fp16")]; fp16 var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_43_cast_fp16 = mul(x = var_1904_cast_fp16, y = var_1905_to_fp16)[name = string("attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = attn_weights_43_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; tensor var_1909_cast_fp16 = softmax(axis = var_23, x = attn_weights_45_cast_fp16)[name = string("op_1909_cast_fp16")]; bool var_1913_transpose_x_0 = const()[name = string("op_1913_transpose_x_0"), val = bool(false)]; bool var_1913_transpose_y_0 = const()[name = string("op_1913_transpose_y_0"), val = bool(false)]; tensor var_1913_cast_fp16 = matmul(transpose_x = var_1913_transpose_x_0, transpose_y = var_1913_transpose_y_0, x = var_1909_cast_fp16, y = v_15)[name = string("op_1913_cast_fp16")]; tensor var_1915 = const()[name = string("op_1915"), val = tensor([0, 2, 1, 3])]; tensor var_1918 = const()[name = string("op_1918"), val = tensor([1, 128, 768])]; tensor var_1916 = transpose(perm = var_1915, x = var_1913_cast_fp16)[name = string("transpose_148")]; tensor attn_out_45 = reshape(shape = var_1918, x = var_1916)[name = string("attn_out_45")]; tensor var_1920 = const()[name = string("op_1920"), val = tensor([0, 2, 1])]; tensor squeeze_7_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298434048))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299023936))))[name = string("squeeze_7_quantized")]; string var_1929_pad_type_0 = const()[name = string("op_1929_pad_type_0"), val = string("valid")]; int32 var_1929_groups_0 = const()[name = string("op_1929_groups_0"), val = int32(1)]; tensor var_1929_strides_0 = const()[name = string("op_1929_strides_0"), val = tensor([1])]; tensor var_1929_pad_0 = const()[name = string("op_1929_pad_0"), val = tensor([0, 0])]; tensor var_1929_dilations_0 = const()[name = string("op_1929_dilations_0"), val = tensor([1])]; tensor var_1921 = transpose(perm = var_1920, x = attn_out_45)[name = string("transpose_147")]; tensor var_1929 = conv(dilations = var_1929_dilations_0, groups = var_1929_groups_0, pad = var_1929_pad_0, pad_type = var_1929_pad_type_0, strides = var_1929_strides_0, weight = squeeze_7_quantized, x = var_1921)[name = string("op_1929")]; tensor var_1930 = const()[name = string("op_1930"), val = tensor([0, 2, 1])]; fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_121 = transpose(perm = var_1930, x = var_1929)[name = string("transpose_146")]; tensor var_1934_cast_fp16 = mul(x = x_121, y = const_106_promoted_to_fp16)[name = string("op_1934_cast_fp16")]; bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; tensor input_151_cast_fp16 = concat(axis = var_23, interleave = input_151_interleave_0, values = (x_121, var_1934_cast_fp16))[name = string("input_151_cast_fp16")]; tensor normed_211_axes_0 = const()[name = string("normed_211_axes_0"), val = tensor([-1])]; tensor normed_211_cast_fp16 = layer_norm(axes = normed_211_axes_0, epsilon = var_8_to_fp16, x = input_151_cast_fp16)[name = string("normed_211_cast_fp16")]; tensor var_1939_split_sizes_0 = const()[name = string("op_1939_split_sizes_0"), val = tensor([768, 768])]; int32 var_1939_axis_0 = const()[name = string("op_1939_axis_0"), val = int32(-1)]; tensor var_1939_cast_fp16_0, tensor var_1939_cast_fp16_1 = split(axis = var_1939_axis_0, split_sizes = var_1939_split_sizes_0, x = normed_211_cast_fp16)[name = string("op_1939_cast_fp16")]; tensor var_1943_to_fp16 = const()[name = string("op_1943_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299025536)))]; tensor out_91_cast_fp16 = mul(x = var_1939_cast_fp16_0, y = var_1943_to_fp16)[name = string("out_91_cast_fp16")]; tensor x_123_cast_fp16 = add(x = x_113_cast_fp16, y = out_91_cast_fp16)[name = string("x_123_cast_fp16")]; fp16 const_108_promoted_to_fp16 = const()[name = string("const_108_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1950_cast_fp16 = mul(x = x_123_cast_fp16, y = const_108_promoted_to_fp16)[name = string("op_1950_cast_fp16")]; bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; tensor input_153_cast_fp16 = concat(axis = var_23, interleave = input_153_interleave_0, values = (x_123_cast_fp16, var_1950_cast_fp16))[name = string("input_153_cast_fp16")]; tensor normed_215_axes_0 = const()[name = string("normed_215_axes_0"), val = tensor([-1])]; tensor normed_215_cast_fp16 = layer_norm(axes = normed_215_axes_0, epsilon = var_8_to_fp16, x = input_153_cast_fp16)[name = string("normed_215_cast_fp16")]; tensor var_1955_split_sizes_0 = const()[name = string("op_1955_split_sizes_0"), val = tensor([768, 768])]; int32 var_1955_axis_0 = const()[name = string("op_1955_axis_0"), val = int32(-1)]; tensor var_1955_cast_fp16_0, tensor var_1955_cast_fp16_1 = split(axis = var_1955_axis_0, split_sizes = var_1955_split_sizes_0, x = normed_215_cast_fp16)[name = string("op_1955_cast_fp16")]; tensor var_1959_to_fp16 = const()[name = string("op_1959_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299027136)))]; tensor out_93_cast_fp16 = mul(x = var_1955_cast_fp16_0, y = var_1959_to_fp16)[name = string("out_93_cast_fp16")]; tensor var_1966 = const()[name = string("op_1966"), val = tensor([0, 2, 1])]; tensor input_155_axes_0 = const()[name = string("input_155_axes_0"), val = tensor([2])]; tensor var_1967 = transpose(perm = var_1966, x = out_93_cast_fp16)[name = string("transpose_145")]; tensor input_155 = expand_dims(axes = input_155_axes_0, x = var_1967)[name = string("input_155")]; string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = encoder_layers_7_mlp_gate_proj_weight_quantized, x = input_155)[name = string("gate_29")]; string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = encoder_layers_7_mlp_up_proj_weight_quantized, x = input_155)[name = string("up_15")]; string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; tensor input_157 = mul(x = gate_31, y = up_15)[name = string("input_157")]; string var_1988_pad_type_0 = const()[name = string("op_1988_pad_type_0"), val = string("valid")]; tensor var_1988_strides_0 = const()[name = string("op_1988_strides_0"), val = tensor([1, 1])]; tensor var_1988_pad_0 = const()[name = string("op_1988_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1988_dilations_0 = const()[name = string("op_1988_dilations_0"), val = tensor([1, 1])]; int32 var_1988_groups_0 = const()[name = string("op_1988_groups_0"), val = int32(1)]; tensor var_1988 = conv(dilations = var_1988_dilations_0, groups = var_1988_groups_0, pad = var_1988_pad_0, pad_type = var_1988_pad_type_0, strides = var_1988_strides_0, weight = encoder_layers_7_mlp_down_proj_weight_quantized, x = input_157)[name = string("op_1988")]; tensor var_1989_axes_0 = const()[name = string("op_1989_axes_0"), val = tensor([2])]; tensor var_1989 = squeeze(axes = var_1989_axes_0, x = var_1988)[name = string("op_1989")]; tensor var_1990 = const()[name = string("op_1990"), val = tensor([0, 2, 1])]; fp16 const_110_promoted_to_fp16 = const()[name = string("const_110_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_127 = transpose(perm = var_1990, x = var_1989)[name = string("transpose_144")]; tensor var_1994_cast_fp16 = mul(x = x_127, y = const_110_promoted_to_fp16)[name = string("op_1994_cast_fp16")]; bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; tensor input_159_cast_fp16 = concat(axis = var_23, interleave = input_159_interleave_0, values = (x_127, var_1994_cast_fp16))[name = string("input_159_cast_fp16")]; tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_8_to_fp16, x = input_159_cast_fp16)[name = string("normed_221_cast_fp16")]; tensor var_1999_split_sizes_0 = const()[name = string("op_1999_split_sizes_0"), val = tensor([768, 768])]; int32 var_1999_axis_0 = const()[name = string("op_1999_axis_0"), val = int32(-1)]; tensor var_1999_cast_fp16_0, tensor var_1999_cast_fp16_1 = split(axis = var_1999_axis_0, split_sizes = var_1999_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_1999_cast_fp16")]; tensor var_2003_to_fp16 = const()[name = string("op_2003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299028736)))]; tensor out_95_cast_fp16 = mul(x = var_1999_cast_fp16_0, y = var_2003_to_fp16)[name = string("out_95_cast_fp16")]; tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = out_95_cast_fp16)[name = string("x_129_cast_fp16")]; fp16 const_112_promoted_to_fp16 = const()[name = string("const_112_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2032_cast_fp16 = mul(x = x_129_cast_fp16, y = const_112_promoted_to_fp16)[name = string("op_2032_cast_fp16")]; bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; tensor input_161_cast_fp16 = concat(axis = var_23, interleave = input_161_interleave_0, values = (x_129_cast_fp16, var_2032_cast_fp16))[name = string("input_161_cast_fp16")]; tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_8_to_fp16, x = input_161_cast_fp16)[name = string("normed_225_cast_fp16")]; tensor var_2037_split_sizes_0 = const()[name = string("op_2037_split_sizes_0"), val = tensor([768, 768])]; int32 var_2037_axis_0 = const()[name = string("op_2037_axis_0"), val = int32(-1)]; tensor var_2037_cast_fp16_0, tensor var_2037_cast_fp16_1 = split(axis = var_2037_axis_0, split_sizes = var_2037_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_2037_cast_fp16")]; tensor var_2041_to_fp16 = const()[name = string("op_2041_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299030336)))]; tensor out_97_cast_fp16 = mul(x = var_2037_cast_fp16_0, y = var_2041_to_fp16)[name = string("out_97_cast_fp16")]; tensor var_2047 = const()[name = string("op_2047"), val = tensor([0, 2, 1])]; tensor var_2049_axes_0 = const()[name = string("op_2049_axes_0"), val = tensor([2])]; tensor var_2048_cast_fp16 = transpose(perm = var_2047, x = out_97_cast_fp16)[name = string("transpose_143")]; tensor var_2049_cast_fp16 = expand_dims(axes = var_2049_axes_0, x = var_2048_cast_fp16)[name = string("op_2049_cast_fp16")]; string var_2056_pad_type_0 = const()[name = string("op_2056_pad_type_0"), val = string("valid")]; tensor var_2056_strides_0 = const()[name = string("op_2056_strides_0"), val = tensor([1, 1])]; tensor var_2056_pad_0 = const()[name = string("op_2056_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2056_dilations_0 = const()[name = string("op_2056_dilations_0"), val = tensor([1, 1])]; int32 var_2056_groups_0 = const()[name = string("op_2056_groups_0"), val = int32(1)]; tensor var_2056 = conv(dilations = var_2056_dilations_0, groups = var_2056_groups_0, pad = var_2056_pad_0, pad_type = var_2056_pad_type_0, strides = var_2056_strides_0, weight = encoder_layers_8_self_attn_q_proj_weight_quantized, x = var_2049_cast_fp16)[name = string("op_2056")]; tensor var_2057 = const()[name = string("op_2057"), val = tensor([1, 3, 256, 128])]; tensor var_2058 = reshape(shape = var_2057, x = var_2056)[name = string("op_2058")]; tensor var_2059 = const()[name = string("op_2059"), val = tensor([0, 1, 3, 2])]; string var_2066_pad_type_0 = const()[name = string("op_2066_pad_type_0"), val = string("valid")]; tensor var_2066_strides_0 = const()[name = string("op_2066_strides_0"), val = tensor([1, 1])]; tensor var_2066_pad_0 = const()[name = string("op_2066_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2066_dilations_0 = const()[name = string("op_2066_dilations_0"), val = tensor([1, 1])]; int32 var_2066_groups_0 = const()[name = string("op_2066_groups_0"), val = int32(1)]; tensor var_2066 = conv(dilations = var_2066_dilations_0, groups = var_2066_groups_0, pad = var_2066_pad_0, pad_type = var_2066_pad_type_0, strides = var_2066_strides_0, weight = encoder_layers_8_self_attn_k_proj_weight_quantized, x = var_2049_cast_fp16)[name = string("op_2066")]; tensor var_2067 = const()[name = string("op_2067"), val = tensor([1, 1, 256, 128])]; tensor var_2068 = reshape(shape = var_2067, x = var_2066)[name = string("op_2068")]; tensor var_2069 = const()[name = string("op_2069"), val = tensor([0, 1, 3, 2])]; string var_2076_pad_type_0 = const()[name = string("op_2076_pad_type_0"), val = string("valid")]; tensor var_2076_strides_0 = const()[name = string("op_2076_strides_0"), val = tensor([1, 1])]; tensor var_2076_pad_0 = const()[name = string("op_2076_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2076_dilations_0 = const()[name = string("op_2076_dilations_0"), val = tensor([1, 1])]; int32 var_2076_groups_0 = const()[name = string("op_2076_groups_0"), val = int32(1)]; tensor var_2076 = conv(dilations = var_2076_dilations_0, groups = var_2076_groups_0, pad = var_2076_pad_0, pad_type = var_2076_pad_type_0, strides = var_2076_strides_0, weight = encoder_layers_8_self_attn_v_proj_weight_quantized, x = var_2049_cast_fp16)[name = string("op_2076")]; tensor var_2077 = const()[name = string("op_2077"), val = tensor([1, 1, 256, 128])]; tensor var_2078 = reshape(shape = var_2077, x = var_2076)[name = string("op_2078")]; tensor var_2079 = const()[name = string("op_2079"), val = tensor([0, 1, 3, 2])]; fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_49 = transpose(perm = var_2059, x = var_2058)[name = string("transpose_142")]; tensor var_2085_cast_fp16 = mul(x = q_49, y = const_114_promoted_to_fp16)[name = string("op_2085_cast_fp16")]; bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; tensor input_165_cast_fp16 = concat(axis = var_23, interleave = input_165_interleave_0, values = (q_49, var_2085_cast_fp16))[name = string("input_165_cast_fp16")]; tensor normed_231_axes_0 = const()[name = string("normed_231_axes_0"), val = tensor([-1])]; tensor normed_231_cast_fp16 = layer_norm(axes = normed_231_axes_0, epsilon = var_8_to_fp16, x = input_165_cast_fp16)[name = string("normed_231_cast_fp16")]; tensor var_2090_split_sizes_0 = const()[name = string("op_2090_split_sizes_0"), val = tensor([256, 256])]; int32 var_2090_axis_0 = const()[name = string("op_2090_axis_0"), val = int32(-1)]; tensor var_2090_cast_fp16_0, tensor var_2090_cast_fp16_1 = split(axis = var_2090_axis_0, split_sizes = var_2090_split_sizes_0, x = normed_231_cast_fp16)[name = string("op_2090_cast_fp16")]; tensor var_2094_to_fp16 = const()[name = string("op_2094_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299031936)))]; tensor out_99_cast_fp16 = mul(x = var_2090_cast_fp16_0, y = var_2094_to_fp16)[name = string("out_99_cast_fp16")]; fp16 const_116_promoted_to_fp16 = const()[name = string("const_116_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_49 = transpose(perm = var_2069, x = var_2068)[name = string("transpose_141")]; tensor var_2101_cast_fp16 = mul(x = k_49, y = const_116_promoted_to_fp16)[name = string("op_2101_cast_fp16")]; bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; tensor input_167_cast_fp16 = concat(axis = var_23, interleave = input_167_interleave_0, values = (k_49, var_2101_cast_fp16))[name = string("input_167_cast_fp16")]; tensor normed_235_axes_0 = const()[name = string("normed_235_axes_0"), val = tensor([-1])]; tensor normed_235_cast_fp16 = layer_norm(axes = normed_235_axes_0, epsilon = var_8_to_fp16, x = input_167_cast_fp16)[name = string("normed_235_cast_fp16")]; tensor var_2106_split_sizes_0 = const()[name = string("op_2106_split_sizes_0"), val = tensor([256, 256])]; int32 var_2106_axis_0 = const()[name = string("op_2106_axis_0"), val = int32(-1)]; tensor var_2106_cast_fp16_0, tensor var_2106_cast_fp16_1 = split(axis = var_2106_axis_0, split_sizes = var_2106_split_sizes_0, x = normed_235_cast_fp16)[name = string("op_2106_cast_fp16")]; tensor var_2110_to_fp16 = const()[name = string("op_2110_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299032512)))]; tensor out_101_cast_fp16 = mul(x = var_2106_cast_fp16_0, y = var_2110_to_fp16)[name = string("out_101_cast_fp16")]; tensor var_2113 = mul(x = out_99_cast_fp16, y = cos_1_quantized)[name = string("op_2113")]; tensor var_2114_split_sizes_0 = const()[name = string("op_2114_split_sizes_0"), val = tensor([128, 128])]; int32 var_2114_axis_0 = const()[name = string("op_2114_axis_0"), val = int32(-1)]; tensor var_2114_0, tensor var_2114_1 = split(axis = var_2114_axis_0, split_sizes = var_2114_split_sizes_0, x = out_99_cast_fp16)[name = string("op_2114")]; fp16 const_118_promoted = const()[name = string("const_118_promoted"), val = fp16(-0x1p+0)]; tensor var_2116 = mul(x = var_2114_1, y = const_118_promoted)[name = string("op_2116")]; bool var_2118_interleave_0 = const()[name = string("op_2118_interleave_0"), val = bool(false)]; tensor var_2118 = concat(axis = var_23, interleave = var_2118_interleave_0, values = (var_2116, var_2114_0))[name = string("op_2118")]; tensor var_2119 = mul(x = var_2118, y = sin_1_quantized)[name = string("op_2119")]; tensor q_53 = add(x = var_2113, y = var_2119)[name = string("q_53")]; tensor var_2121 = mul(x = out_101_cast_fp16, y = cos_1_quantized)[name = string("op_2121")]; tensor var_2122_split_sizes_0 = const()[name = string("op_2122_split_sizes_0"), val = tensor([128, 128])]; int32 var_2122_axis_0 = const()[name = string("op_2122_axis_0"), val = int32(-1)]; tensor var_2122_0, tensor var_2122_1 = split(axis = var_2122_axis_0, split_sizes = var_2122_split_sizes_0, x = out_101_cast_fp16)[name = string("op_2122")]; fp16 const_119_promoted = const()[name = string("const_119_promoted"), val = fp16(-0x1p+0)]; tensor var_2124 = mul(x = var_2122_1, y = const_119_promoted)[name = string("op_2124")]; bool var_2126_interleave_0 = const()[name = string("op_2126_interleave_0"), val = bool(false)]; tensor var_2126 = concat(axis = var_23, interleave = var_2126_interleave_0, values = (var_2124, var_2122_0))[name = string("op_2126")]; tensor var_2127 = mul(x = var_2126, y = sin_1_quantized)[name = string("op_2127")]; tensor hidden_states_97 = add(x = var_2121, y = var_2127)[name = string("hidden_states_97")]; tensor hidden_states_99_axes_0 = const()[name = string("hidden_states_99_axes_0"), val = tensor([2])]; tensor hidden_states_99 = expand_dims(axes = hidden_states_99_axes_0, x = hidden_states_97)[name = string("hidden_states_99")]; tensor var_2130 = const()[name = string("op_2130"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_101 = tile(reps = var_2130, x = hidden_states_99)[name = string("hidden_states_101")]; tensor var_2132 = const()[name = string("op_2132"), val = tensor([1, 3, 128, 256])]; tensor k_53 = reshape(shape = var_2132, x = hidden_states_101)[name = string("k_53")]; tensor hidden_states_105_axes_0 = const()[name = string("hidden_states_105_axes_0"), val = tensor([2])]; tensor hidden_states_103 = transpose(perm = var_2079, x = var_2078)[name = string("transpose_140")]; tensor hidden_states_105 = expand_dims(axes = hidden_states_105_axes_0, x = hidden_states_103)[name = string("hidden_states_105")]; tensor var_2135 = const()[name = string("op_2135"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_107 = tile(reps = var_2135, x = hidden_states_105)[name = string("hidden_states_107")]; tensor var_2137 = const()[name = string("op_2137"), val = tensor([1, 3, 128, 256])]; tensor v_17 = reshape(shape = var_2137, x = hidden_states_107)[name = string("v_17")]; bool var_2142_transpose_x_1 = const()[name = string("op_2142_transpose_x_1"), val = bool(false)]; bool var_2142_transpose_y_1 = const()[name = string("op_2142_transpose_y_1"), val = bool(true)]; tensor var_2142_cast_fp16 = matmul(transpose_x = var_2142_transpose_x_1, transpose_y = var_2142_transpose_y_1, x = q_53, y = k_53)[name = string("op_2142_cast_fp16")]; fp16 var_2143_to_fp16 = const()[name = string("op_2143_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_49_cast_fp16 = mul(x = var_2142_cast_fp16, y = var_2143_to_fp16)[name = string("attn_weights_49_cast_fp16")]; tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_51_cast_fp16")]; tensor var_2147_cast_fp16 = softmax(axis = var_23, x = attn_weights_51_cast_fp16)[name = string("op_2147_cast_fp16")]; bool var_2151_transpose_x_0 = const()[name = string("op_2151_transpose_x_0"), val = bool(false)]; bool var_2151_transpose_y_0 = const()[name = string("op_2151_transpose_y_0"), val = bool(false)]; tensor var_2151_cast_fp16 = matmul(transpose_x = var_2151_transpose_x_0, transpose_y = var_2151_transpose_y_0, x = var_2147_cast_fp16, y = v_17)[name = string("op_2151_cast_fp16")]; tensor var_2153 = const()[name = string("op_2153"), val = tensor([0, 2, 1, 3])]; tensor var_2156 = const()[name = string("op_2156"), val = tensor([1, 128, 768])]; tensor var_2154 = transpose(perm = var_2153, x = var_2151_cast_fp16)[name = string("transpose_139")]; tensor attn_out_51 = reshape(shape = var_2156, x = var_2154)[name = string("attn_out_51")]; tensor var_2158 = const()[name = string("op_2158"), val = tensor([0, 2, 1])]; tensor squeeze_8_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299033088))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299622976))))[name = string("squeeze_8_quantized")]; string var_2167_pad_type_0 = const()[name = string("op_2167_pad_type_0"), val = string("valid")]; int32 var_2167_groups_0 = const()[name = string("op_2167_groups_0"), val = int32(1)]; tensor var_2167_strides_0 = const()[name = string("op_2167_strides_0"), val = tensor([1])]; tensor var_2167_pad_0 = const()[name = string("op_2167_pad_0"), val = tensor([0, 0])]; tensor var_2167_dilations_0 = const()[name = string("op_2167_dilations_0"), val = tensor([1])]; tensor var_2159 = transpose(perm = var_2158, x = attn_out_51)[name = string("transpose_138")]; tensor var_2167 = conv(dilations = var_2167_dilations_0, groups = var_2167_groups_0, pad = var_2167_pad_0, pad_type = var_2167_pad_type_0, strides = var_2167_strides_0, weight = squeeze_8_quantized, x = var_2159)[name = string("op_2167")]; tensor var_2168 = const()[name = string("op_2168"), val = tensor([0, 2, 1])]; fp16 const_120_promoted_to_fp16 = const()[name = string("const_120_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_137 = transpose(perm = var_2168, x = var_2167)[name = string("transpose_137")]; tensor var_2172_cast_fp16 = mul(x = x_137, y = const_120_promoted_to_fp16)[name = string("op_2172_cast_fp16")]; bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; tensor input_171_cast_fp16 = concat(axis = var_23, interleave = input_171_interleave_0, values = (x_137, var_2172_cast_fp16))[name = string("input_171_cast_fp16")]; tensor normed_239_axes_0 = const()[name = string("normed_239_axes_0"), val = tensor([-1])]; tensor normed_239_cast_fp16 = layer_norm(axes = normed_239_axes_0, epsilon = var_8_to_fp16, x = input_171_cast_fp16)[name = string("normed_239_cast_fp16")]; tensor var_2177_split_sizes_0 = const()[name = string("op_2177_split_sizes_0"), val = tensor([768, 768])]; int32 var_2177_axis_0 = const()[name = string("op_2177_axis_0"), val = int32(-1)]; tensor var_2177_cast_fp16_0, tensor var_2177_cast_fp16_1 = split(axis = var_2177_axis_0, split_sizes = var_2177_split_sizes_0, x = normed_239_cast_fp16)[name = string("op_2177_cast_fp16")]; tensor var_2181_to_fp16 = const()[name = string("op_2181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299624576)))]; tensor out_103_cast_fp16 = mul(x = var_2177_cast_fp16_0, y = var_2181_to_fp16)[name = string("out_103_cast_fp16")]; tensor x_139_cast_fp16 = add(x = x_129_cast_fp16, y = out_103_cast_fp16)[name = string("x_139_cast_fp16")]; fp16 const_122_promoted_to_fp16 = const()[name = string("const_122_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2188_cast_fp16 = mul(x = x_139_cast_fp16, y = const_122_promoted_to_fp16)[name = string("op_2188_cast_fp16")]; bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; tensor input_173_cast_fp16 = concat(axis = var_23, interleave = input_173_interleave_0, values = (x_139_cast_fp16, var_2188_cast_fp16))[name = string("input_173_cast_fp16")]; tensor normed_243_axes_0 = const()[name = string("normed_243_axes_0"), val = tensor([-1])]; tensor normed_243_cast_fp16 = layer_norm(axes = normed_243_axes_0, epsilon = var_8_to_fp16, x = input_173_cast_fp16)[name = string("normed_243_cast_fp16")]; tensor var_2193_split_sizes_0 = const()[name = string("op_2193_split_sizes_0"), val = tensor([768, 768])]; int32 var_2193_axis_0 = const()[name = string("op_2193_axis_0"), val = int32(-1)]; tensor var_2193_cast_fp16_0, tensor var_2193_cast_fp16_1 = split(axis = var_2193_axis_0, split_sizes = var_2193_split_sizes_0, x = normed_243_cast_fp16)[name = string("op_2193_cast_fp16")]; tensor var_2197_to_fp16 = const()[name = string("op_2197_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299626176)))]; tensor out_105_cast_fp16 = mul(x = var_2193_cast_fp16_0, y = var_2197_to_fp16)[name = string("out_105_cast_fp16")]; tensor var_2204 = const()[name = string("op_2204"), val = tensor([0, 2, 1])]; tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; tensor var_2205 = transpose(perm = var_2204, x = out_105_cast_fp16)[name = string("transpose_136")]; tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_2205)[name = string("input_175")]; string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = encoder_layers_8_mlp_gate_proj_weight_quantized, x = input_175)[name = string("gate_33")]; string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")]; tensor up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor([1, 1])]; tensor up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor([1, 1])]; int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)]; tensor up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = encoder_layers_8_mlp_up_proj_weight_quantized, x = input_175)[name = string("up_17")]; string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")]; tensor input_177 = mul(x = gate_35, y = up_17)[name = string("input_177")]; string var_2226_pad_type_0 = const()[name = string("op_2226_pad_type_0"), val = string("valid")]; tensor var_2226_strides_0 = const()[name = string("op_2226_strides_0"), val = tensor([1, 1])]; tensor var_2226_pad_0 = const()[name = string("op_2226_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2226_dilations_0 = const()[name = string("op_2226_dilations_0"), val = tensor([1, 1])]; int32 var_2226_groups_0 = const()[name = string("op_2226_groups_0"), val = int32(1)]; tensor var_2226 = conv(dilations = var_2226_dilations_0, groups = var_2226_groups_0, pad = var_2226_pad_0, pad_type = var_2226_pad_type_0, strides = var_2226_strides_0, weight = encoder_layers_8_mlp_down_proj_weight_quantized, x = input_177)[name = string("op_2226")]; tensor var_2227_axes_0 = const()[name = string("op_2227_axes_0"), val = tensor([2])]; tensor var_2227 = squeeze(axes = var_2227_axes_0, x = var_2226)[name = string("op_2227")]; tensor var_2228 = const()[name = string("op_2228"), val = tensor([0, 2, 1])]; fp16 const_124_promoted_to_fp16 = const()[name = string("const_124_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_143 = transpose(perm = var_2228, x = var_2227)[name = string("transpose_135")]; tensor var_2232_cast_fp16 = mul(x = x_143, y = const_124_promoted_to_fp16)[name = string("op_2232_cast_fp16")]; bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; tensor input_179_cast_fp16 = concat(axis = var_23, interleave = input_179_interleave_0, values = (x_143, var_2232_cast_fp16))[name = string("input_179_cast_fp16")]; tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_8_to_fp16, x = input_179_cast_fp16)[name = string("normed_249_cast_fp16")]; tensor var_2237_split_sizes_0 = const()[name = string("op_2237_split_sizes_0"), val = tensor([768, 768])]; int32 var_2237_axis_0 = const()[name = string("op_2237_axis_0"), val = int32(-1)]; tensor var_2237_cast_fp16_0, tensor var_2237_cast_fp16_1 = split(axis = var_2237_axis_0, split_sizes = var_2237_split_sizes_0, x = normed_249_cast_fp16)[name = string("op_2237_cast_fp16")]; tensor var_2241_to_fp16 = const()[name = string("op_2241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299627776)))]; tensor out_107_cast_fp16 = mul(x = var_2237_cast_fp16_0, y = var_2241_to_fp16)[name = string("out_107_cast_fp16")]; tensor x_145_cast_fp16 = add(x = x_139_cast_fp16, y = out_107_cast_fp16)[name = string("x_145_cast_fp16")]; fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2270_cast_fp16 = mul(x = x_145_cast_fp16, y = const_126_promoted_to_fp16)[name = string("op_2270_cast_fp16")]; bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; tensor input_181_cast_fp16 = concat(axis = var_23, interleave = input_181_interleave_0, values = (x_145_cast_fp16, var_2270_cast_fp16))[name = string("input_181_cast_fp16")]; tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_8_to_fp16, x = input_181_cast_fp16)[name = string("normed_253_cast_fp16")]; tensor var_2275_split_sizes_0 = const()[name = string("op_2275_split_sizes_0"), val = tensor([768, 768])]; int32 var_2275_axis_0 = const()[name = string("op_2275_axis_0"), val = int32(-1)]; tensor var_2275_cast_fp16_0, tensor var_2275_cast_fp16_1 = split(axis = var_2275_axis_0, split_sizes = var_2275_split_sizes_0, x = normed_253_cast_fp16)[name = string("op_2275_cast_fp16")]; tensor var_2279_to_fp16 = const()[name = string("op_2279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299629376)))]; tensor out_109_cast_fp16 = mul(x = var_2275_cast_fp16_0, y = var_2279_to_fp16)[name = string("out_109_cast_fp16")]; tensor var_2285 = const()[name = string("op_2285"), val = tensor([0, 2, 1])]; tensor var_2287_axes_0 = const()[name = string("op_2287_axes_0"), val = tensor([2])]; tensor var_2286_cast_fp16 = transpose(perm = var_2285, x = out_109_cast_fp16)[name = string("transpose_134")]; tensor var_2287_cast_fp16 = expand_dims(axes = var_2287_axes_0, x = var_2286_cast_fp16)[name = string("op_2287_cast_fp16")]; string var_2294_pad_type_0 = const()[name = string("op_2294_pad_type_0"), val = string("valid")]; tensor var_2294_strides_0 = const()[name = string("op_2294_strides_0"), val = tensor([1, 1])]; tensor var_2294_pad_0 = const()[name = string("op_2294_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2294_dilations_0 = const()[name = string("op_2294_dilations_0"), val = tensor([1, 1])]; int32 var_2294_groups_0 = const()[name = string("op_2294_groups_0"), val = int32(1)]; tensor var_2294 = conv(dilations = var_2294_dilations_0, groups = var_2294_groups_0, pad = var_2294_pad_0, pad_type = var_2294_pad_type_0, strides = var_2294_strides_0, weight = encoder_layers_9_self_attn_q_proj_weight_quantized, x = var_2287_cast_fp16)[name = string("op_2294")]; tensor var_2295 = const()[name = string("op_2295"), val = tensor([1, 3, 256, 128])]; tensor var_2296 = reshape(shape = var_2295, x = var_2294)[name = string("op_2296")]; tensor var_2297 = const()[name = string("op_2297"), val = tensor([0, 1, 3, 2])]; string var_2304_pad_type_0 = const()[name = string("op_2304_pad_type_0"), val = string("valid")]; tensor var_2304_strides_0 = const()[name = string("op_2304_strides_0"), val = tensor([1, 1])]; tensor var_2304_pad_0 = const()[name = string("op_2304_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2304_dilations_0 = const()[name = string("op_2304_dilations_0"), val = tensor([1, 1])]; int32 var_2304_groups_0 = const()[name = string("op_2304_groups_0"), val = int32(1)]; tensor var_2304 = conv(dilations = var_2304_dilations_0, groups = var_2304_groups_0, pad = var_2304_pad_0, pad_type = var_2304_pad_type_0, strides = var_2304_strides_0, weight = encoder_layers_9_self_attn_k_proj_weight_quantized, x = var_2287_cast_fp16)[name = string("op_2304")]; tensor var_2305 = const()[name = string("op_2305"), val = tensor([1, 1, 256, 128])]; tensor var_2306 = reshape(shape = var_2305, x = var_2304)[name = string("op_2306")]; tensor var_2307 = const()[name = string("op_2307"), val = tensor([0, 1, 3, 2])]; string var_2314_pad_type_0 = const()[name = string("op_2314_pad_type_0"), val = string("valid")]; tensor var_2314_strides_0 = const()[name = string("op_2314_strides_0"), val = tensor([1, 1])]; tensor var_2314_pad_0 = const()[name = string("op_2314_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2314_dilations_0 = const()[name = string("op_2314_dilations_0"), val = tensor([1, 1])]; int32 var_2314_groups_0 = const()[name = string("op_2314_groups_0"), val = int32(1)]; tensor var_2314 = conv(dilations = var_2314_dilations_0, groups = var_2314_groups_0, pad = var_2314_pad_0, pad_type = var_2314_pad_type_0, strides = var_2314_strides_0, weight = encoder_layers_9_self_attn_v_proj_weight_quantized, x = var_2287_cast_fp16)[name = string("op_2314")]; tensor var_2315 = const()[name = string("op_2315"), val = tensor([1, 1, 256, 128])]; tensor var_2316 = reshape(shape = var_2315, x = var_2314)[name = string("op_2316")]; tensor var_2317 = const()[name = string("op_2317"), val = tensor([0, 1, 3, 2])]; fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_55 = transpose(perm = var_2297, x = var_2296)[name = string("transpose_133")]; tensor var_2323_cast_fp16 = mul(x = q_55, y = const_128_promoted_to_fp16)[name = string("op_2323_cast_fp16")]; bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; tensor input_185_cast_fp16 = concat(axis = var_23, interleave = input_185_interleave_0, values = (q_55, var_2323_cast_fp16))[name = string("input_185_cast_fp16")]; tensor normed_259_axes_0 = const()[name = string("normed_259_axes_0"), val = tensor([-1])]; tensor normed_259_cast_fp16 = layer_norm(axes = normed_259_axes_0, epsilon = var_8_to_fp16, x = input_185_cast_fp16)[name = string("normed_259_cast_fp16")]; tensor var_2328_split_sizes_0 = const()[name = string("op_2328_split_sizes_0"), val = tensor([256, 256])]; int32 var_2328_axis_0 = const()[name = string("op_2328_axis_0"), val = int32(-1)]; tensor var_2328_cast_fp16_0, tensor var_2328_cast_fp16_1 = split(axis = var_2328_axis_0, split_sizes = var_2328_split_sizes_0, x = normed_259_cast_fp16)[name = string("op_2328_cast_fp16")]; tensor var_2332_to_fp16 = const()[name = string("op_2332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299630976)))]; tensor out_111_cast_fp16 = mul(x = var_2328_cast_fp16_0, y = var_2332_to_fp16)[name = string("out_111_cast_fp16")]; fp16 const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_55 = transpose(perm = var_2307, x = var_2306)[name = string("transpose_132")]; tensor var_2339_cast_fp16 = mul(x = k_55, y = const_130_promoted_to_fp16)[name = string("op_2339_cast_fp16")]; bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; tensor input_187_cast_fp16 = concat(axis = var_23, interleave = input_187_interleave_0, values = (k_55, var_2339_cast_fp16))[name = string("input_187_cast_fp16")]; tensor normed_263_axes_0 = const()[name = string("normed_263_axes_0"), val = tensor([-1])]; tensor normed_263_cast_fp16 = layer_norm(axes = normed_263_axes_0, epsilon = var_8_to_fp16, x = input_187_cast_fp16)[name = string("normed_263_cast_fp16")]; tensor var_2344_split_sizes_0 = const()[name = string("op_2344_split_sizes_0"), val = tensor([256, 256])]; int32 var_2344_axis_0 = const()[name = string("op_2344_axis_0"), val = int32(-1)]; tensor var_2344_cast_fp16_0, tensor var_2344_cast_fp16_1 = split(axis = var_2344_axis_0, split_sizes = var_2344_split_sizes_0, x = normed_263_cast_fp16)[name = string("op_2344_cast_fp16")]; tensor var_2348_to_fp16 = const()[name = string("op_2348_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299631552)))]; tensor out_113_cast_fp16 = mul(x = var_2344_cast_fp16_0, y = var_2348_to_fp16)[name = string("out_113_cast_fp16")]; tensor var_2351 = mul(x = out_111_cast_fp16, y = cos_1_quantized)[name = string("op_2351")]; tensor var_2352_split_sizes_0 = const()[name = string("op_2352_split_sizes_0"), val = tensor([128, 128])]; int32 var_2352_axis_0 = const()[name = string("op_2352_axis_0"), val = int32(-1)]; tensor var_2352_0, tensor var_2352_1 = split(axis = var_2352_axis_0, split_sizes = var_2352_split_sizes_0, x = out_111_cast_fp16)[name = string("op_2352")]; fp16 const_132_promoted = const()[name = string("const_132_promoted"), val = fp16(-0x1p+0)]; tensor var_2354 = mul(x = var_2352_1, y = const_132_promoted)[name = string("op_2354")]; bool var_2356_interleave_0 = const()[name = string("op_2356_interleave_0"), val = bool(false)]; tensor var_2356 = concat(axis = var_23, interleave = var_2356_interleave_0, values = (var_2354, var_2352_0))[name = string("op_2356")]; tensor var_2357 = mul(x = var_2356, y = sin_1_quantized)[name = string("op_2357")]; tensor q_59 = add(x = var_2351, y = var_2357)[name = string("q_59")]; tensor var_2359 = mul(x = out_113_cast_fp16, y = cos_1_quantized)[name = string("op_2359")]; tensor var_2360_split_sizes_0 = const()[name = string("op_2360_split_sizes_0"), val = tensor([128, 128])]; int32 var_2360_axis_0 = const()[name = string("op_2360_axis_0"), val = int32(-1)]; tensor var_2360_0, tensor var_2360_1 = split(axis = var_2360_axis_0, split_sizes = var_2360_split_sizes_0, x = out_113_cast_fp16)[name = string("op_2360")]; fp16 const_133_promoted = const()[name = string("const_133_promoted"), val = fp16(-0x1p+0)]; tensor var_2362 = mul(x = var_2360_1, y = const_133_promoted)[name = string("op_2362")]; bool var_2364_interleave_0 = const()[name = string("op_2364_interleave_0"), val = bool(false)]; tensor var_2364 = concat(axis = var_23, interleave = var_2364_interleave_0, values = (var_2362, var_2360_0))[name = string("op_2364")]; tensor var_2365 = mul(x = var_2364, y = sin_1_quantized)[name = string("op_2365")]; tensor hidden_states_109 = add(x = var_2359, y = var_2365)[name = string("hidden_states_109")]; tensor hidden_states_111_axes_0 = const()[name = string("hidden_states_111_axes_0"), val = tensor([2])]; tensor hidden_states_111 = expand_dims(axes = hidden_states_111_axes_0, x = hidden_states_109)[name = string("hidden_states_111")]; tensor var_2368 = const()[name = string("op_2368"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_113 = tile(reps = var_2368, x = hidden_states_111)[name = string("hidden_states_113")]; tensor var_2370 = const()[name = string("op_2370"), val = tensor([1, 3, 128, 256])]; tensor k_59 = reshape(shape = var_2370, x = hidden_states_113)[name = string("k_59")]; tensor hidden_states_117_axes_0 = const()[name = string("hidden_states_117_axes_0"), val = tensor([2])]; tensor hidden_states_115 = transpose(perm = var_2317, x = var_2316)[name = string("transpose_131")]; tensor hidden_states_117 = expand_dims(axes = hidden_states_117_axes_0, x = hidden_states_115)[name = string("hidden_states_117")]; tensor var_2373 = const()[name = string("op_2373"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_119 = tile(reps = var_2373, x = hidden_states_117)[name = string("hidden_states_119")]; tensor var_2375 = const()[name = string("op_2375"), val = tensor([1, 3, 128, 256])]; tensor v_19 = reshape(shape = var_2375, x = hidden_states_119)[name = string("v_19")]; bool var_2380_transpose_x_1 = const()[name = string("op_2380_transpose_x_1"), val = bool(false)]; bool var_2380_transpose_y_1 = const()[name = string("op_2380_transpose_y_1"), val = bool(true)]; tensor var_2380_cast_fp16 = matmul(transpose_x = var_2380_transpose_x_1, transpose_y = var_2380_transpose_y_1, x = q_59, y = k_59)[name = string("op_2380_cast_fp16")]; fp16 var_2381_to_fp16 = const()[name = string("op_2381_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_55_cast_fp16 = mul(x = var_2380_cast_fp16, y = var_2381_to_fp16)[name = string("attn_weights_55_cast_fp16")]; tensor attn_weights_57_cast_fp16 = add(x = attn_weights_55_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_57_cast_fp16")]; tensor var_2385_cast_fp16 = softmax(axis = var_23, x = attn_weights_57_cast_fp16)[name = string("op_2385_cast_fp16")]; bool var_2389_transpose_x_0 = const()[name = string("op_2389_transpose_x_0"), val = bool(false)]; bool var_2389_transpose_y_0 = const()[name = string("op_2389_transpose_y_0"), val = bool(false)]; tensor var_2389_cast_fp16 = matmul(transpose_x = var_2389_transpose_x_0, transpose_y = var_2389_transpose_y_0, x = var_2385_cast_fp16, y = v_19)[name = string("op_2389_cast_fp16")]; tensor var_2391 = const()[name = string("op_2391"), val = tensor([0, 2, 1, 3])]; tensor var_2394 = const()[name = string("op_2394"), val = tensor([1, 128, 768])]; tensor var_2392 = transpose(perm = var_2391, x = var_2389_cast_fp16)[name = string("transpose_130")]; tensor attn_out_57 = reshape(shape = var_2394, x = var_2392)[name = string("attn_out_57")]; tensor var_2396 = const()[name = string("op_2396"), val = tensor([0, 2, 1])]; tensor squeeze_9_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299632128))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300222016))))[name = string("squeeze_9_quantized")]; string var_2405_pad_type_0 = const()[name = string("op_2405_pad_type_0"), val = string("valid")]; int32 var_2405_groups_0 = const()[name = string("op_2405_groups_0"), val = int32(1)]; tensor var_2405_strides_0 = const()[name = string("op_2405_strides_0"), val = tensor([1])]; tensor var_2405_pad_0 = const()[name = string("op_2405_pad_0"), val = tensor([0, 0])]; tensor var_2405_dilations_0 = const()[name = string("op_2405_dilations_0"), val = tensor([1])]; tensor var_2397 = transpose(perm = var_2396, x = attn_out_57)[name = string("transpose_129")]; tensor var_2405 = conv(dilations = var_2405_dilations_0, groups = var_2405_groups_0, pad = var_2405_pad_0, pad_type = var_2405_pad_type_0, strides = var_2405_strides_0, weight = squeeze_9_quantized, x = var_2397)[name = string("op_2405")]; tensor var_2406 = const()[name = string("op_2406"), val = tensor([0, 2, 1])]; fp16 const_134_promoted_to_fp16 = const()[name = string("const_134_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_153 = transpose(perm = var_2406, x = var_2405)[name = string("transpose_128")]; tensor var_2410_cast_fp16 = mul(x = x_153, y = const_134_promoted_to_fp16)[name = string("op_2410_cast_fp16")]; bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; tensor input_191_cast_fp16 = concat(axis = var_23, interleave = input_191_interleave_0, values = (x_153, var_2410_cast_fp16))[name = string("input_191_cast_fp16")]; tensor normed_267_axes_0 = const()[name = string("normed_267_axes_0"), val = tensor([-1])]; tensor normed_267_cast_fp16 = layer_norm(axes = normed_267_axes_0, epsilon = var_8_to_fp16, x = input_191_cast_fp16)[name = string("normed_267_cast_fp16")]; tensor var_2415_split_sizes_0 = const()[name = string("op_2415_split_sizes_0"), val = tensor([768, 768])]; int32 var_2415_axis_0 = const()[name = string("op_2415_axis_0"), val = int32(-1)]; tensor var_2415_cast_fp16_0, tensor var_2415_cast_fp16_1 = split(axis = var_2415_axis_0, split_sizes = var_2415_split_sizes_0, x = normed_267_cast_fp16)[name = string("op_2415_cast_fp16")]; tensor var_2419_to_fp16 = const()[name = string("op_2419_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300223616)))]; tensor out_115_cast_fp16 = mul(x = var_2415_cast_fp16_0, y = var_2419_to_fp16)[name = string("out_115_cast_fp16")]; tensor x_155_cast_fp16 = add(x = x_145_cast_fp16, y = out_115_cast_fp16)[name = string("x_155_cast_fp16")]; fp16 const_136_promoted_to_fp16 = const()[name = string("const_136_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2426_cast_fp16 = mul(x = x_155_cast_fp16, y = const_136_promoted_to_fp16)[name = string("op_2426_cast_fp16")]; bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; tensor input_193_cast_fp16 = concat(axis = var_23, interleave = input_193_interleave_0, values = (x_155_cast_fp16, var_2426_cast_fp16))[name = string("input_193_cast_fp16")]; tensor normed_271_axes_0 = const()[name = string("normed_271_axes_0"), val = tensor([-1])]; tensor normed_271_cast_fp16 = layer_norm(axes = normed_271_axes_0, epsilon = var_8_to_fp16, x = input_193_cast_fp16)[name = string("normed_271_cast_fp16")]; tensor var_2431_split_sizes_0 = const()[name = string("op_2431_split_sizes_0"), val = tensor([768, 768])]; int32 var_2431_axis_0 = const()[name = string("op_2431_axis_0"), val = int32(-1)]; tensor var_2431_cast_fp16_0, tensor var_2431_cast_fp16_1 = split(axis = var_2431_axis_0, split_sizes = var_2431_split_sizes_0, x = normed_271_cast_fp16)[name = string("op_2431_cast_fp16")]; tensor var_2435_to_fp16 = const()[name = string("op_2435_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300225216)))]; tensor out_117_cast_fp16 = mul(x = var_2431_cast_fp16_0, y = var_2435_to_fp16)[name = string("out_117_cast_fp16")]; tensor var_2442 = const()[name = string("op_2442"), val = tensor([0, 2, 1])]; tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; tensor var_2443 = transpose(perm = var_2442, x = out_117_cast_fp16)[name = string("transpose_127")]; tensor input_195 = expand_dims(axes = input_195_axes_0, x = var_2443)[name = string("input_195")]; string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")]; tensor gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor([1, 1])]; tensor gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor([1, 1])]; int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)]; tensor gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = encoder_layers_9_mlp_gate_proj_weight_quantized, x = input_195)[name = string("gate_37")]; string up_19_pad_type_0 = const()[name = string("up_19_pad_type_0"), val = string("valid")]; tensor up_19_strides_0 = const()[name = string("up_19_strides_0"), val = tensor([1, 1])]; tensor up_19_pad_0 = const()[name = string("up_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_19_dilations_0 = const()[name = string("up_19_dilations_0"), val = tensor([1, 1])]; int32 up_19_groups_0 = const()[name = string("up_19_groups_0"), val = int32(1)]; tensor up_19 = conv(dilations = up_19_dilations_0, groups = up_19_groups_0, pad = up_19_pad_0, pad_type = up_19_pad_type_0, strides = up_19_strides_0, weight = encoder_layers_9_mlp_up_proj_weight_quantized, x = input_195)[name = string("up_19")]; string gate_39_mode_0 = const()[name = string("gate_39_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_39 = gelu(mode = gate_39_mode_0, x = gate_37)[name = string("gate_39")]; tensor input_197 = mul(x = gate_39, y = up_19)[name = string("input_197")]; string var_2464_pad_type_0 = const()[name = string("op_2464_pad_type_0"), val = string("valid")]; tensor var_2464_strides_0 = const()[name = string("op_2464_strides_0"), val = tensor([1, 1])]; tensor var_2464_pad_0 = const()[name = string("op_2464_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2464_dilations_0 = const()[name = string("op_2464_dilations_0"), val = tensor([1, 1])]; int32 var_2464_groups_0 = const()[name = string("op_2464_groups_0"), val = int32(1)]; tensor var_2464 = conv(dilations = var_2464_dilations_0, groups = var_2464_groups_0, pad = var_2464_pad_0, pad_type = var_2464_pad_type_0, strides = var_2464_strides_0, weight = encoder_layers_9_mlp_down_proj_weight_quantized, x = input_197)[name = string("op_2464")]; tensor var_2465_axes_0 = const()[name = string("op_2465_axes_0"), val = tensor([2])]; tensor var_2465 = squeeze(axes = var_2465_axes_0, x = var_2464)[name = string("op_2465")]; tensor var_2466 = const()[name = string("op_2466"), val = tensor([0, 2, 1])]; fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_159 = transpose(perm = var_2466, x = var_2465)[name = string("transpose_126")]; tensor var_2470_cast_fp16 = mul(x = x_159, y = const_138_promoted_to_fp16)[name = string("op_2470_cast_fp16")]; bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; tensor input_199_cast_fp16 = concat(axis = var_23, interleave = input_199_interleave_0, values = (x_159, var_2470_cast_fp16))[name = string("input_199_cast_fp16")]; tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_8_to_fp16, x = input_199_cast_fp16)[name = string("normed_277_cast_fp16")]; tensor var_2475_split_sizes_0 = const()[name = string("op_2475_split_sizes_0"), val = tensor([768, 768])]; int32 var_2475_axis_0 = const()[name = string("op_2475_axis_0"), val = int32(-1)]; tensor var_2475_cast_fp16_0, tensor var_2475_cast_fp16_1 = split(axis = var_2475_axis_0, split_sizes = var_2475_split_sizes_0, x = normed_277_cast_fp16)[name = string("op_2475_cast_fp16")]; tensor var_2479_to_fp16 = const()[name = string("op_2479_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300226816)))]; tensor out_119_cast_fp16 = mul(x = var_2475_cast_fp16_0, y = var_2479_to_fp16)[name = string("out_119_cast_fp16")]; tensor x_161_cast_fp16 = add(x = x_155_cast_fp16, y = out_119_cast_fp16)[name = string("x_161_cast_fp16")]; fp16 const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2508_cast_fp16 = mul(x = x_161_cast_fp16, y = const_140_promoted_to_fp16)[name = string("op_2508_cast_fp16")]; bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; tensor input_201_cast_fp16 = concat(axis = var_23, interleave = input_201_interleave_0, values = (x_161_cast_fp16, var_2508_cast_fp16))[name = string("input_201_cast_fp16")]; tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_8_to_fp16, x = input_201_cast_fp16)[name = string("normed_281_cast_fp16")]; tensor var_2513_split_sizes_0 = const()[name = string("op_2513_split_sizes_0"), val = tensor([768, 768])]; int32 var_2513_axis_0 = const()[name = string("op_2513_axis_0"), val = int32(-1)]; tensor var_2513_cast_fp16_0, tensor var_2513_cast_fp16_1 = split(axis = var_2513_axis_0, split_sizes = var_2513_split_sizes_0, x = normed_281_cast_fp16)[name = string("op_2513_cast_fp16")]; tensor var_2517_to_fp16 = const()[name = string("op_2517_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300228416)))]; tensor out_121_cast_fp16 = mul(x = var_2513_cast_fp16_0, y = var_2517_to_fp16)[name = string("out_121_cast_fp16")]; tensor var_2523 = const()[name = string("op_2523"), val = tensor([0, 2, 1])]; tensor var_2525_axes_0 = const()[name = string("op_2525_axes_0"), val = tensor([2])]; tensor var_2524_cast_fp16 = transpose(perm = var_2523, x = out_121_cast_fp16)[name = string("transpose_125")]; tensor var_2525_cast_fp16 = expand_dims(axes = var_2525_axes_0, x = var_2524_cast_fp16)[name = string("op_2525_cast_fp16")]; string var_2532_pad_type_0 = const()[name = string("op_2532_pad_type_0"), val = string("valid")]; tensor var_2532_strides_0 = const()[name = string("op_2532_strides_0"), val = tensor([1, 1])]; tensor var_2532_pad_0 = const()[name = string("op_2532_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2532_dilations_0 = const()[name = string("op_2532_dilations_0"), val = tensor([1, 1])]; int32 var_2532_groups_0 = const()[name = string("op_2532_groups_0"), val = int32(1)]; tensor var_2532 = conv(dilations = var_2532_dilations_0, groups = var_2532_groups_0, pad = var_2532_pad_0, pad_type = var_2532_pad_type_0, strides = var_2532_strides_0, weight = encoder_layers_10_self_attn_q_proj_weight_quantized, x = var_2525_cast_fp16)[name = string("op_2532")]; tensor var_2533 = const()[name = string("op_2533"), val = tensor([1, 3, 256, 128])]; tensor var_2534 = reshape(shape = var_2533, x = var_2532)[name = string("op_2534")]; tensor var_2535 = const()[name = string("op_2535"), val = tensor([0, 1, 3, 2])]; string var_2542_pad_type_0 = const()[name = string("op_2542_pad_type_0"), val = string("valid")]; tensor var_2542_strides_0 = const()[name = string("op_2542_strides_0"), val = tensor([1, 1])]; tensor var_2542_pad_0 = const()[name = string("op_2542_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2542_dilations_0 = const()[name = string("op_2542_dilations_0"), val = tensor([1, 1])]; int32 var_2542_groups_0 = const()[name = string("op_2542_groups_0"), val = int32(1)]; tensor var_2542 = conv(dilations = var_2542_dilations_0, groups = var_2542_groups_0, pad = var_2542_pad_0, pad_type = var_2542_pad_type_0, strides = var_2542_strides_0, weight = encoder_layers_10_self_attn_k_proj_weight_quantized, x = var_2525_cast_fp16)[name = string("op_2542")]; tensor var_2543 = const()[name = string("op_2543"), val = tensor([1, 1, 256, 128])]; tensor var_2544 = reshape(shape = var_2543, x = var_2542)[name = string("op_2544")]; tensor var_2545 = const()[name = string("op_2545"), val = tensor([0, 1, 3, 2])]; string var_2552_pad_type_0 = const()[name = string("op_2552_pad_type_0"), val = string("valid")]; tensor var_2552_strides_0 = const()[name = string("op_2552_strides_0"), val = tensor([1, 1])]; tensor var_2552_pad_0 = const()[name = string("op_2552_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2552_dilations_0 = const()[name = string("op_2552_dilations_0"), val = tensor([1, 1])]; int32 var_2552_groups_0 = const()[name = string("op_2552_groups_0"), val = int32(1)]; tensor var_2552 = conv(dilations = var_2552_dilations_0, groups = var_2552_groups_0, pad = var_2552_pad_0, pad_type = var_2552_pad_type_0, strides = var_2552_strides_0, weight = encoder_layers_10_self_attn_v_proj_weight_quantized, x = var_2525_cast_fp16)[name = string("op_2552")]; tensor var_2553 = const()[name = string("op_2553"), val = tensor([1, 1, 256, 128])]; tensor var_2554 = reshape(shape = var_2553, x = var_2552)[name = string("op_2554")]; tensor var_2555 = const()[name = string("op_2555"), val = tensor([0, 1, 3, 2])]; fp16 const_142_promoted_to_fp16 = const()[name = string("const_142_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_61 = transpose(perm = var_2535, x = var_2534)[name = string("transpose_124")]; tensor var_2561_cast_fp16 = mul(x = q_61, y = const_142_promoted_to_fp16)[name = string("op_2561_cast_fp16")]; bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; tensor input_205_cast_fp16 = concat(axis = var_23, interleave = input_205_interleave_0, values = (q_61, var_2561_cast_fp16))[name = string("input_205_cast_fp16")]; tensor normed_287_axes_0 = const()[name = string("normed_287_axes_0"), val = tensor([-1])]; tensor normed_287_cast_fp16 = layer_norm(axes = normed_287_axes_0, epsilon = var_8_to_fp16, x = input_205_cast_fp16)[name = string("normed_287_cast_fp16")]; tensor var_2566_split_sizes_0 = const()[name = string("op_2566_split_sizes_0"), val = tensor([256, 256])]; int32 var_2566_axis_0 = const()[name = string("op_2566_axis_0"), val = int32(-1)]; tensor var_2566_cast_fp16_0, tensor var_2566_cast_fp16_1 = split(axis = var_2566_axis_0, split_sizes = var_2566_split_sizes_0, x = normed_287_cast_fp16)[name = string("op_2566_cast_fp16")]; tensor var_2570_to_fp16 = const()[name = string("op_2570_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300230016)))]; tensor out_123_cast_fp16 = mul(x = var_2566_cast_fp16_0, y = var_2570_to_fp16)[name = string("out_123_cast_fp16")]; fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_61 = transpose(perm = var_2545, x = var_2544)[name = string("transpose_123")]; tensor var_2577_cast_fp16 = mul(x = k_61, y = const_144_promoted_to_fp16)[name = string("op_2577_cast_fp16")]; bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; tensor input_207_cast_fp16 = concat(axis = var_23, interleave = input_207_interleave_0, values = (k_61, var_2577_cast_fp16))[name = string("input_207_cast_fp16")]; tensor normed_291_axes_0 = const()[name = string("normed_291_axes_0"), val = tensor([-1])]; tensor normed_291_cast_fp16 = layer_norm(axes = normed_291_axes_0, epsilon = var_8_to_fp16, x = input_207_cast_fp16)[name = string("normed_291_cast_fp16")]; tensor var_2582_split_sizes_0 = const()[name = string("op_2582_split_sizes_0"), val = tensor([256, 256])]; int32 var_2582_axis_0 = const()[name = string("op_2582_axis_0"), val = int32(-1)]; tensor var_2582_cast_fp16_0, tensor var_2582_cast_fp16_1 = split(axis = var_2582_axis_0, split_sizes = var_2582_split_sizes_0, x = normed_291_cast_fp16)[name = string("op_2582_cast_fp16")]; tensor var_2586_to_fp16 = const()[name = string("op_2586_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300230592)))]; tensor out_125_cast_fp16 = mul(x = var_2582_cast_fp16_0, y = var_2586_to_fp16)[name = string("out_125_cast_fp16")]; tensor var_2589 = mul(x = out_123_cast_fp16, y = cos_1_quantized)[name = string("op_2589")]; tensor var_2590_split_sizes_0 = const()[name = string("op_2590_split_sizes_0"), val = tensor([128, 128])]; int32 var_2590_axis_0 = const()[name = string("op_2590_axis_0"), val = int32(-1)]; tensor var_2590_0, tensor var_2590_1 = split(axis = var_2590_axis_0, split_sizes = var_2590_split_sizes_0, x = out_123_cast_fp16)[name = string("op_2590")]; fp16 const_146_promoted = const()[name = string("const_146_promoted"), val = fp16(-0x1p+0)]; tensor var_2592 = mul(x = var_2590_1, y = const_146_promoted)[name = string("op_2592")]; bool var_2594_interleave_0 = const()[name = string("op_2594_interleave_0"), val = bool(false)]; tensor var_2594 = concat(axis = var_23, interleave = var_2594_interleave_0, values = (var_2592, var_2590_0))[name = string("op_2594")]; tensor var_2595 = mul(x = var_2594, y = sin_1_quantized)[name = string("op_2595")]; tensor q_65 = add(x = var_2589, y = var_2595)[name = string("q_65")]; tensor var_2597 = mul(x = out_125_cast_fp16, y = cos_1_quantized)[name = string("op_2597")]; tensor var_2598_split_sizes_0 = const()[name = string("op_2598_split_sizes_0"), val = tensor([128, 128])]; int32 var_2598_axis_0 = const()[name = string("op_2598_axis_0"), val = int32(-1)]; tensor var_2598_0, tensor var_2598_1 = split(axis = var_2598_axis_0, split_sizes = var_2598_split_sizes_0, x = out_125_cast_fp16)[name = string("op_2598")]; fp16 const_147_promoted = const()[name = string("const_147_promoted"), val = fp16(-0x1p+0)]; tensor var_2600 = mul(x = var_2598_1, y = const_147_promoted)[name = string("op_2600")]; bool var_2602_interleave_0 = const()[name = string("op_2602_interleave_0"), val = bool(false)]; tensor var_2602 = concat(axis = var_23, interleave = var_2602_interleave_0, values = (var_2600, var_2598_0))[name = string("op_2602")]; tensor var_2603 = mul(x = var_2602, y = sin_1_quantized)[name = string("op_2603")]; tensor hidden_states_121 = add(x = var_2597, y = var_2603)[name = string("hidden_states_121")]; tensor hidden_states_123_axes_0 = const()[name = string("hidden_states_123_axes_0"), val = tensor([2])]; tensor hidden_states_123 = expand_dims(axes = hidden_states_123_axes_0, x = hidden_states_121)[name = string("hidden_states_123")]; tensor var_2606 = const()[name = string("op_2606"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_125 = tile(reps = var_2606, x = hidden_states_123)[name = string("hidden_states_125")]; tensor var_2608 = const()[name = string("op_2608"), val = tensor([1, 3, 128, 256])]; tensor k_65 = reshape(shape = var_2608, x = hidden_states_125)[name = string("k_65")]; tensor hidden_states_129_axes_0 = const()[name = string("hidden_states_129_axes_0"), val = tensor([2])]; tensor hidden_states_127 = transpose(perm = var_2555, x = var_2554)[name = string("transpose_122")]; tensor hidden_states_129 = expand_dims(axes = hidden_states_129_axes_0, x = hidden_states_127)[name = string("hidden_states_129")]; tensor var_2611 = const()[name = string("op_2611"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_131 = tile(reps = var_2611, x = hidden_states_129)[name = string("hidden_states_131")]; tensor var_2613 = const()[name = string("op_2613"), val = tensor([1, 3, 128, 256])]; tensor v_21 = reshape(shape = var_2613, x = hidden_states_131)[name = string("v_21")]; bool var_2618_transpose_x_1 = const()[name = string("op_2618_transpose_x_1"), val = bool(false)]; bool var_2618_transpose_y_1 = const()[name = string("op_2618_transpose_y_1"), val = bool(true)]; tensor var_2618_cast_fp16 = matmul(transpose_x = var_2618_transpose_x_1, transpose_y = var_2618_transpose_y_1, x = q_65, y = k_65)[name = string("op_2618_cast_fp16")]; fp16 var_2619_to_fp16 = const()[name = string("op_2619_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_61_cast_fp16 = mul(x = var_2618_cast_fp16, y = var_2619_to_fp16)[name = string("attn_weights_61_cast_fp16")]; tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_63_cast_fp16")]; tensor var_2623_cast_fp16 = softmax(axis = var_23, x = attn_weights_63_cast_fp16)[name = string("op_2623_cast_fp16")]; bool var_2627_transpose_x_0 = const()[name = string("op_2627_transpose_x_0"), val = bool(false)]; bool var_2627_transpose_y_0 = const()[name = string("op_2627_transpose_y_0"), val = bool(false)]; tensor var_2627_cast_fp16 = matmul(transpose_x = var_2627_transpose_x_0, transpose_y = var_2627_transpose_y_0, x = var_2623_cast_fp16, y = v_21)[name = string("op_2627_cast_fp16")]; tensor var_2629 = const()[name = string("op_2629"), val = tensor([0, 2, 1, 3])]; tensor var_2632 = const()[name = string("op_2632"), val = tensor([1, 128, 768])]; tensor var_2630 = transpose(perm = var_2629, x = var_2627_cast_fp16)[name = string("transpose_121")]; tensor attn_out_63 = reshape(shape = var_2632, x = var_2630)[name = string("attn_out_63")]; tensor var_2634 = const()[name = string("op_2634"), val = tensor([0, 2, 1])]; tensor squeeze_10_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300231168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300821056))))[name = string("squeeze_10_quantized")]; string var_2643_pad_type_0 = const()[name = string("op_2643_pad_type_0"), val = string("valid")]; int32 var_2643_groups_0 = const()[name = string("op_2643_groups_0"), val = int32(1)]; tensor var_2643_strides_0 = const()[name = string("op_2643_strides_0"), val = tensor([1])]; tensor var_2643_pad_0 = const()[name = string("op_2643_pad_0"), val = tensor([0, 0])]; tensor var_2643_dilations_0 = const()[name = string("op_2643_dilations_0"), val = tensor([1])]; tensor var_2635 = transpose(perm = var_2634, x = attn_out_63)[name = string("transpose_120")]; tensor var_2643 = conv(dilations = var_2643_dilations_0, groups = var_2643_groups_0, pad = var_2643_pad_0, pad_type = var_2643_pad_type_0, strides = var_2643_strides_0, weight = squeeze_10_quantized, x = var_2635)[name = string("op_2643")]; tensor var_2644 = const()[name = string("op_2644"), val = tensor([0, 2, 1])]; fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_169 = transpose(perm = var_2644, x = var_2643)[name = string("transpose_119")]; tensor var_2648_cast_fp16 = mul(x = x_169, y = const_148_promoted_to_fp16)[name = string("op_2648_cast_fp16")]; bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; tensor input_211_cast_fp16 = concat(axis = var_23, interleave = input_211_interleave_0, values = (x_169, var_2648_cast_fp16))[name = string("input_211_cast_fp16")]; tensor normed_295_axes_0 = const()[name = string("normed_295_axes_0"), val = tensor([-1])]; tensor normed_295_cast_fp16 = layer_norm(axes = normed_295_axes_0, epsilon = var_8_to_fp16, x = input_211_cast_fp16)[name = string("normed_295_cast_fp16")]; tensor var_2653_split_sizes_0 = const()[name = string("op_2653_split_sizes_0"), val = tensor([768, 768])]; int32 var_2653_axis_0 = const()[name = string("op_2653_axis_0"), val = int32(-1)]; tensor var_2653_cast_fp16_0, tensor var_2653_cast_fp16_1 = split(axis = var_2653_axis_0, split_sizes = var_2653_split_sizes_0, x = normed_295_cast_fp16)[name = string("op_2653_cast_fp16")]; tensor var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300822656)))]; tensor out_127_cast_fp16 = mul(x = var_2653_cast_fp16_0, y = var_2657_to_fp16)[name = string("out_127_cast_fp16")]; tensor x_171_cast_fp16 = add(x = x_161_cast_fp16, y = out_127_cast_fp16)[name = string("x_171_cast_fp16")]; fp16 const_150_promoted_to_fp16 = const()[name = string("const_150_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2664_cast_fp16 = mul(x = x_171_cast_fp16, y = const_150_promoted_to_fp16)[name = string("op_2664_cast_fp16")]; bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; tensor input_213_cast_fp16 = concat(axis = var_23, interleave = input_213_interleave_0, values = (x_171_cast_fp16, var_2664_cast_fp16))[name = string("input_213_cast_fp16")]; tensor normed_299_axes_0 = const()[name = string("normed_299_axes_0"), val = tensor([-1])]; tensor normed_299_cast_fp16 = layer_norm(axes = normed_299_axes_0, epsilon = var_8_to_fp16, x = input_213_cast_fp16)[name = string("normed_299_cast_fp16")]; tensor var_2669_split_sizes_0 = const()[name = string("op_2669_split_sizes_0"), val = tensor([768, 768])]; int32 var_2669_axis_0 = const()[name = string("op_2669_axis_0"), val = int32(-1)]; tensor var_2669_cast_fp16_0, tensor var_2669_cast_fp16_1 = split(axis = var_2669_axis_0, split_sizes = var_2669_split_sizes_0, x = normed_299_cast_fp16)[name = string("op_2669_cast_fp16")]; tensor var_2673_to_fp16 = const()[name = string("op_2673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300824256)))]; tensor out_129_cast_fp16 = mul(x = var_2669_cast_fp16_0, y = var_2673_to_fp16)[name = string("out_129_cast_fp16")]; tensor var_2680 = const()[name = string("op_2680"), val = tensor([0, 2, 1])]; tensor input_215_axes_0 = const()[name = string("input_215_axes_0"), val = tensor([2])]; tensor var_2681 = transpose(perm = var_2680, x = out_129_cast_fp16)[name = string("transpose_118")]; tensor input_215 = expand_dims(axes = input_215_axes_0, x = var_2681)[name = string("input_215")]; string gate_41_pad_type_0 = const()[name = string("gate_41_pad_type_0"), val = string("valid")]; tensor gate_41_strides_0 = const()[name = string("gate_41_strides_0"), val = tensor([1, 1])]; tensor gate_41_pad_0 = const()[name = string("gate_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_41_dilations_0 = const()[name = string("gate_41_dilations_0"), val = tensor([1, 1])]; int32 gate_41_groups_0 = const()[name = string("gate_41_groups_0"), val = int32(1)]; tensor gate_41 = conv(dilations = gate_41_dilations_0, groups = gate_41_groups_0, pad = gate_41_pad_0, pad_type = gate_41_pad_type_0, strides = gate_41_strides_0, weight = encoder_layers_10_mlp_gate_proj_weight_quantized, x = input_215)[name = string("gate_41")]; string up_21_pad_type_0 = const()[name = string("up_21_pad_type_0"), val = string("valid")]; tensor up_21_strides_0 = const()[name = string("up_21_strides_0"), val = tensor([1, 1])]; tensor up_21_pad_0 = const()[name = string("up_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_21_dilations_0 = const()[name = string("up_21_dilations_0"), val = tensor([1, 1])]; int32 up_21_groups_0 = const()[name = string("up_21_groups_0"), val = int32(1)]; tensor up_21 = conv(dilations = up_21_dilations_0, groups = up_21_groups_0, pad = up_21_pad_0, pad_type = up_21_pad_type_0, strides = up_21_strides_0, weight = encoder_layers_10_mlp_up_proj_weight_quantized, x = input_215)[name = string("up_21")]; string gate_43_mode_0 = const()[name = string("gate_43_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_43 = gelu(mode = gate_43_mode_0, x = gate_41)[name = string("gate_43")]; tensor input_217 = mul(x = gate_43, y = up_21)[name = string("input_217")]; string var_2702_pad_type_0 = const()[name = string("op_2702_pad_type_0"), val = string("valid")]; tensor var_2702_strides_0 = const()[name = string("op_2702_strides_0"), val = tensor([1, 1])]; tensor var_2702_pad_0 = const()[name = string("op_2702_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2702_dilations_0 = const()[name = string("op_2702_dilations_0"), val = tensor([1, 1])]; int32 var_2702_groups_0 = const()[name = string("op_2702_groups_0"), val = int32(1)]; tensor var_2702 = conv(dilations = var_2702_dilations_0, groups = var_2702_groups_0, pad = var_2702_pad_0, pad_type = var_2702_pad_type_0, strides = var_2702_strides_0, weight = encoder_layers_10_mlp_down_proj_weight_quantized, x = input_217)[name = string("op_2702")]; tensor var_2703_axes_0 = const()[name = string("op_2703_axes_0"), val = tensor([2])]; tensor var_2703 = squeeze(axes = var_2703_axes_0, x = var_2702)[name = string("op_2703")]; tensor var_2704 = const()[name = string("op_2704"), val = tensor([0, 2, 1])]; fp16 const_152_promoted_to_fp16 = const()[name = string("const_152_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_175 = transpose(perm = var_2704, x = var_2703)[name = string("transpose_117")]; tensor var_2708_cast_fp16 = mul(x = x_175, y = const_152_promoted_to_fp16)[name = string("op_2708_cast_fp16")]; bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; tensor input_219_cast_fp16 = concat(axis = var_23, interleave = input_219_interleave_0, values = (x_175, var_2708_cast_fp16))[name = string("input_219_cast_fp16")]; tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_8_to_fp16, x = input_219_cast_fp16)[name = string("normed_305_cast_fp16")]; tensor var_2713_split_sizes_0 = const()[name = string("op_2713_split_sizes_0"), val = tensor([768, 768])]; int32 var_2713_axis_0 = const()[name = string("op_2713_axis_0"), val = int32(-1)]; tensor var_2713_cast_fp16_0, tensor var_2713_cast_fp16_1 = split(axis = var_2713_axis_0, split_sizes = var_2713_split_sizes_0, x = normed_305_cast_fp16)[name = string("op_2713_cast_fp16")]; tensor var_2717_to_fp16 = const()[name = string("op_2717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300825856)))]; tensor out_131_cast_fp16 = mul(x = var_2713_cast_fp16_0, y = var_2717_to_fp16)[name = string("out_131_cast_fp16")]; tensor x_177_cast_fp16 = add(x = x_171_cast_fp16, y = out_131_cast_fp16)[name = string("x_177_cast_fp16")]; fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2746_cast_fp16 = mul(x = x_177_cast_fp16, y = const_154_promoted_to_fp16)[name = string("op_2746_cast_fp16")]; bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; tensor input_221_cast_fp16 = concat(axis = var_23, interleave = input_221_interleave_0, values = (x_177_cast_fp16, var_2746_cast_fp16))[name = string("input_221_cast_fp16")]; tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_8_to_fp16, x = input_221_cast_fp16)[name = string("normed_309_cast_fp16")]; tensor var_2751_split_sizes_0 = const()[name = string("op_2751_split_sizes_0"), val = tensor([768, 768])]; int32 var_2751_axis_0 = const()[name = string("op_2751_axis_0"), val = int32(-1)]; tensor var_2751_cast_fp16_0, tensor var_2751_cast_fp16_1 = split(axis = var_2751_axis_0, split_sizes = var_2751_split_sizes_0, x = normed_309_cast_fp16)[name = string("op_2751_cast_fp16")]; tensor var_2755_to_fp16 = const()[name = string("op_2755_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300827456)))]; tensor out_133_cast_fp16 = mul(x = var_2751_cast_fp16_0, y = var_2755_to_fp16)[name = string("out_133_cast_fp16")]; tensor var_2761 = const()[name = string("op_2761"), val = tensor([0, 2, 1])]; tensor var_2763_axes_0 = const()[name = string("op_2763_axes_0"), val = tensor([2])]; tensor var_2762_cast_fp16 = transpose(perm = var_2761, x = out_133_cast_fp16)[name = string("transpose_116")]; tensor var_2763_cast_fp16 = expand_dims(axes = var_2763_axes_0, x = var_2762_cast_fp16)[name = string("op_2763_cast_fp16")]; string var_2770_pad_type_0 = const()[name = string("op_2770_pad_type_0"), val = string("valid")]; tensor var_2770_strides_0 = const()[name = string("op_2770_strides_0"), val = tensor([1, 1])]; tensor var_2770_pad_0 = const()[name = string("op_2770_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2770_dilations_0 = const()[name = string("op_2770_dilations_0"), val = tensor([1, 1])]; int32 var_2770_groups_0 = const()[name = string("op_2770_groups_0"), val = int32(1)]; tensor var_2770 = conv(dilations = var_2770_dilations_0, groups = var_2770_groups_0, pad = var_2770_pad_0, pad_type = var_2770_pad_type_0, strides = var_2770_strides_0, weight = encoder_layers_11_self_attn_q_proj_weight_quantized, x = var_2763_cast_fp16)[name = string("op_2770")]; tensor var_2771 = const()[name = string("op_2771"), val = tensor([1, 3, 256, 128])]; tensor var_2772 = reshape(shape = var_2771, x = var_2770)[name = string("op_2772")]; tensor var_2773 = const()[name = string("op_2773"), val = tensor([0, 1, 3, 2])]; string var_2780_pad_type_0 = const()[name = string("op_2780_pad_type_0"), val = string("valid")]; tensor var_2780_strides_0 = const()[name = string("op_2780_strides_0"), val = tensor([1, 1])]; tensor var_2780_pad_0 = const()[name = string("op_2780_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2780_dilations_0 = const()[name = string("op_2780_dilations_0"), val = tensor([1, 1])]; int32 var_2780_groups_0 = const()[name = string("op_2780_groups_0"), val = int32(1)]; tensor var_2780 = conv(dilations = var_2780_dilations_0, groups = var_2780_groups_0, pad = var_2780_pad_0, pad_type = var_2780_pad_type_0, strides = var_2780_strides_0, weight = encoder_layers_11_self_attn_k_proj_weight_quantized, x = var_2763_cast_fp16)[name = string("op_2780")]; tensor var_2781 = const()[name = string("op_2781"), val = tensor([1, 1, 256, 128])]; tensor var_2782 = reshape(shape = var_2781, x = var_2780)[name = string("op_2782")]; tensor var_2783 = const()[name = string("op_2783"), val = tensor([0, 1, 3, 2])]; string var_2790_pad_type_0 = const()[name = string("op_2790_pad_type_0"), val = string("valid")]; tensor var_2790_strides_0 = const()[name = string("op_2790_strides_0"), val = tensor([1, 1])]; tensor var_2790_pad_0 = const()[name = string("op_2790_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2790_dilations_0 = const()[name = string("op_2790_dilations_0"), val = tensor([1, 1])]; int32 var_2790_groups_0 = const()[name = string("op_2790_groups_0"), val = int32(1)]; tensor var_2790 = conv(dilations = var_2790_dilations_0, groups = var_2790_groups_0, pad = var_2790_pad_0, pad_type = var_2790_pad_type_0, strides = var_2790_strides_0, weight = encoder_layers_11_self_attn_v_proj_weight_quantized, x = var_2763_cast_fp16)[name = string("op_2790")]; tensor var_2791 = const()[name = string("op_2791"), val = tensor([1, 1, 256, 128])]; tensor var_2792 = reshape(shape = var_2791, x = var_2790)[name = string("op_2792")]; tensor var_2793 = const()[name = string("op_2793"), val = tensor([0, 1, 3, 2])]; fp16 const_156_promoted_to_fp16 = const()[name = string("const_156_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_67 = transpose(perm = var_2773, x = var_2772)[name = string("transpose_115")]; tensor var_2799_cast_fp16 = mul(x = q_67, y = const_156_promoted_to_fp16)[name = string("op_2799_cast_fp16")]; bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; tensor input_225_cast_fp16 = concat(axis = var_23, interleave = input_225_interleave_0, values = (q_67, var_2799_cast_fp16))[name = string("input_225_cast_fp16")]; tensor normed_315_axes_0 = const()[name = string("normed_315_axes_0"), val = tensor([-1])]; tensor normed_315_cast_fp16 = layer_norm(axes = normed_315_axes_0, epsilon = var_8_to_fp16, x = input_225_cast_fp16)[name = string("normed_315_cast_fp16")]; tensor var_2804_split_sizes_0 = const()[name = string("op_2804_split_sizes_0"), val = tensor([256, 256])]; int32 var_2804_axis_0 = const()[name = string("op_2804_axis_0"), val = int32(-1)]; tensor var_2804_cast_fp16_0, tensor var_2804_cast_fp16_1 = split(axis = var_2804_axis_0, split_sizes = var_2804_split_sizes_0, x = normed_315_cast_fp16)[name = string("op_2804_cast_fp16")]; tensor var_2808_to_fp16 = const()[name = string("op_2808_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300829056)))]; tensor out_135_cast_fp16 = mul(x = var_2804_cast_fp16_0, y = var_2808_to_fp16)[name = string("out_135_cast_fp16")]; fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_67 = transpose(perm = var_2783, x = var_2782)[name = string("transpose_114")]; tensor var_2815_cast_fp16 = mul(x = k_67, y = const_158_promoted_to_fp16)[name = string("op_2815_cast_fp16")]; bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; tensor input_227_cast_fp16 = concat(axis = var_23, interleave = input_227_interleave_0, values = (k_67, var_2815_cast_fp16))[name = string("input_227_cast_fp16")]; tensor normed_319_axes_0 = const()[name = string("normed_319_axes_0"), val = tensor([-1])]; tensor normed_319_cast_fp16 = layer_norm(axes = normed_319_axes_0, epsilon = var_8_to_fp16, x = input_227_cast_fp16)[name = string("normed_319_cast_fp16")]; tensor var_2820_split_sizes_0 = const()[name = string("op_2820_split_sizes_0"), val = tensor([256, 256])]; int32 var_2820_axis_0 = const()[name = string("op_2820_axis_0"), val = int32(-1)]; tensor var_2820_cast_fp16_0, tensor var_2820_cast_fp16_1 = split(axis = var_2820_axis_0, split_sizes = var_2820_split_sizes_0, x = normed_319_cast_fp16)[name = string("op_2820_cast_fp16")]; tensor var_2824_to_fp16 = const()[name = string("op_2824_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300829632)))]; tensor out_137_cast_fp16 = mul(x = var_2820_cast_fp16_0, y = var_2824_to_fp16)[name = string("out_137_cast_fp16")]; tensor var_2827 = mul(x = out_135_cast_fp16, y = cos_quantized)[name = string("op_2827")]; tensor var_2828_split_sizes_0 = const()[name = string("op_2828_split_sizes_0"), val = tensor([128, 128])]; int32 var_2828_axis_0 = const()[name = string("op_2828_axis_0"), val = int32(-1)]; tensor var_2828_0, tensor var_2828_1 = split(axis = var_2828_axis_0, split_sizes = var_2828_split_sizes_0, x = out_135_cast_fp16)[name = string("op_2828")]; fp16 const_160_promoted = const()[name = string("const_160_promoted"), val = fp16(-0x1p+0)]; tensor var_2830 = mul(x = var_2828_1, y = const_160_promoted)[name = string("op_2830")]; bool var_2832_interleave_0 = const()[name = string("op_2832_interleave_0"), val = bool(false)]; tensor var_2832 = concat(axis = var_23, interleave = var_2832_interleave_0, values = (var_2830, var_2828_0))[name = string("op_2832")]; tensor var_2833 = mul(x = var_2832, y = sin_quantized)[name = string("op_2833")]; tensor q_71 = add(x = var_2827, y = var_2833)[name = string("q_71")]; tensor var_2835 = mul(x = out_137_cast_fp16, y = cos_quantized)[name = string("op_2835")]; tensor var_2836_split_sizes_0 = const()[name = string("op_2836_split_sizes_0"), val = tensor([128, 128])]; int32 var_2836_axis_0 = const()[name = string("op_2836_axis_0"), val = int32(-1)]; tensor var_2836_0, tensor var_2836_1 = split(axis = var_2836_axis_0, split_sizes = var_2836_split_sizes_0, x = out_137_cast_fp16)[name = string("op_2836")]; fp16 const_161_promoted = const()[name = string("const_161_promoted"), val = fp16(-0x1p+0)]; tensor var_2838 = mul(x = var_2836_1, y = const_161_promoted)[name = string("op_2838")]; bool var_2840_interleave_0 = const()[name = string("op_2840_interleave_0"), val = bool(false)]; tensor var_2840 = concat(axis = var_23, interleave = var_2840_interleave_0, values = (var_2838, var_2836_0))[name = string("op_2840")]; tensor var_2841 = mul(x = var_2840, y = sin_quantized)[name = string("op_2841")]; tensor hidden_states_133 = add(x = var_2835, y = var_2841)[name = string("hidden_states_133")]; tensor hidden_states_135_axes_0 = const()[name = string("hidden_states_135_axes_0"), val = tensor([2])]; tensor hidden_states_135 = expand_dims(axes = hidden_states_135_axes_0, x = hidden_states_133)[name = string("hidden_states_135")]; tensor var_2844 = const()[name = string("op_2844"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_137 = tile(reps = var_2844, x = hidden_states_135)[name = string("hidden_states_137")]; tensor var_2846 = const()[name = string("op_2846"), val = tensor([1, 3, 128, 256])]; tensor k_71 = reshape(shape = var_2846, x = hidden_states_137)[name = string("k_71")]; tensor hidden_states_141_axes_0 = const()[name = string("hidden_states_141_axes_0"), val = tensor([2])]; tensor hidden_states_139 = transpose(perm = var_2793, x = var_2792)[name = string("transpose_113")]; tensor hidden_states_141 = expand_dims(axes = hidden_states_141_axes_0, x = hidden_states_139)[name = string("hidden_states_141")]; tensor var_2849 = const()[name = string("op_2849"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_143 = tile(reps = var_2849, x = hidden_states_141)[name = string("hidden_states_143")]; tensor var_2851 = const()[name = string("op_2851"), val = tensor([1, 3, 128, 256])]; tensor v_23 = reshape(shape = var_2851, x = hidden_states_143)[name = string("v_23")]; bool var_2856_transpose_x_1 = const()[name = string("op_2856_transpose_x_1"), val = bool(false)]; bool var_2856_transpose_y_1 = const()[name = string("op_2856_transpose_y_1"), val = bool(true)]; tensor var_2856_cast_fp16 = matmul(transpose_x = var_2856_transpose_x_1, transpose_y = var_2856_transpose_y_1, x = q_71, y = k_71)[name = string("op_2856_cast_fp16")]; fp16 var_2857_to_fp16 = const()[name = string("op_2857_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_67_cast_fp16 = mul(x = var_2856_cast_fp16, y = var_2857_to_fp16)[name = string("attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = attn_weights_67_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; tensor var_2861_cast_fp16 = softmax(axis = var_23, x = attn_weights_69_cast_fp16)[name = string("op_2861_cast_fp16")]; bool var_2865_transpose_x_0 = const()[name = string("op_2865_transpose_x_0"), val = bool(false)]; bool var_2865_transpose_y_0 = const()[name = string("op_2865_transpose_y_0"), val = bool(false)]; tensor var_2865_cast_fp16 = matmul(transpose_x = var_2865_transpose_x_0, transpose_y = var_2865_transpose_y_0, x = var_2861_cast_fp16, y = v_23)[name = string("op_2865_cast_fp16")]; tensor var_2867 = const()[name = string("op_2867"), val = tensor([0, 2, 1, 3])]; tensor var_2870 = const()[name = string("op_2870"), val = tensor([1, 128, 768])]; tensor var_2868 = transpose(perm = var_2867, x = var_2865_cast_fp16)[name = string("transpose_112")]; tensor attn_out_69 = reshape(shape = var_2870, x = var_2868)[name = string("attn_out_69")]; tensor var_2872 = const()[name = string("op_2872"), val = tensor([0, 2, 1])]; tensor squeeze_11_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300830208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301420096))))[name = string("squeeze_11_quantized")]; string var_2881_pad_type_0 = const()[name = string("op_2881_pad_type_0"), val = string("valid")]; int32 var_2881_groups_0 = const()[name = string("op_2881_groups_0"), val = int32(1)]; tensor var_2881_strides_0 = const()[name = string("op_2881_strides_0"), val = tensor([1])]; tensor var_2881_pad_0 = const()[name = string("op_2881_pad_0"), val = tensor([0, 0])]; tensor var_2881_dilations_0 = const()[name = string("op_2881_dilations_0"), val = tensor([1])]; tensor var_2873 = transpose(perm = var_2872, x = attn_out_69)[name = string("transpose_111")]; tensor var_2881 = conv(dilations = var_2881_dilations_0, groups = var_2881_groups_0, pad = var_2881_pad_0, pad_type = var_2881_pad_type_0, strides = var_2881_strides_0, weight = squeeze_11_quantized, x = var_2873)[name = string("op_2881")]; tensor var_2882 = const()[name = string("op_2882"), val = tensor([0, 2, 1])]; fp16 const_162_promoted_to_fp16 = const()[name = string("const_162_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_185 = transpose(perm = var_2882, x = var_2881)[name = string("transpose_110")]; tensor var_2886_cast_fp16 = mul(x = x_185, y = const_162_promoted_to_fp16)[name = string("op_2886_cast_fp16")]; bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)]; tensor input_231_cast_fp16 = concat(axis = var_23, interleave = input_231_interleave_0, values = (x_185, var_2886_cast_fp16))[name = string("input_231_cast_fp16")]; tensor normed_323_axes_0 = const()[name = string("normed_323_axes_0"), val = tensor([-1])]; tensor normed_323_cast_fp16 = layer_norm(axes = normed_323_axes_0, epsilon = var_8_to_fp16, x = input_231_cast_fp16)[name = string("normed_323_cast_fp16")]; tensor var_2891_split_sizes_0 = const()[name = string("op_2891_split_sizes_0"), val = tensor([768, 768])]; int32 var_2891_axis_0 = const()[name = string("op_2891_axis_0"), val = int32(-1)]; tensor var_2891_cast_fp16_0, tensor var_2891_cast_fp16_1 = split(axis = var_2891_axis_0, split_sizes = var_2891_split_sizes_0, x = normed_323_cast_fp16)[name = string("op_2891_cast_fp16")]; tensor var_2895_to_fp16 = const()[name = string("op_2895_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301421696)))]; tensor out_139_cast_fp16 = mul(x = var_2891_cast_fp16_0, y = var_2895_to_fp16)[name = string("out_139_cast_fp16")]; tensor x_187_cast_fp16 = add(x = x_177_cast_fp16, y = out_139_cast_fp16)[name = string("x_187_cast_fp16")]; fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2902_cast_fp16 = mul(x = x_187_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_2902_cast_fp16")]; bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; tensor input_233_cast_fp16 = concat(axis = var_23, interleave = input_233_interleave_0, values = (x_187_cast_fp16, var_2902_cast_fp16))[name = string("input_233_cast_fp16")]; tensor normed_327_axes_0 = const()[name = string("normed_327_axes_0"), val = tensor([-1])]; tensor normed_327_cast_fp16 = layer_norm(axes = normed_327_axes_0, epsilon = var_8_to_fp16, x = input_233_cast_fp16)[name = string("normed_327_cast_fp16")]; tensor var_2907_split_sizes_0 = const()[name = string("op_2907_split_sizes_0"), val = tensor([768, 768])]; int32 var_2907_axis_0 = const()[name = string("op_2907_axis_0"), val = int32(-1)]; tensor var_2907_cast_fp16_0, tensor var_2907_cast_fp16_1 = split(axis = var_2907_axis_0, split_sizes = var_2907_split_sizes_0, x = normed_327_cast_fp16)[name = string("op_2907_cast_fp16")]; tensor var_2911_to_fp16 = const()[name = string("op_2911_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301423296)))]; tensor out_141_cast_fp16 = mul(x = var_2907_cast_fp16_0, y = var_2911_to_fp16)[name = string("out_141_cast_fp16")]; tensor var_2918 = const()[name = string("op_2918"), val = tensor([0, 2, 1])]; tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; tensor var_2919 = transpose(perm = var_2918, x = out_141_cast_fp16)[name = string("transpose_109")]; tensor input_235 = expand_dims(axes = input_235_axes_0, x = var_2919)[name = string("input_235")]; string gate_45_pad_type_0 = const()[name = string("gate_45_pad_type_0"), val = string("valid")]; tensor gate_45_strides_0 = const()[name = string("gate_45_strides_0"), val = tensor([1, 1])]; tensor gate_45_pad_0 = const()[name = string("gate_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_45_dilations_0 = const()[name = string("gate_45_dilations_0"), val = tensor([1, 1])]; int32 gate_45_groups_0 = const()[name = string("gate_45_groups_0"), val = int32(1)]; tensor gate_45 = conv(dilations = gate_45_dilations_0, groups = gate_45_groups_0, pad = gate_45_pad_0, pad_type = gate_45_pad_type_0, strides = gate_45_strides_0, weight = encoder_layers_11_mlp_gate_proj_weight_quantized, x = input_235)[name = string("gate_45")]; string up_23_pad_type_0 = const()[name = string("up_23_pad_type_0"), val = string("valid")]; tensor up_23_strides_0 = const()[name = string("up_23_strides_0"), val = tensor([1, 1])]; tensor up_23_pad_0 = const()[name = string("up_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_23_dilations_0 = const()[name = string("up_23_dilations_0"), val = tensor([1, 1])]; int32 up_23_groups_0 = const()[name = string("up_23_groups_0"), val = int32(1)]; tensor up_23 = conv(dilations = up_23_dilations_0, groups = up_23_groups_0, pad = up_23_pad_0, pad_type = up_23_pad_type_0, strides = up_23_strides_0, weight = encoder_layers_11_mlp_up_proj_weight_quantized, x = input_235)[name = string("up_23")]; string gate_47_mode_0 = const()[name = string("gate_47_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_47 = gelu(mode = gate_47_mode_0, x = gate_45)[name = string("gate_47")]; tensor input_237 = mul(x = gate_47, y = up_23)[name = string("input_237")]; string var_2940_pad_type_0 = const()[name = string("op_2940_pad_type_0"), val = string("valid")]; tensor var_2940_strides_0 = const()[name = string("op_2940_strides_0"), val = tensor([1, 1])]; tensor var_2940_pad_0 = const()[name = string("op_2940_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2940_dilations_0 = const()[name = string("op_2940_dilations_0"), val = tensor([1, 1])]; int32 var_2940_groups_0 = const()[name = string("op_2940_groups_0"), val = int32(1)]; tensor var_2940 = conv(dilations = var_2940_dilations_0, groups = var_2940_groups_0, pad = var_2940_pad_0, pad_type = var_2940_pad_type_0, strides = var_2940_strides_0, weight = encoder_layers_11_mlp_down_proj_weight_quantized, x = input_237)[name = string("op_2940")]; tensor var_2941_axes_0 = const()[name = string("op_2941_axes_0"), val = tensor([2])]; tensor var_2941 = squeeze(axes = var_2941_axes_0, x = var_2940)[name = string("op_2941")]; tensor var_2942 = const()[name = string("op_2942"), val = tensor([0, 2, 1])]; fp16 const_166_promoted_to_fp16 = const()[name = string("const_166_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_191 = transpose(perm = var_2942, x = var_2941)[name = string("transpose_108")]; tensor var_2946_cast_fp16 = mul(x = x_191, y = const_166_promoted_to_fp16)[name = string("op_2946_cast_fp16")]; bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; tensor input_239_cast_fp16 = concat(axis = var_23, interleave = input_239_interleave_0, values = (x_191, var_2946_cast_fp16))[name = string("input_239_cast_fp16")]; tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_8_to_fp16, x = input_239_cast_fp16)[name = string("normed_333_cast_fp16")]; tensor var_2951_split_sizes_0 = const()[name = string("op_2951_split_sizes_0"), val = tensor([768, 768])]; int32 var_2951_axis_0 = const()[name = string("op_2951_axis_0"), val = int32(-1)]; tensor var_2951_cast_fp16_0, tensor var_2951_cast_fp16_1 = split(axis = var_2951_axis_0, split_sizes = var_2951_split_sizes_0, x = normed_333_cast_fp16)[name = string("op_2951_cast_fp16")]; tensor var_2955_to_fp16 = const()[name = string("op_2955_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301424896)))]; tensor out_143_cast_fp16 = mul(x = var_2951_cast_fp16_0, y = var_2955_to_fp16)[name = string("out_143_cast_fp16")]; tensor x_193_cast_fp16 = add(x = x_187_cast_fp16, y = out_143_cast_fp16)[name = string("x_193_cast_fp16")]; fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2984_cast_fp16 = mul(x = x_193_cast_fp16, y = const_168_promoted_to_fp16)[name = string("op_2984_cast_fp16")]; bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; tensor input_241_cast_fp16 = concat(axis = var_23, interleave = input_241_interleave_0, values = (x_193_cast_fp16, var_2984_cast_fp16))[name = string("input_241_cast_fp16")]; tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_8_to_fp16, x = input_241_cast_fp16)[name = string("normed_337_cast_fp16")]; tensor var_2989_split_sizes_0 = const()[name = string("op_2989_split_sizes_0"), val = tensor([768, 768])]; int32 var_2989_axis_0 = const()[name = string("op_2989_axis_0"), val = int32(-1)]; tensor var_2989_cast_fp16_0, tensor var_2989_cast_fp16_1 = split(axis = var_2989_axis_0, split_sizes = var_2989_split_sizes_0, x = normed_337_cast_fp16)[name = string("op_2989_cast_fp16")]; tensor var_2993_to_fp16 = const()[name = string("op_2993_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301426496)))]; tensor out_145_cast_fp16 = mul(x = var_2989_cast_fp16_0, y = var_2993_to_fp16)[name = string("out_145_cast_fp16")]; tensor var_2999 = const()[name = string("op_2999"), val = tensor([0, 2, 1])]; tensor var_3001_axes_0 = const()[name = string("op_3001_axes_0"), val = tensor([2])]; tensor var_3000_cast_fp16 = transpose(perm = var_2999, x = out_145_cast_fp16)[name = string("transpose_107")]; tensor var_3001_cast_fp16 = expand_dims(axes = var_3001_axes_0, x = var_3000_cast_fp16)[name = string("op_3001_cast_fp16")]; string var_3008_pad_type_0 = const()[name = string("op_3008_pad_type_0"), val = string("valid")]; tensor var_3008_strides_0 = const()[name = string("op_3008_strides_0"), val = tensor([1, 1])]; tensor var_3008_pad_0 = const()[name = string("op_3008_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3008_dilations_0 = const()[name = string("op_3008_dilations_0"), val = tensor([1, 1])]; int32 var_3008_groups_0 = const()[name = string("op_3008_groups_0"), val = int32(1)]; tensor var_3008 = conv(dilations = var_3008_dilations_0, groups = var_3008_groups_0, pad = var_3008_pad_0, pad_type = var_3008_pad_type_0, strides = var_3008_strides_0, weight = encoder_layers_12_self_attn_q_proj_weight_quantized, x = var_3001_cast_fp16)[name = string("op_3008")]; tensor var_3009 = const()[name = string("op_3009"), val = tensor([1, 3, 256, 128])]; tensor var_3010 = reshape(shape = var_3009, x = var_3008)[name = string("op_3010")]; tensor var_3011 = const()[name = string("op_3011"), val = tensor([0, 1, 3, 2])]; string var_3018_pad_type_0 = const()[name = string("op_3018_pad_type_0"), val = string("valid")]; tensor var_3018_strides_0 = const()[name = string("op_3018_strides_0"), val = tensor([1, 1])]; tensor var_3018_pad_0 = const()[name = string("op_3018_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3018_dilations_0 = const()[name = string("op_3018_dilations_0"), val = tensor([1, 1])]; int32 var_3018_groups_0 = const()[name = string("op_3018_groups_0"), val = int32(1)]; tensor var_3018 = conv(dilations = var_3018_dilations_0, groups = var_3018_groups_0, pad = var_3018_pad_0, pad_type = var_3018_pad_type_0, strides = var_3018_strides_0, weight = encoder_layers_12_self_attn_k_proj_weight_quantized, x = var_3001_cast_fp16)[name = string("op_3018")]; tensor var_3019 = const()[name = string("op_3019"), val = tensor([1, 1, 256, 128])]; tensor var_3020 = reshape(shape = var_3019, x = var_3018)[name = string("op_3020")]; tensor var_3021 = const()[name = string("op_3021"), val = tensor([0, 1, 3, 2])]; string var_3028_pad_type_0 = const()[name = string("op_3028_pad_type_0"), val = string("valid")]; tensor var_3028_strides_0 = const()[name = string("op_3028_strides_0"), val = tensor([1, 1])]; tensor var_3028_pad_0 = const()[name = string("op_3028_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3028_dilations_0 = const()[name = string("op_3028_dilations_0"), val = tensor([1, 1])]; int32 var_3028_groups_0 = const()[name = string("op_3028_groups_0"), val = int32(1)]; tensor var_3028 = conv(dilations = var_3028_dilations_0, groups = var_3028_groups_0, pad = var_3028_pad_0, pad_type = var_3028_pad_type_0, strides = var_3028_strides_0, weight = encoder_layers_12_self_attn_v_proj_weight_quantized, x = var_3001_cast_fp16)[name = string("op_3028")]; tensor var_3029 = const()[name = string("op_3029"), val = tensor([1, 1, 256, 128])]; tensor var_3030 = reshape(shape = var_3029, x = var_3028)[name = string("op_3030")]; tensor var_3031 = const()[name = string("op_3031"), val = tensor([0, 1, 3, 2])]; fp16 const_170_promoted_to_fp16 = const()[name = string("const_170_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_73 = transpose(perm = var_3011, x = var_3010)[name = string("transpose_106")]; tensor var_3037_cast_fp16 = mul(x = q_73, y = const_170_promoted_to_fp16)[name = string("op_3037_cast_fp16")]; bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; tensor input_245_cast_fp16 = concat(axis = var_23, interleave = input_245_interleave_0, values = (q_73, var_3037_cast_fp16))[name = string("input_245_cast_fp16")]; tensor normed_343_axes_0 = const()[name = string("normed_343_axes_0"), val = tensor([-1])]; tensor normed_343_cast_fp16 = layer_norm(axes = normed_343_axes_0, epsilon = var_8_to_fp16, x = input_245_cast_fp16)[name = string("normed_343_cast_fp16")]; tensor var_3042_split_sizes_0 = const()[name = string("op_3042_split_sizes_0"), val = tensor([256, 256])]; int32 var_3042_axis_0 = const()[name = string("op_3042_axis_0"), val = int32(-1)]; tensor var_3042_cast_fp16_0, tensor var_3042_cast_fp16_1 = split(axis = var_3042_axis_0, split_sizes = var_3042_split_sizes_0, x = normed_343_cast_fp16)[name = string("op_3042_cast_fp16")]; tensor var_3046_to_fp16 = const()[name = string("op_3046_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301428096)))]; tensor out_147_cast_fp16 = mul(x = var_3042_cast_fp16_0, y = var_3046_to_fp16)[name = string("out_147_cast_fp16")]; fp16 const_172_promoted_to_fp16 = const()[name = string("const_172_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_73 = transpose(perm = var_3021, x = var_3020)[name = string("transpose_105")]; tensor var_3053_cast_fp16 = mul(x = k_73, y = const_172_promoted_to_fp16)[name = string("op_3053_cast_fp16")]; bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; tensor input_247_cast_fp16 = concat(axis = var_23, interleave = input_247_interleave_0, values = (k_73, var_3053_cast_fp16))[name = string("input_247_cast_fp16")]; tensor normed_347_axes_0 = const()[name = string("normed_347_axes_0"), val = tensor([-1])]; tensor normed_347_cast_fp16 = layer_norm(axes = normed_347_axes_0, epsilon = var_8_to_fp16, x = input_247_cast_fp16)[name = string("normed_347_cast_fp16")]; tensor var_3058_split_sizes_0 = const()[name = string("op_3058_split_sizes_0"), val = tensor([256, 256])]; int32 var_3058_axis_0 = const()[name = string("op_3058_axis_0"), val = int32(-1)]; tensor var_3058_cast_fp16_0, tensor var_3058_cast_fp16_1 = split(axis = var_3058_axis_0, split_sizes = var_3058_split_sizes_0, x = normed_347_cast_fp16)[name = string("op_3058_cast_fp16")]; tensor var_3062_to_fp16 = const()[name = string("op_3062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301428672)))]; tensor out_149_cast_fp16 = mul(x = var_3058_cast_fp16_0, y = var_3062_to_fp16)[name = string("out_149_cast_fp16")]; tensor var_3065 = mul(x = out_147_cast_fp16, y = cos_1_quantized)[name = string("op_3065")]; tensor var_3066_split_sizes_0 = const()[name = string("op_3066_split_sizes_0"), val = tensor([128, 128])]; int32 var_3066_axis_0 = const()[name = string("op_3066_axis_0"), val = int32(-1)]; tensor var_3066_0, tensor var_3066_1 = split(axis = var_3066_axis_0, split_sizes = var_3066_split_sizes_0, x = out_147_cast_fp16)[name = string("op_3066")]; fp16 const_174_promoted = const()[name = string("const_174_promoted"), val = fp16(-0x1p+0)]; tensor var_3068 = mul(x = var_3066_1, y = const_174_promoted)[name = string("op_3068")]; bool var_3070_interleave_0 = const()[name = string("op_3070_interleave_0"), val = bool(false)]; tensor var_3070 = concat(axis = var_23, interleave = var_3070_interleave_0, values = (var_3068, var_3066_0))[name = string("op_3070")]; tensor var_3071 = mul(x = var_3070, y = sin_1_quantized)[name = string("op_3071")]; tensor q_77 = add(x = var_3065, y = var_3071)[name = string("q_77")]; tensor var_3073 = mul(x = out_149_cast_fp16, y = cos_1_quantized)[name = string("op_3073")]; tensor var_3074_split_sizes_0 = const()[name = string("op_3074_split_sizes_0"), val = tensor([128, 128])]; int32 var_3074_axis_0 = const()[name = string("op_3074_axis_0"), val = int32(-1)]; tensor var_3074_0, tensor var_3074_1 = split(axis = var_3074_axis_0, split_sizes = var_3074_split_sizes_0, x = out_149_cast_fp16)[name = string("op_3074")]; fp16 const_175_promoted = const()[name = string("const_175_promoted"), val = fp16(-0x1p+0)]; tensor var_3076 = mul(x = var_3074_1, y = const_175_promoted)[name = string("op_3076")]; bool var_3078_interleave_0 = const()[name = string("op_3078_interleave_0"), val = bool(false)]; tensor var_3078 = concat(axis = var_23, interleave = var_3078_interleave_0, values = (var_3076, var_3074_0))[name = string("op_3078")]; tensor var_3079 = mul(x = var_3078, y = sin_1_quantized)[name = string("op_3079")]; tensor hidden_states_145 = add(x = var_3073, y = var_3079)[name = string("hidden_states_145")]; tensor hidden_states_147_axes_0 = const()[name = string("hidden_states_147_axes_0"), val = tensor([2])]; tensor hidden_states_147 = expand_dims(axes = hidden_states_147_axes_0, x = hidden_states_145)[name = string("hidden_states_147")]; tensor var_3082 = const()[name = string("op_3082"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_149 = tile(reps = var_3082, x = hidden_states_147)[name = string("hidden_states_149")]; tensor var_3084 = const()[name = string("op_3084"), val = tensor([1, 3, 128, 256])]; tensor k_77 = reshape(shape = var_3084, x = hidden_states_149)[name = string("k_77")]; tensor hidden_states_153_axes_0 = const()[name = string("hidden_states_153_axes_0"), val = tensor([2])]; tensor hidden_states_151 = transpose(perm = var_3031, x = var_3030)[name = string("transpose_104")]; tensor hidden_states_153 = expand_dims(axes = hidden_states_153_axes_0, x = hidden_states_151)[name = string("hidden_states_153")]; tensor var_3087 = const()[name = string("op_3087"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_155 = tile(reps = var_3087, x = hidden_states_153)[name = string("hidden_states_155")]; tensor var_3089 = const()[name = string("op_3089"), val = tensor([1, 3, 128, 256])]; tensor v_25 = reshape(shape = var_3089, x = hidden_states_155)[name = string("v_25")]; bool var_3094_transpose_x_1 = const()[name = string("op_3094_transpose_x_1"), val = bool(false)]; bool var_3094_transpose_y_1 = const()[name = string("op_3094_transpose_y_1"), val = bool(true)]; tensor var_3094_cast_fp16 = matmul(transpose_x = var_3094_transpose_x_1, transpose_y = var_3094_transpose_y_1, x = q_77, y = k_77)[name = string("op_3094_cast_fp16")]; fp16 var_3095_to_fp16 = const()[name = string("op_3095_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_73_cast_fp16 = mul(x = var_3094_cast_fp16, y = var_3095_to_fp16)[name = string("attn_weights_73_cast_fp16")]; tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_75_cast_fp16")]; tensor var_3099_cast_fp16 = softmax(axis = var_23, x = attn_weights_75_cast_fp16)[name = string("op_3099_cast_fp16")]; bool var_3103_transpose_x_0 = const()[name = string("op_3103_transpose_x_0"), val = bool(false)]; bool var_3103_transpose_y_0 = const()[name = string("op_3103_transpose_y_0"), val = bool(false)]; tensor var_3103_cast_fp16 = matmul(transpose_x = var_3103_transpose_x_0, transpose_y = var_3103_transpose_y_0, x = var_3099_cast_fp16, y = v_25)[name = string("op_3103_cast_fp16")]; tensor var_3105 = const()[name = string("op_3105"), val = tensor([0, 2, 1, 3])]; tensor var_3108 = const()[name = string("op_3108"), val = tensor([1, 128, 768])]; tensor var_3106 = transpose(perm = var_3105, x = var_3103_cast_fp16)[name = string("transpose_103")]; tensor attn_out_75 = reshape(shape = var_3108, x = var_3106)[name = string("attn_out_75")]; tensor var_3110 = const()[name = string("op_3110"), val = tensor([0, 2, 1])]; tensor squeeze_12_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301429248))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302019136))))[name = string("squeeze_12_quantized")]; string var_3119_pad_type_0 = const()[name = string("op_3119_pad_type_0"), val = string("valid")]; int32 var_3119_groups_0 = const()[name = string("op_3119_groups_0"), val = int32(1)]; tensor var_3119_strides_0 = const()[name = string("op_3119_strides_0"), val = tensor([1])]; tensor var_3119_pad_0 = const()[name = string("op_3119_pad_0"), val = tensor([0, 0])]; tensor var_3119_dilations_0 = const()[name = string("op_3119_dilations_0"), val = tensor([1])]; tensor var_3111 = transpose(perm = var_3110, x = attn_out_75)[name = string("transpose_102")]; tensor var_3119 = conv(dilations = var_3119_dilations_0, groups = var_3119_groups_0, pad = var_3119_pad_0, pad_type = var_3119_pad_type_0, strides = var_3119_strides_0, weight = squeeze_12_quantized, x = var_3111)[name = string("op_3119")]; tensor var_3120 = const()[name = string("op_3120"), val = tensor([0, 2, 1])]; fp16 const_176_promoted_to_fp16 = const()[name = string("const_176_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_201 = transpose(perm = var_3120, x = var_3119)[name = string("transpose_101")]; tensor var_3124_cast_fp16 = mul(x = x_201, y = const_176_promoted_to_fp16)[name = string("op_3124_cast_fp16")]; bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; tensor input_251_cast_fp16 = concat(axis = var_23, interleave = input_251_interleave_0, values = (x_201, var_3124_cast_fp16))[name = string("input_251_cast_fp16")]; tensor normed_351_axes_0 = const()[name = string("normed_351_axes_0"), val = tensor([-1])]; tensor normed_351_cast_fp16 = layer_norm(axes = normed_351_axes_0, epsilon = var_8_to_fp16, x = input_251_cast_fp16)[name = string("normed_351_cast_fp16")]; tensor var_3129_split_sizes_0 = const()[name = string("op_3129_split_sizes_0"), val = tensor([768, 768])]; int32 var_3129_axis_0 = const()[name = string("op_3129_axis_0"), val = int32(-1)]; tensor var_3129_cast_fp16_0, tensor var_3129_cast_fp16_1 = split(axis = var_3129_axis_0, split_sizes = var_3129_split_sizes_0, x = normed_351_cast_fp16)[name = string("op_3129_cast_fp16")]; tensor var_3133_to_fp16 = const()[name = string("op_3133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302020736)))]; tensor out_151_cast_fp16 = mul(x = var_3129_cast_fp16_0, y = var_3133_to_fp16)[name = string("out_151_cast_fp16")]; tensor x_203_cast_fp16 = add(x = x_193_cast_fp16, y = out_151_cast_fp16)[name = string("x_203_cast_fp16")]; fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3140_cast_fp16 = mul(x = x_203_cast_fp16, y = const_178_promoted_to_fp16)[name = string("op_3140_cast_fp16")]; bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; tensor input_253_cast_fp16 = concat(axis = var_23, interleave = input_253_interleave_0, values = (x_203_cast_fp16, var_3140_cast_fp16))[name = string("input_253_cast_fp16")]; tensor normed_355_axes_0 = const()[name = string("normed_355_axes_0"), val = tensor([-1])]; tensor normed_355_cast_fp16 = layer_norm(axes = normed_355_axes_0, epsilon = var_8_to_fp16, x = input_253_cast_fp16)[name = string("normed_355_cast_fp16")]; tensor var_3145_split_sizes_0 = const()[name = string("op_3145_split_sizes_0"), val = tensor([768, 768])]; int32 var_3145_axis_0 = const()[name = string("op_3145_axis_0"), val = int32(-1)]; tensor var_3145_cast_fp16_0, tensor var_3145_cast_fp16_1 = split(axis = var_3145_axis_0, split_sizes = var_3145_split_sizes_0, x = normed_355_cast_fp16)[name = string("op_3145_cast_fp16")]; tensor var_3149_to_fp16 = const()[name = string("op_3149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302022336)))]; tensor out_153_cast_fp16 = mul(x = var_3145_cast_fp16_0, y = var_3149_to_fp16)[name = string("out_153_cast_fp16")]; tensor var_3156 = const()[name = string("op_3156"), val = tensor([0, 2, 1])]; tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; tensor var_3157 = transpose(perm = var_3156, x = out_153_cast_fp16)[name = string("transpose_100")]; tensor input_255 = expand_dims(axes = input_255_axes_0, x = var_3157)[name = string("input_255")]; string gate_49_pad_type_0 = const()[name = string("gate_49_pad_type_0"), val = string("valid")]; tensor gate_49_strides_0 = const()[name = string("gate_49_strides_0"), val = tensor([1, 1])]; tensor gate_49_pad_0 = const()[name = string("gate_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_49_dilations_0 = const()[name = string("gate_49_dilations_0"), val = tensor([1, 1])]; int32 gate_49_groups_0 = const()[name = string("gate_49_groups_0"), val = int32(1)]; tensor gate_49 = conv(dilations = gate_49_dilations_0, groups = gate_49_groups_0, pad = gate_49_pad_0, pad_type = gate_49_pad_type_0, strides = gate_49_strides_0, weight = encoder_layers_12_mlp_gate_proj_weight_quantized, x = input_255)[name = string("gate_49")]; string up_25_pad_type_0 = const()[name = string("up_25_pad_type_0"), val = string("valid")]; tensor up_25_strides_0 = const()[name = string("up_25_strides_0"), val = tensor([1, 1])]; tensor up_25_pad_0 = const()[name = string("up_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_25_dilations_0 = const()[name = string("up_25_dilations_0"), val = tensor([1, 1])]; int32 up_25_groups_0 = const()[name = string("up_25_groups_0"), val = int32(1)]; tensor up_25 = conv(dilations = up_25_dilations_0, groups = up_25_groups_0, pad = up_25_pad_0, pad_type = up_25_pad_type_0, strides = up_25_strides_0, weight = encoder_layers_12_mlp_up_proj_weight_quantized, x = input_255)[name = string("up_25")]; string gate_51_mode_0 = const()[name = string("gate_51_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_51 = gelu(mode = gate_51_mode_0, x = gate_49)[name = string("gate_51")]; tensor input_257 = mul(x = gate_51, y = up_25)[name = string("input_257")]; string var_3178_pad_type_0 = const()[name = string("op_3178_pad_type_0"), val = string("valid")]; tensor var_3178_strides_0 = const()[name = string("op_3178_strides_0"), val = tensor([1, 1])]; tensor var_3178_pad_0 = const()[name = string("op_3178_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3178_dilations_0 = const()[name = string("op_3178_dilations_0"), val = tensor([1, 1])]; int32 var_3178_groups_0 = const()[name = string("op_3178_groups_0"), val = int32(1)]; tensor var_3178 = conv(dilations = var_3178_dilations_0, groups = var_3178_groups_0, pad = var_3178_pad_0, pad_type = var_3178_pad_type_0, strides = var_3178_strides_0, weight = encoder_layers_12_mlp_down_proj_weight_quantized, x = input_257)[name = string("op_3178")]; tensor var_3179_axes_0 = const()[name = string("op_3179_axes_0"), val = tensor([2])]; tensor var_3179 = squeeze(axes = var_3179_axes_0, x = var_3178)[name = string("op_3179")]; tensor var_3180 = const()[name = string("op_3180"), val = tensor([0, 2, 1])]; fp16 const_180_promoted_to_fp16 = const()[name = string("const_180_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_207 = transpose(perm = var_3180, x = var_3179)[name = string("transpose_99")]; tensor var_3184_cast_fp16 = mul(x = x_207, y = const_180_promoted_to_fp16)[name = string("op_3184_cast_fp16")]; bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; tensor input_259_cast_fp16 = concat(axis = var_23, interleave = input_259_interleave_0, values = (x_207, var_3184_cast_fp16))[name = string("input_259_cast_fp16")]; tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_8_to_fp16, x = input_259_cast_fp16)[name = string("normed_361_cast_fp16")]; tensor var_3189_split_sizes_0 = const()[name = string("op_3189_split_sizes_0"), val = tensor([768, 768])]; int32 var_3189_axis_0 = const()[name = string("op_3189_axis_0"), val = int32(-1)]; tensor var_3189_cast_fp16_0, tensor var_3189_cast_fp16_1 = split(axis = var_3189_axis_0, split_sizes = var_3189_split_sizes_0, x = normed_361_cast_fp16)[name = string("op_3189_cast_fp16")]; tensor var_3193_to_fp16 = const()[name = string("op_3193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302023936)))]; tensor out_155_cast_fp16 = mul(x = var_3189_cast_fp16_0, y = var_3193_to_fp16)[name = string("out_155_cast_fp16")]; tensor x_209_cast_fp16 = add(x = x_203_cast_fp16, y = out_155_cast_fp16)[name = string("x_209_cast_fp16")]; fp16 const_182_promoted_to_fp16 = const()[name = string("const_182_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3222_cast_fp16 = mul(x = x_209_cast_fp16, y = const_182_promoted_to_fp16)[name = string("op_3222_cast_fp16")]; bool input_261_interleave_0 = const()[name = string("input_261_interleave_0"), val = bool(false)]; tensor input_261_cast_fp16 = concat(axis = var_23, interleave = input_261_interleave_0, values = (x_209_cast_fp16, var_3222_cast_fp16))[name = string("input_261_cast_fp16")]; tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_8_to_fp16, x = input_261_cast_fp16)[name = string("normed_365_cast_fp16")]; tensor var_3227_split_sizes_0 = const()[name = string("op_3227_split_sizes_0"), val = tensor([768, 768])]; int32 var_3227_axis_0 = const()[name = string("op_3227_axis_0"), val = int32(-1)]; tensor var_3227_cast_fp16_0, tensor var_3227_cast_fp16_1 = split(axis = var_3227_axis_0, split_sizes = var_3227_split_sizes_0, x = normed_365_cast_fp16)[name = string("op_3227_cast_fp16")]; tensor var_3231_to_fp16 = const()[name = string("op_3231_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302025536)))]; tensor out_157_cast_fp16 = mul(x = var_3227_cast_fp16_0, y = var_3231_to_fp16)[name = string("out_157_cast_fp16")]; tensor var_3237 = const()[name = string("op_3237"), val = tensor([0, 2, 1])]; tensor var_3239_axes_0 = const()[name = string("op_3239_axes_0"), val = tensor([2])]; tensor var_3238_cast_fp16 = transpose(perm = var_3237, x = out_157_cast_fp16)[name = string("transpose_98")]; tensor var_3239_cast_fp16 = expand_dims(axes = var_3239_axes_0, x = var_3238_cast_fp16)[name = string("op_3239_cast_fp16")]; string var_3246_pad_type_0 = const()[name = string("op_3246_pad_type_0"), val = string("valid")]; tensor var_3246_strides_0 = const()[name = string("op_3246_strides_0"), val = tensor([1, 1])]; tensor var_3246_pad_0 = const()[name = string("op_3246_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3246_dilations_0 = const()[name = string("op_3246_dilations_0"), val = tensor([1, 1])]; int32 var_3246_groups_0 = const()[name = string("op_3246_groups_0"), val = int32(1)]; tensor var_3246 = conv(dilations = var_3246_dilations_0, groups = var_3246_groups_0, pad = var_3246_pad_0, pad_type = var_3246_pad_type_0, strides = var_3246_strides_0, weight = encoder_layers_13_self_attn_q_proj_weight_quantized, x = var_3239_cast_fp16)[name = string("op_3246")]; tensor var_3247 = const()[name = string("op_3247"), val = tensor([1, 3, 256, 128])]; tensor var_3248 = reshape(shape = var_3247, x = var_3246)[name = string("op_3248")]; tensor var_3249 = const()[name = string("op_3249"), val = tensor([0, 1, 3, 2])]; string var_3256_pad_type_0 = const()[name = string("op_3256_pad_type_0"), val = string("valid")]; tensor var_3256_strides_0 = const()[name = string("op_3256_strides_0"), val = tensor([1, 1])]; tensor var_3256_pad_0 = const()[name = string("op_3256_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3256_dilations_0 = const()[name = string("op_3256_dilations_0"), val = tensor([1, 1])]; int32 var_3256_groups_0 = const()[name = string("op_3256_groups_0"), val = int32(1)]; tensor var_3256 = conv(dilations = var_3256_dilations_0, groups = var_3256_groups_0, pad = var_3256_pad_0, pad_type = var_3256_pad_type_0, strides = var_3256_strides_0, weight = encoder_layers_13_self_attn_k_proj_weight_quantized, x = var_3239_cast_fp16)[name = string("op_3256")]; tensor var_3257 = const()[name = string("op_3257"), val = tensor([1, 1, 256, 128])]; tensor var_3258 = reshape(shape = var_3257, x = var_3256)[name = string("op_3258")]; tensor var_3259 = const()[name = string("op_3259"), val = tensor([0, 1, 3, 2])]; string var_3266_pad_type_0 = const()[name = string("op_3266_pad_type_0"), val = string("valid")]; tensor var_3266_strides_0 = const()[name = string("op_3266_strides_0"), val = tensor([1, 1])]; tensor var_3266_pad_0 = const()[name = string("op_3266_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3266_dilations_0 = const()[name = string("op_3266_dilations_0"), val = tensor([1, 1])]; int32 var_3266_groups_0 = const()[name = string("op_3266_groups_0"), val = int32(1)]; tensor var_3266 = conv(dilations = var_3266_dilations_0, groups = var_3266_groups_0, pad = var_3266_pad_0, pad_type = var_3266_pad_type_0, strides = var_3266_strides_0, weight = encoder_layers_13_self_attn_v_proj_weight_quantized, x = var_3239_cast_fp16)[name = string("op_3266")]; tensor var_3267 = const()[name = string("op_3267"), val = tensor([1, 1, 256, 128])]; tensor var_3268 = reshape(shape = var_3267, x = var_3266)[name = string("op_3268")]; tensor var_3269 = const()[name = string("op_3269"), val = tensor([0, 1, 3, 2])]; fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_79 = transpose(perm = var_3249, x = var_3248)[name = string("transpose_97")]; tensor var_3275_cast_fp16 = mul(x = q_79, y = const_184_promoted_to_fp16)[name = string("op_3275_cast_fp16")]; bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; tensor input_265_cast_fp16 = concat(axis = var_23, interleave = input_265_interleave_0, values = (q_79, var_3275_cast_fp16))[name = string("input_265_cast_fp16")]; tensor normed_371_axes_0 = const()[name = string("normed_371_axes_0"), val = tensor([-1])]; tensor normed_371_cast_fp16 = layer_norm(axes = normed_371_axes_0, epsilon = var_8_to_fp16, x = input_265_cast_fp16)[name = string("normed_371_cast_fp16")]; tensor var_3280_split_sizes_0 = const()[name = string("op_3280_split_sizes_0"), val = tensor([256, 256])]; int32 var_3280_axis_0 = const()[name = string("op_3280_axis_0"), val = int32(-1)]; tensor var_3280_cast_fp16_0, tensor var_3280_cast_fp16_1 = split(axis = var_3280_axis_0, split_sizes = var_3280_split_sizes_0, x = normed_371_cast_fp16)[name = string("op_3280_cast_fp16")]; tensor var_3284_to_fp16 = const()[name = string("op_3284_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302027136)))]; tensor out_159_cast_fp16 = mul(x = var_3280_cast_fp16_0, y = var_3284_to_fp16)[name = string("out_159_cast_fp16")]; fp16 const_186_promoted_to_fp16 = const()[name = string("const_186_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_79 = transpose(perm = var_3259, x = var_3258)[name = string("transpose_96")]; tensor var_3291_cast_fp16 = mul(x = k_79, y = const_186_promoted_to_fp16)[name = string("op_3291_cast_fp16")]; bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; tensor input_267_cast_fp16 = concat(axis = var_23, interleave = input_267_interleave_0, values = (k_79, var_3291_cast_fp16))[name = string("input_267_cast_fp16")]; tensor normed_375_axes_0 = const()[name = string("normed_375_axes_0"), val = tensor([-1])]; tensor normed_375_cast_fp16 = layer_norm(axes = normed_375_axes_0, epsilon = var_8_to_fp16, x = input_267_cast_fp16)[name = string("normed_375_cast_fp16")]; tensor var_3296_split_sizes_0 = const()[name = string("op_3296_split_sizes_0"), val = tensor([256, 256])]; int32 var_3296_axis_0 = const()[name = string("op_3296_axis_0"), val = int32(-1)]; tensor var_3296_cast_fp16_0, tensor var_3296_cast_fp16_1 = split(axis = var_3296_axis_0, split_sizes = var_3296_split_sizes_0, x = normed_375_cast_fp16)[name = string("op_3296_cast_fp16")]; tensor var_3300_to_fp16 = const()[name = string("op_3300_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302027712)))]; tensor out_161_cast_fp16 = mul(x = var_3296_cast_fp16_0, y = var_3300_to_fp16)[name = string("out_161_cast_fp16")]; tensor var_3303 = mul(x = out_159_cast_fp16, y = cos_1_quantized)[name = string("op_3303")]; tensor var_3304_split_sizes_0 = const()[name = string("op_3304_split_sizes_0"), val = tensor([128, 128])]; int32 var_3304_axis_0 = const()[name = string("op_3304_axis_0"), val = int32(-1)]; tensor var_3304_0, tensor var_3304_1 = split(axis = var_3304_axis_0, split_sizes = var_3304_split_sizes_0, x = out_159_cast_fp16)[name = string("op_3304")]; fp16 const_188_promoted = const()[name = string("const_188_promoted"), val = fp16(-0x1p+0)]; tensor var_3306 = mul(x = var_3304_1, y = const_188_promoted)[name = string("op_3306")]; bool var_3308_interleave_0 = const()[name = string("op_3308_interleave_0"), val = bool(false)]; tensor var_3308 = concat(axis = var_23, interleave = var_3308_interleave_0, values = (var_3306, var_3304_0))[name = string("op_3308")]; tensor var_3309 = mul(x = var_3308, y = sin_1_quantized)[name = string("op_3309")]; tensor q_83 = add(x = var_3303, y = var_3309)[name = string("q_83")]; tensor var_3311 = mul(x = out_161_cast_fp16, y = cos_1_quantized)[name = string("op_3311")]; tensor var_3312_split_sizes_0 = const()[name = string("op_3312_split_sizes_0"), val = tensor([128, 128])]; int32 var_3312_axis_0 = const()[name = string("op_3312_axis_0"), val = int32(-1)]; tensor var_3312_0, tensor var_3312_1 = split(axis = var_3312_axis_0, split_sizes = var_3312_split_sizes_0, x = out_161_cast_fp16)[name = string("op_3312")]; fp16 const_189_promoted = const()[name = string("const_189_promoted"), val = fp16(-0x1p+0)]; tensor var_3314 = mul(x = var_3312_1, y = const_189_promoted)[name = string("op_3314")]; bool var_3316_interleave_0 = const()[name = string("op_3316_interleave_0"), val = bool(false)]; tensor var_3316 = concat(axis = var_23, interleave = var_3316_interleave_0, values = (var_3314, var_3312_0))[name = string("op_3316")]; tensor var_3317 = mul(x = var_3316, y = sin_1_quantized)[name = string("op_3317")]; tensor hidden_states_157 = add(x = var_3311, y = var_3317)[name = string("hidden_states_157")]; tensor hidden_states_159_axes_0 = const()[name = string("hidden_states_159_axes_0"), val = tensor([2])]; tensor hidden_states_159 = expand_dims(axes = hidden_states_159_axes_0, x = hidden_states_157)[name = string("hidden_states_159")]; tensor var_3320 = const()[name = string("op_3320"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_161 = tile(reps = var_3320, x = hidden_states_159)[name = string("hidden_states_161")]; tensor var_3322 = const()[name = string("op_3322"), val = tensor([1, 3, 128, 256])]; tensor k_83 = reshape(shape = var_3322, x = hidden_states_161)[name = string("k_83")]; tensor hidden_states_165_axes_0 = const()[name = string("hidden_states_165_axes_0"), val = tensor([2])]; tensor hidden_states_163 = transpose(perm = var_3269, x = var_3268)[name = string("transpose_95")]; tensor hidden_states_165 = expand_dims(axes = hidden_states_165_axes_0, x = hidden_states_163)[name = string("hidden_states_165")]; tensor var_3325 = const()[name = string("op_3325"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_167 = tile(reps = var_3325, x = hidden_states_165)[name = string("hidden_states_167")]; tensor var_3327 = const()[name = string("op_3327"), val = tensor([1, 3, 128, 256])]; tensor v_27 = reshape(shape = var_3327, x = hidden_states_167)[name = string("v_27")]; bool var_3332_transpose_x_1 = const()[name = string("op_3332_transpose_x_1"), val = bool(false)]; bool var_3332_transpose_y_1 = const()[name = string("op_3332_transpose_y_1"), val = bool(true)]; tensor var_3332_cast_fp16 = matmul(transpose_x = var_3332_transpose_x_1, transpose_y = var_3332_transpose_y_1, x = q_83, y = k_83)[name = string("op_3332_cast_fp16")]; fp16 var_3333_to_fp16 = const()[name = string("op_3333_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_79_cast_fp16 = mul(x = var_3332_cast_fp16, y = var_3333_to_fp16)[name = string("attn_weights_79_cast_fp16")]; tensor attn_weights_81_cast_fp16 = add(x = attn_weights_79_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_81_cast_fp16")]; tensor var_3337_cast_fp16 = softmax(axis = var_23, x = attn_weights_81_cast_fp16)[name = string("op_3337_cast_fp16")]; bool var_3341_transpose_x_0 = const()[name = string("op_3341_transpose_x_0"), val = bool(false)]; bool var_3341_transpose_y_0 = const()[name = string("op_3341_transpose_y_0"), val = bool(false)]; tensor var_3341_cast_fp16 = matmul(transpose_x = var_3341_transpose_x_0, transpose_y = var_3341_transpose_y_0, x = var_3337_cast_fp16, y = v_27)[name = string("op_3341_cast_fp16")]; tensor var_3343 = const()[name = string("op_3343"), val = tensor([0, 2, 1, 3])]; tensor var_3346 = const()[name = string("op_3346"), val = tensor([1, 128, 768])]; tensor var_3344 = transpose(perm = var_3343, x = var_3341_cast_fp16)[name = string("transpose_94")]; tensor attn_out_81 = reshape(shape = var_3346, x = var_3344)[name = string("attn_out_81")]; tensor var_3348 = const()[name = string("op_3348"), val = tensor([0, 2, 1])]; tensor squeeze_13_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302028288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302618176))))[name = string("squeeze_13_quantized")]; string var_3357_pad_type_0 = const()[name = string("op_3357_pad_type_0"), val = string("valid")]; int32 var_3357_groups_0 = const()[name = string("op_3357_groups_0"), val = int32(1)]; tensor var_3357_strides_0 = const()[name = string("op_3357_strides_0"), val = tensor([1])]; tensor var_3357_pad_0 = const()[name = string("op_3357_pad_0"), val = tensor([0, 0])]; tensor var_3357_dilations_0 = const()[name = string("op_3357_dilations_0"), val = tensor([1])]; tensor var_3349 = transpose(perm = var_3348, x = attn_out_81)[name = string("transpose_93")]; tensor var_3357 = conv(dilations = var_3357_dilations_0, groups = var_3357_groups_0, pad = var_3357_pad_0, pad_type = var_3357_pad_type_0, strides = var_3357_strides_0, weight = squeeze_13_quantized, x = var_3349)[name = string("op_3357")]; tensor var_3358 = const()[name = string("op_3358"), val = tensor([0, 2, 1])]; fp16 const_190_promoted_to_fp16 = const()[name = string("const_190_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_217 = transpose(perm = var_3358, x = var_3357)[name = string("transpose_92")]; tensor var_3362_cast_fp16 = mul(x = x_217, y = const_190_promoted_to_fp16)[name = string("op_3362_cast_fp16")]; bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; tensor input_271_cast_fp16 = concat(axis = var_23, interleave = input_271_interleave_0, values = (x_217, var_3362_cast_fp16))[name = string("input_271_cast_fp16")]; tensor normed_379_axes_0 = const()[name = string("normed_379_axes_0"), val = tensor([-1])]; tensor normed_379_cast_fp16 = layer_norm(axes = normed_379_axes_0, epsilon = var_8_to_fp16, x = input_271_cast_fp16)[name = string("normed_379_cast_fp16")]; tensor var_3367_split_sizes_0 = const()[name = string("op_3367_split_sizes_0"), val = tensor([768, 768])]; int32 var_3367_axis_0 = const()[name = string("op_3367_axis_0"), val = int32(-1)]; tensor var_3367_cast_fp16_0, tensor var_3367_cast_fp16_1 = split(axis = var_3367_axis_0, split_sizes = var_3367_split_sizes_0, x = normed_379_cast_fp16)[name = string("op_3367_cast_fp16")]; tensor var_3371_to_fp16 = const()[name = string("op_3371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302619776)))]; tensor out_163_cast_fp16 = mul(x = var_3367_cast_fp16_0, y = var_3371_to_fp16)[name = string("out_163_cast_fp16")]; tensor x_219_cast_fp16 = add(x = x_209_cast_fp16, y = out_163_cast_fp16)[name = string("x_219_cast_fp16")]; fp16 const_192_promoted_to_fp16 = const()[name = string("const_192_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3378_cast_fp16 = mul(x = x_219_cast_fp16, y = const_192_promoted_to_fp16)[name = string("op_3378_cast_fp16")]; bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; tensor input_273_cast_fp16 = concat(axis = var_23, interleave = input_273_interleave_0, values = (x_219_cast_fp16, var_3378_cast_fp16))[name = string("input_273_cast_fp16")]; tensor normed_383_axes_0 = const()[name = string("normed_383_axes_0"), val = tensor([-1])]; tensor normed_383_cast_fp16 = layer_norm(axes = normed_383_axes_0, epsilon = var_8_to_fp16, x = input_273_cast_fp16)[name = string("normed_383_cast_fp16")]; tensor var_3383_split_sizes_0 = const()[name = string("op_3383_split_sizes_0"), val = tensor([768, 768])]; int32 var_3383_axis_0 = const()[name = string("op_3383_axis_0"), val = int32(-1)]; tensor var_3383_cast_fp16_0, tensor var_3383_cast_fp16_1 = split(axis = var_3383_axis_0, split_sizes = var_3383_split_sizes_0, x = normed_383_cast_fp16)[name = string("op_3383_cast_fp16")]; tensor var_3387_to_fp16 = const()[name = string("op_3387_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302621376)))]; tensor out_165_cast_fp16 = mul(x = var_3383_cast_fp16_0, y = var_3387_to_fp16)[name = string("out_165_cast_fp16")]; tensor var_3394 = const()[name = string("op_3394"), val = tensor([0, 2, 1])]; tensor input_275_axes_0 = const()[name = string("input_275_axes_0"), val = tensor([2])]; tensor var_3395 = transpose(perm = var_3394, x = out_165_cast_fp16)[name = string("transpose_91")]; tensor input_275 = expand_dims(axes = input_275_axes_0, x = var_3395)[name = string("input_275")]; string gate_53_pad_type_0 = const()[name = string("gate_53_pad_type_0"), val = string("valid")]; tensor gate_53_strides_0 = const()[name = string("gate_53_strides_0"), val = tensor([1, 1])]; tensor gate_53_pad_0 = const()[name = string("gate_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_53_dilations_0 = const()[name = string("gate_53_dilations_0"), val = tensor([1, 1])]; int32 gate_53_groups_0 = const()[name = string("gate_53_groups_0"), val = int32(1)]; tensor gate_53 = conv(dilations = gate_53_dilations_0, groups = gate_53_groups_0, pad = gate_53_pad_0, pad_type = gate_53_pad_type_0, strides = gate_53_strides_0, weight = encoder_layers_13_mlp_gate_proj_weight_quantized, x = input_275)[name = string("gate_53")]; string up_27_pad_type_0 = const()[name = string("up_27_pad_type_0"), val = string("valid")]; tensor up_27_strides_0 = const()[name = string("up_27_strides_0"), val = tensor([1, 1])]; tensor up_27_pad_0 = const()[name = string("up_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_27_dilations_0 = const()[name = string("up_27_dilations_0"), val = tensor([1, 1])]; int32 up_27_groups_0 = const()[name = string("up_27_groups_0"), val = int32(1)]; tensor up_27 = conv(dilations = up_27_dilations_0, groups = up_27_groups_0, pad = up_27_pad_0, pad_type = up_27_pad_type_0, strides = up_27_strides_0, weight = encoder_layers_13_mlp_up_proj_weight_quantized, x = input_275)[name = string("up_27")]; string gate_55_mode_0 = const()[name = string("gate_55_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_55 = gelu(mode = gate_55_mode_0, x = gate_53)[name = string("gate_55")]; tensor input_277 = mul(x = gate_55, y = up_27)[name = string("input_277")]; string var_3416_pad_type_0 = const()[name = string("op_3416_pad_type_0"), val = string("valid")]; tensor var_3416_strides_0 = const()[name = string("op_3416_strides_0"), val = tensor([1, 1])]; tensor var_3416_pad_0 = const()[name = string("op_3416_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3416_dilations_0 = const()[name = string("op_3416_dilations_0"), val = tensor([1, 1])]; int32 var_3416_groups_0 = const()[name = string("op_3416_groups_0"), val = int32(1)]; tensor var_3416 = conv(dilations = var_3416_dilations_0, groups = var_3416_groups_0, pad = var_3416_pad_0, pad_type = var_3416_pad_type_0, strides = var_3416_strides_0, weight = encoder_layers_13_mlp_down_proj_weight_quantized, x = input_277)[name = string("op_3416")]; tensor var_3417_axes_0 = const()[name = string("op_3417_axes_0"), val = tensor([2])]; tensor var_3417 = squeeze(axes = var_3417_axes_0, x = var_3416)[name = string("op_3417")]; tensor var_3418 = const()[name = string("op_3418"), val = tensor([0, 2, 1])]; fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_223 = transpose(perm = var_3418, x = var_3417)[name = string("transpose_90")]; tensor var_3422_cast_fp16 = mul(x = x_223, y = const_194_promoted_to_fp16)[name = string("op_3422_cast_fp16")]; bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; tensor input_279_cast_fp16 = concat(axis = var_23, interleave = input_279_interleave_0, values = (x_223, var_3422_cast_fp16))[name = string("input_279_cast_fp16")]; tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_8_to_fp16, x = input_279_cast_fp16)[name = string("normed_389_cast_fp16")]; tensor var_3427_split_sizes_0 = const()[name = string("op_3427_split_sizes_0"), val = tensor([768, 768])]; int32 var_3427_axis_0 = const()[name = string("op_3427_axis_0"), val = int32(-1)]; tensor var_3427_cast_fp16_0, tensor var_3427_cast_fp16_1 = split(axis = var_3427_axis_0, split_sizes = var_3427_split_sizes_0, x = normed_389_cast_fp16)[name = string("op_3427_cast_fp16")]; tensor var_3431_to_fp16 = const()[name = string("op_3431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302622976)))]; tensor out_167_cast_fp16 = mul(x = var_3427_cast_fp16_0, y = var_3431_to_fp16)[name = string("out_167_cast_fp16")]; tensor x_225_cast_fp16 = add(x = x_219_cast_fp16, y = out_167_cast_fp16)[name = string("x_225_cast_fp16")]; fp16 const_196_promoted_to_fp16 = const()[name = string("const_196_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3460_cast_fp16 = mul(x = x_225_cast_fp16, y = const_196_promoted_to_fp16)[name = string("op_3460_cast_fp16")]; bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; tensor input_281_cast_fp16 = concat(axis = var_23, interleave = input_281_interleave_0, values = (x_225_cast_fp16, var_3460_cast_fp16))[name = string("input_281_cast_fp16")]; tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_8_to_fp16, x = input_281_cast_fp16)[name = string("normed_393_cast_fp16")]; tensor var_3465_split_sizes_0 = const()[name = string("op_3465_split_sizes_0"), val = tensor([768, 768])]; int32 var_3465_axis_0 = const()[name = string("op_3465_axis_0"), val = int32(-1)]; tensor var_3465_cast_fp16_0, tensor var_3465_cast_fp16_1 = split(axis = var_3465_axis_0, split_sizes = var_3465_split_sizes_0, x = normed_393_cast_fp16)[name = string("op_3465_cast_fp16")]; tensor var_3469_to_fp16 = const()[name = string("op_3469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302624576)))]; tensor out_169_cast_fp16 = mul(x = var_3465_cast_fp16_0, y = var_3469_to_fp16)[name = string("out_169_cast_fp16")]; tensor var_3475 = const()[name = string("op_3475"), val = tensor([0, 2, 1])]; tensor var_3477_axes_0 = const()[name = string("op_3477_axes_0"), val = tensor([2])]; tensor var_3476_cast_fp16 = transpose(perm = var_3475, x = out_169_cast_fp16)[name = string("transpose_89")]; tensor var_3477_cast_fp16 = expand_dims(axes = var_3477_axes_0, x = var_3476_cast_fp16)[name = string("op_3477_cast_fp16")]; string var_3484_pad_type_0 = const()[name = string("op_3484_pad_type_0"), val = string("valid")]; tensor var_3484_strides_0 = const()[name = string("op_3484_strides_0"), val = tensor([1, 1])]; tensor var_3484_pad_0 = const()[name = string("op_3484_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3484_dilations_0 = const()[name = string("op_3484_dilations_0"), val = tensor([1, 1])]; int32 var_3484_groups_0 = const()[name = string("op_3484_groups_0"), val = int32(1)]; tensor var_3484 = conv(dilations = var_3484_dilations_0, groups = var_3484_groups_0, pad = var_3484_pad_0, pad_type = var_3484_pad_type_0, strides = var_3484_strides_0, weight = encoder_layers_14_self_attn_q_proj_weight_quantized, x = var_3477_cast_fp16)[name = string("op_3484")]; tensor var_3485 = const()[name = string("op_3485"), val = tensor([1, 3, 256, 128])]; tensor var_3486 = reshape(shape = var_3485, x = var_3484)[name = string("op_3486")]; tensor var_3487 = const()[name = string("op_3487"), val = tensor([0, 1, 3, 2])]; string var_3494_pad_type_0 = const()[name = string("op_3494_pad_type_0"), val = string("valid")]; tensor var_3494_strides_0 = const()[name = string("op_3494_strides_0"), val = tensor([1, 1])]; tensor var_3494_pad_0 = const()[name = string("op_3494_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3494_dilations_0 = const()[name = string("op_3494_dilations_0"), val = tensor([1, 1])]; int32 var_3494_groups_0 = const()[name = string("op_3494_groups_0"), val = int32(1)]; tensor var_3494 = conv(dilations = var_3494_dilations_0, groups = var_3494_groups_0, pad = var_3494_pad_0, pad_type = var_3494_pad_type_0, strides = var_3494_strides_0, weight = encoder_layers_14_self_attn_k_proj_weight_quantized, x = var_3477_cast_fp16)[name = string("op_3494")]; tensor var_3495 = const()[name = string("op_3495"), val = tensor([1, 1, 256, 128])]; tensor var_3496 = reshape(shape = var_3495, x = var_3494)[name = string("op_3496")]; tensor var_3497 = const()[name = string("op_3497"), val = tensor([0, 1, 3, 2])]; string var_3504_pad_type_0 = const()[name = string("op_3504_pad_type_0"), val = string("valid")]; tensor var_3504_strides_0 = const()[name = string("op_3504_strides_0"), val = tensor([1, 1])]; tensor var_3504_pad_0 = const()[name = string("op_3504_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3504_dilations_0 = const()[name = string("op_3504_dilations_0"), val = tensor([1, 1])]; int32 var_3504_groups_0 = const()[name = string("op_3504_groups_0"), val = int32(1)]; tensor var_3504 = conv(dilations = var_3504_dilations_0, groups = var_3504_groups_0, pad = var_3504_pad_0, pad_type = var_3504_pad_type_0, strides = var_3504_strides_0, weight = encoder_layers_14_self_attn_v_proj_weight_quantized, x = var_3477_cast_fp16)[name = string("op_3504")]; tensor var_3505 = const()[name = string("op_3505"), val = tensor([1, 1, 256, 128])]; tensor var_3506 = reshape(shape = var_3505, x = var_3504)[name = string("op_3506")]; tensor var_3507 = const()[name = string("op_3507"), val = tensor([0, 1, 3, 2])]; fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_85 = transpose(perm = var_3487, x = var_3486)[name = string("transpose_88")]; tensor var_3513_cast_fp16 = mul(x = q_85, y = const_198_promoted_to_fp16)[name = string("op_3513_cast_fp16")]; bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; tensor input_285_cast_fp16 = concat(axis = var_23, interleave = input_285_interleave_0, values = (q_85, var_3513_cast_fp16))[name = string("input_285_cast_fp16")]; tensor normed_399_axes_0 = const()[name = string("normed_399_axes_0"), val = tensor([-1])]; tensor normed_399_cast_fp16 = layer_norm(axes = normed_399_axes_0, epsilon = var_8_to_fp16, x = input_285_cast_fp16)[name = string("normed_399_cast_fp16")]; tensor var_3518_split_sizes_0 = const()[name = string("op_3518_split_sizes_0"), val = tensor([256, 256])]; int32 var_3518_axis_0 = const()[name = string("op_3518_axis_0"), val = int32(-1)]; tensor var_3518_cast_fp16_0, tensor var_3518_cast_fp16_1 = split(axis = var_3518_axis_0, split_sizes = var_3518_split_sizes_0, x = normed_399_cast_fp16)[name = string("op_3518_cast_fp16")]; tensor var_3522_to_fp16 = const()[name = string("op_3522_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302626176)))]; tensor out_171_cast_fp16 = mul(x = var_3518_cast_fp16_0, y = var_3522_to_fp16)[name = string("out_171_cast_fp16")]; fp16 const_200_promoted_to_fp16 = const()[name = string("const_200_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_85 = transpose(perm = var_3497, x = var_3496)[name = string("transpose_87")]; tensor var_3529_cast_fp16 = mul(x = k_85, y = const_200_promoted_to_fp16)[name = string("op_3529_cast_fp16")]; bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; tensor input_287_cast_fp16 = concat(axis = var_23, interleave = input_287_interleave_0, values = (k_85, var_3529_cast_fp16))[name = string("input_287_cast_fp16")]; tensor normed_403_axes_0 = const()[name = string("normed_403_axes_0"), val = tensor([-1])]; tensor normed_403_cast_fp16 = layer_norm(axes = normed_403_axes_0, epsilon = var_8_to_fp16, x = input_287_cast_fp16)[name = string("normed_403_cast_fp16")]; tensor var_3534_split_sizes_0 = const()[name = string("op_3534_split_sizes_0"), val = tensor([256, 256])]; int32 var_3534_axis_0 = const()[name = string("op_3534_axis_0"), val = int32(-1)]; tensor var_3534_cast_fp16_0, tensor var_3534_cast_fp16_1 = split(axis = var_3534_axis_0, split_sizes = var_3534_split_sizes_0, x = normed_403_cast_fp16)[name = string("op_3534_cast_fp16")]; tensor var_3538_to_fp16 = const()[name = string("op_3538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302626752)))]; tensor out_173_cast_fp16 = mul(x = var_3534_cast_fp16_0, y = var_3538_to_fp16)[name = string("out_173_cast_fp16")]; tensor var_3541 = mul(x = out_171_cast_fp16, y = cos_1_quantized)[name = string("op_3541")]; tensor var_3542_split_sizes_0 = const()[name = string("op_3542_split_sizes_0"), val = tensor([128, 128])]; int32 var_3542_axis_0 = const()[name = string("op_3542_axis_0"), val = int32(-1)]; tensor var_3542_0, tensor var_3542_1 = split(axis = var_3542_axis_0, split_sizes = var_3542_split_sizes_0, x = out_171_cast_fp16)[name = string("op_3542")]; fp16 const_202_promoted = const()[name = string("const_202_promoted"), val = fp16(-0x1p+0)]; tensor var_3544 = mul(x = var_3542_1, y = const_202_promoted)[name = string("op_3544")]; bool var_3546_interleave_0 = const()[name = string("op_3546_interleave_0"), val = bool(false)]; tensor var_3546 = concat(axis = var_23, interleave = var_3546_interleave_0, values = (var_3544, var_3542_0))[name = string("op_3546")]; tensor var_3547 = mul(x = var_3546, y = sin_1_quantized)[name = string("op_3547")]; tensor q_89 = add(x = var_3541, y = var_3547)[name = string("q_89")]; tensor var_3549 = mul(x = out_173_cast_fp16, y = cos_1_quantized)[name = string("op_3549")]; tensor var_3550_split_sizes_0 = const()[name = string("op_3550_split_sizes_0"), val = tensor([128, 128])]; int32 var_3550_axis_0 = const()[name = string("op_3550_axis_0"), val = int32(-1)]; tensor var_3550_0, tensor var_3550_1 = split(axis = var_3550_axis_0, split_sizes = var_3550_split_sizes_0, x = out_173_cast_fp16)[name = string("op_3550")]; fp16 const_203_promoted = const()[name = string("const_203_promoted"), val = fp16(-0x1p+0)]; tensor var_3552 = mul(x = var_3550_1, y = const_203_promoted)[name = string("op_3552")]; bool var_3554_interleave_0 = const()[name = string("op_3554_interleave_0"), val = bool(false)]; tensor var_3554 = concat(axis = var_23, interleave = var_3554_interleave_0, values = (var_3552, var_3550_0))[name = string("op_3554")]; tensor var_3555 = mul(x = var_3554, y = sin_1_quantized)[name = string("op_3555")]; tensor hidden_states_169 = add(x = var_3549, y = var_3555)[name = string("hidden_states_169")]; tensor hidden_states_171_axes_0 = const()[name = string("hidden_states_171_axes_0"), val = tensor([2])]; tensor hidden_states_171 = expand_dims(axes = hidden_states_171_axes_0, x = hidden_states_169)[name = string("hidden_states_171")]; tensor var_3558 = const()[name = string("op_3558"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_173 = tile(reps = var_3558, x = hidden_states_171)[name = string("hidden_states_173")]; tensor var_3560 = const()[name = string("op_3560"), val = tensor([1, 3, 128, 256])]; tensor k_89 = reshape(shape = var_3560, x = hidden_states_173)[name = string("k_89")]; tensor hidden_states_177_axes_0 = const()[name = string("hidden_states_177_axes_0"), val = tensor([2])]; tensor hidden_states_175 = transpose(perm = var_3507, x = var_3506)[name = string("transpose_86")]; tensor hidden_states_177 = expand_dims(axes = hidden_states_177_axes_0, x = hidden_states_175)[name = string("hidden_states_177")]; tensor var_3563 = const()[name = string("op_3563"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_179 = tile(reps = var_3563, x = hidden_states_177)[name = string("hidden_states_179")]; tensor var_3565 = const()[name = string("op_3565"), val = tensor([1, 3, 128, 256])]; tensor v_29 = reshape(shape = var_3565, x = hidden_states_179)[name = string("v_29")]; bool var_3570_transpose_x_1 = const()[name = string("op_3570_transpose_x_1"), val = bool(false)]; bool var_3570_transpose_y_1 = const()[name = string("op_3570_transpose_y_1"), val = bool(true)]; tensor var_3570_cast_fp16 = matmul(transpose_x = var_3570_transpose_x_1, transpose_y = var_3570_transpose_y_1, x = q_89, y = k_89)[name = string("op_3570_cast_fp16")]; fp16 var_3571_to_fp16 = const()[name = string("op_3571_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_85_cast_fp16 = mul(x = var_3570_cast_fp16, y = var_3571_to_fp16)[name = string("attn_weights_85_cast_fp16")]; tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_87_cast_fp16")]; tensor var_3575_cast_fp16 = softmax(axis = var_23, x = attn_weights_87_cast_fp16)[name = string("op_3575_cast_fp16")]; bool var_3579_transpose_x_0 = const()[name = string("op_3579_transpose_x_0"), val = bool(false)]; bool var_3579_transpose_y_0 = const()[name = string("op_3579_transpose_y_0"), val = bool(false)]; tensor var_3579_cast_fp16 = matmul(transpose_x = var_3579_transpose_x_0, transpose_y = var_3579_transpose_y_0, x = var_3575_cast_fp16, y = v_29)[name = string("op_3579_cast_fp16")]; tensor var_3581 = const()[name = string("op_3581"), val = tensor([0, 2, 1, 3])]; tensor var_3584 = const()[name = string("op_3584"), val = tensor([1, 128, 768])]; tensor var_3582 = transpose(perm = var_3581, x = var_3579_cast_fp16)[name = string("transpose_85")]; tensor attn_out_87 = reshape(shape = var_3584, x = var_3582)[name = string("attn_out_87")]; tensor var_3586 = const()[name = string("op_3586"), val = tensor([0, 2, 1])]; tensor squeeze_14_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302627328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303217216))))[name = string("squeeze_14_quantized")]; string var_3595_pad_type_0 = const()[name = string("op_3595_pad_type_0"), val = string("valid")]; int32 var_3595_groups_0 = const()[name = string("op_3595_groups_0"), val = int32(1)]; tensor var_3595_strides_0 = const()[name = string("op_3595_strides_0"), val = tensor([1])]; tensor var_3595_pad_0 = const()[name = string("op_3595_pad_0"), val = tensor([0, 0])]; tensor var_3595_dilations_0 = const()[name = string("op_3595_dilations_0"), val = tensor([1])]; tensor var_3587 = transpose(perm = var_3586, x = attn_out_87)[name = string("transpose_84")]; tensor var_3595 = conv(dilations = var_3595_dilations_0, groups = var_3595_groups_0, pad = var_3595_pad_0, pad_type = var_3595_pad_type_0, strides = var_3595_strides_0, weight = squeeze_14_quantized, x = var_3587)[name = string("op_3595")]; tensor var_3596 = const()[name = string("op_3596"), val = tensor([0, 2, 1])]; fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_233 = transpose(perm = var_3596, x = var_3595)[name = string("transpose_83")]; tensor var_3600_cast_fp16 = mul(x = x_233, y = const_204_promoted_to_fp16)[name = string("op_3600_cast_fp16")]; bool input_291_interleave_0 = const()[name = string("input_291_interleave_0"), val = bool(false)]; tensor input_291_cast_fp16 = concat(axis = var_23, interleave = input_291_interleave_0, values = (x_233, var_3600_cast_fp16))[name = string("input_291_cast_fp16")]; tensor normed_407_axes_0 = const()[name = string("normed_407_axes_0"), val = tensor([-1])]; tensor normed_407_cast_fp16 = layer_norm(axes = normed_407_axes_0, epsilon = var_8_to_fp16, x = input_291_cast_fp16)[name = string("normed_407_cast_fp16")]; tensor var_3605_split_sizes_0 = const()[name = string("op_3605_split_sizes_0"), val = tensor([768, 768])]; int32 var_3605_axis_0 = const()[name = string("op_3605_axis_0"), val = int32(-1)]; tensor var_3605_cast_fp16_0, tensor var_3605_cast_fp16_1 = split(axis = var_3605_axis_0, split_sizes = var_3605_split_sizes_0, x = normed_407_cast_fp16)[name = string("op_3605_cast_fp16")]; tensor var_3609_to_fp16 = const()[name = string("op_3609_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303218816)))]; tensor out_175_cast_fp16 = mul(x = var_3605_cast_fp16_0, y = var_3609_to_fp16)[name = string("out_175_cast_fp16")]; tensor x_235_cast_fp16 = add(x = x_225_cast_fp16, y = out_175_cast_fp16)[name = string("x_235_cast_fp16")]; fp16 const_206_promoted_to_fp16 = const()[name = string("const_206_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3616_cast_fp16 = mul(x = x_235_cast_fp16, y = const_206_promoted_to_fp16)[name = string("op_3616_cast_fp16")]; bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; tensor input_293_cast_fp16 = concat(axis = var_23, interleave = input_293_interleave_0, values = (x_235_cast_fp16, var_3616_cast_fp16))[name = string("input_293_cast_fp16")]; tensor normed_411_axes_0 = const()[name = string("normed_411_axes_0"), val = tensor([-1])]; tensor normed_411_cast_fp16 = layer_norm(axes = normed_411_axes_0, epsilon = var_8_to_fp16, x = input_293_cast_fp16)[name = string("normed_411_cast_fp16")]; tensor var_3621_split_sizes_0 = const()[name = string("op_3621_split_sizes_0"), val = tensor([768, 768])]; int32 var_3621_axis_0 = const()[name = string("op_3621_axis_0"), val = int32(-1)]; tensor var_3621_cast_fp16_0, tensor var_3621_cast_fp16_1 = split(axis = var_3621_axis_0, split_sizes = var_3621_split_sizes_0, x = normed_411_cast_fp16)[name = string("op_3621_cast_fp16")]; tensor var_3625_to_fp16 = const()[name = string("op_3625_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303220416)))]; tensor out_177_cast_fp16 = mul(x = var_3621_cast_fp16_0, y = var_3625_to_fp16)[name = string("out_177_cast_fp16")]; tensor var_3632 = const()[name = string("op_3632"), val = tensor([0, 2, 1])]; tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; tensor var_3633 = transpose(perm = var_3632, x = out_177_cast_fp16)[name = string("transpose_82")]; tensor input_295 = expand_dims(axes = input_295_axes_0, x = var_3633)[name = string("input_295")]; string gate_57_pad_type_0 = const()[name = string("gate_57_pad_type_0"), val = string("valid")]; tensor gate_57_strides_0 = const()[name = string("gate_57_strides_0"), val = tensor([1, 1])]; tensor gate_57_pad_0 = const()[name = string("gate_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_57_dilations_0 = const()[name = string("gate_57_dilations_0"), val = tensor([1, 1])]; int32 gate_57_groups_0 = const()[name = string("gate_57_groups_0"), val = int32(1)]; tensor gate_57 = conv(dilations = gate_57_dilations_0, groups = gate_57_groups_0, pad = gate_57_pad_0, pad_type = gate_57_pad_type_0, strides = gate_57_strides_0, weight = encoder_layers_14_mlp_gate_proj_weight_quantized, x = input_295)[name = string("gate_57")]; string up_29_pad_type_0 = const()[name = string("up_29_pad_type_0"), val = string("valid")]; tensor up_29_strides_0 = const()[name = string("up_29_strides_0"), val = tensor([1, 1])]; tensor up_29_pad_0 = const()[name = string("up_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_29_dilations_0 = const()[name = string("up_29_dilations_0"), val = tensor([1, 1])]; int32 up_29_groups_0 = const()[name = string("up_29_groups_0"), val = int32(1)]; tensor up_29 = conv(dilations = up_29_dilations_0, groups = up_29_groups_0, pad = up_29_pad_0, pad_type = up_29_pad_type_0, strides = up_29_strides_0, weight = encoder_layers_14_mlp_up_proj_weight_quantized, x = input_295)[name = string("up_29")]; string gate_59_mode_0 = const()[name = string("gate_59_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_59 = gelu(mode = gate_59_mode_0, x = gate_57)[name = string("gate_59")]; tensor input_297 = mul(x = gate_59, y = up_29)[name = string("input_297")]; string var_3654_pad_type_0 = const()[name = string("op_3654_pad_type_0"), val = string("valid")]; tensor var_3654_strides_0 = const()[name = string("op_3654_strides_0"), val = tensor([1, 1])]; tensor var_3654_pad_0 = const()[name = string("op_3654_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3654_dilations_0 = const()[name = string("op_3654_dilations_0"), val = tensor([1, 1])]; int32 var_3654_groups_0 = const()[name = string("op_3654_groups_0"), val = int32(1)]; tensor var_3654 = conv(dilations = var_3654_dilations_0, groups = var_3654_groups_0, pad = var_3654_pad_0, pad_type = var_3654_pad_type_0, strides = var_3654_strides_0, weight = encoder_layers_14_mlp_down_proj_weight_quantized, x = input_297)[name = string("op_3654")]; tensor var_3655_axes_0 = const()[name = string("op_3655_axes_0"), val = tensor([2])]; tensor var_3655 = squeeze(axes = var_3655_axes_0, x = var_3654)[name = string("op_3655")]; tensor var_3656 = const()[name = string("op_3656"), val = tensor([0, 2, 1])]; fp16 const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_239 = transpose(perm = var_3656, x = var_3655)[name = string("transpose_81")]; tensor var_3660_cast_fp16 = mul(x = x_239, y = const_208_promoted_to_fp16)[name = string("op_3660_cast_fp16")]; bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; tensor input_299_cast_fp16 = concat(axis = var_23, interleave = input_299_interleave_0, values = (x_239, var_3660_cast_fp16))[name = string("input_299_cast_fp16")]; tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_8_to_fp16, x = input_299_cast_fp16)[name = string("normed_417_cast_fp16")]; tensor var_3665_split_sizes_0 = const()[name = string("op_3665_split_sizes_0"), val = tensor([768, 768])]; int32 var_3665_axis_0 = const()[name = string("op_3665_axis_0"), val = int32(-1)]; tensor var_3665_cast_fp16_0, tensor var_3665_cast_fp16_1 = split(axis = var_3665_axis_0, split_sizes = var_3665_split_sizes_0, x = normed_417_cast_fp16)[name = string("op_3665_cast_fp16")]; tensor var_3669_to_fp16 = const()[name = string("op_3669_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303222016)))]; tensor out_179_cast_fp16 = mul(x = var_3665_cast_fp16_0, y = var_3669_to_fp16)[name = string("out_179_cast_fp16")]; tensor x_241_cast_fp16 = add(x = x_235_cast_fp16, y = out_179_cast_fp16)[name = string("x_241_cast_fp16")]; fp16 const_210_promoted_to_fp16 = const()[name = string("const_210_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3698_cast_fp16 = mul(x = x_241_cast_fp16, y = const_210_promoted_to_fp16)[name = string("op_3698_cast_fp16")]; bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; tensor input_301_cast_fp16 = concat(axis = var_23, interleave = input_301_interleave_0, values = (x_241_cast_fp16, var_3698_cast_fp16))[name = string("input_301_cast_fp16")]; tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_8_to_fp16, x = input_301_cast_fp16)[name = string("normed_421_cast_fp16")]; tensor var_3703_split_sizes_0 = const()[name = string("op_3703_split_sizes_0"), val = tensor([768, 768])]; int32 var_3703_axis_0 = const()[name = string("op_3703_axis_0"), val = int32(-1)]; tensor var_3703_cast_fp16_0, tensor var_3703_cast_fp16_1 = split(axis = var_3703_axis_0, split_sizes = var_3703_split_sizes_0, x = normed_421_cast_fp16)[name = string("op_3703_cast_fp16")]; tensor var_3707_to_fp16 = const()[name = string("op_3707_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303223616)))]; tensor out_181_cast_fp16 = mul(x = var_3703_cast_fp16_0, y = var_3707_to_fp16)[name = string("out_181_cast_fp16")]; tensor var_3713 = const()[name = string("op_3713"), val = tensor([0, 2, 1])]; tensor var_3715_axes_0 = const()[name = string("op_3715_axes_0"), val = tensor([2])]; tensor var_3714_cast_fp16 = transpose(perm = var_3713, x = out_181_cast_fp16)[name = string("transpose_80")]; tensor var_3715_cast_fp16 = expand_dims(axes = var_3715_axes_0, x = var_3714_cast_fp16)[name = string("op_3715_cast_fp16")]; string var_3722_pad_type_0 = const()[name = string("op_3722_pad_type_0"), val = string("valid")]; tensor var_3722_strides_0 = const()[name = string("op_3722_strides_0"), val = tensor([1, 1])]; tensor var_3722_pad_0 = const()[name = string("op_3722_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3722_dilations_0 = const()[name = string("op_3722_dilations_0"), val = tensor([1, 1])]; int32 var_3722_groups_0 = const()[name = string("op_3722_groups_0"), val = int32(1)]; tensor var_3722 = conv(dilations = var_3722_dilations_0, groups = var_3722_groups_0, pad = var_3722_pad_0, pad_type = var_3722_pad_type_0, strides = var_3722_strides_0, weight = encoder_layers_15_self_attn_q_proj_weight_quantized, x = var_3715_cast_fp16)[name = string("op_3722")]; tensor var_3723 = const()[name = string("op_3723"), val = tensor([1, 3, 256, 128])]; tensor var_3724 = reshape(shape = var_3723, x = var_3722)[name = string("op_3724")]; tensor var_3725 = const()[name = string("op_3725"), val = tensor([0, 1, 3, 2])]; string var_3732_pad_type_0 = const()[name = string("op_3732_pad_type_0"), val = string("valid")]; tensor var_3732_strides_0 = const()[name = string("op_3732_strides_0"), val = tensor([1, 1])]; tensor var_3732_pad_0 = const()[name = string("op_3732_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3732_dilations_0 = const()[name = string("op_3732_dilations_0"), val = tensor([1, 1])]; int32 var_3732_groups_0 = const()[name = string("op_3732_groups_0"), val = int32(1)]; tensor var_3732 = conv(dilations = var_3732_dilations_0, groups = var_3732_groups_0, pad = var_3732_pad_0, pad_type = var_3732_pad_type_0, strides = var_3732_strides_0, weight = encoder_layers_15_self_attn_k_proj_weight_quantized, x = var_3715_cast_fp16)[name = string("op_3732")]; tensor var_3733 = const()[name = string("op_3733"), val = tensor([1, 1, 256, 128])]; tensor var_3734 = reshape(shape = var_3733, x = var_3732)[name = string("op_3734")]; tensor var_3735 = const()[name = string("op_3735"), val = tensor([0, 1, 3, 2])]; string var_3742_pad_type_0 = const()[name = string("op_3742_pad_type_0"), val = string("valid")]; tensor var_3742_strides_0 = const()[name = string("op_3742_strides_0"), val = tensor([1, 1])]; tensor var_3742_pad_0 = const()[name = string("op_3742_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3742_dilations_0 = const()[name = string("op_3742_dilations_0"), val = tensor([1, 1])]; int32 var_3742_groups_0 = const()[name = string("op_3742_groups_0"), val = int32(1)]; tensor var_3742 = conv(dilations = var_3742_dilations_0, groups = var_3742_groups_0, pad = var_3742_pad_0, pad_type = var_3742_pad_type_0, strides = var_3742_strides_0, weight = encoder_layers_15_self_attn_v_proj_weight_quantized, x = var_3715_cast_fp16)[name = string("op_3742")]; tensor var_3743 = const()[name = string("op_3743"), val = tensor([1, 1, 256, 128])]; tensor var_3744 = reshape(shape = var_3743, x = var_3742)[name = string("op_3744")]; tensor var_3745 = const()[name = string("op_3745"), val = tensor([0, 1, 3, 2])]; fp16 const_212_promoted_to_fp16 = const()[name = string("const_212_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_91 = transpose(perm = var_3725, x = var_3724)[name = string("transpose_79")]; tensor var_3751_cast_fp16 = mul(x = q_91, y = const_212_promoted_to_fp16)[name = string("op_3751_cast_fp16")]; bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; tensor input_305_cast_fp16 = concat(axis = var_23, interleave = input_305_interleave_0, values = (q_91, var_3751_cast_fp16))[name = string("input_305_cast_fp16")]; tensor normed_427_axes_0 = const()[name = string("normed_427_axes_0"), val = tensor([-1])]; tensor normed_427_cast_fp16 = layer_norm(axes = normed_427_axes_0, epsilon = var_8_to_fp16, x = input_305_cast_fp16)[name = string("normed_427_cast_fp16")]; tensor var_3756_split_sizes_0 = const()[name = string("op_3756_split_sizes_0"), val = tensor([256, 256])]; int32 var_3756_axis_0 = const()[name = string("op_3756_axis_0"), val = int32(-1)]; tensor var_3756_cast_fp16_0, tensor var_3756_cast_fp16_1 = split(axis = var_3756_axis_0, split_sizes = var_3756_split_sizes_0, x = normed_427_cast_fp16)[name = string("op_3756_cast_fp16")]; tensor var_3760_to_fp16 = const()[name = string("op_3760_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303225216)))]; tensor out_183_cast_fp16 = mul(x = var_3756_cast_fp16_0, y = var_3760_to_fp16)[name = string("out_183_cast_fp16")]; fp16 const_214_promoted_to_fp16 = const()[name = string("const_214_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_91 = transpose(perm = var_3735, x = var_3734)[name = string("transpose_78")]; tensor var_3767_cast_fp16 = mul(x = k_91, y = const_214_promoted_to_fp16)[name = string("op_3767_cast_fp16")]; bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; tensor input_307_cast_fp16 = concat(axis = var_23, interleave = input_307_interleave_0, values = (k_91, var_3767_cast_fp16))[name = string("input_307_cast_fp16")]; tensor normed_431_axes_0 = const()[name = string("normed_431_axes_0"), val = tensor([-1])]; tensor normed_431_cast_fp16 = layer_norm(axes = normed_431_axes_0, epsilon = var_8_to_fp16, x = input_307_cast_fp16)[name = string("normed_431_cast_fp16")]; tensor var_3772_split_sizes_0 = const()[name = string("op_3772_split_sizes_0"), val = tensor([256, 256])]; int32 var_3772_axis_0 = const()[name = string("op_3772_axis_0"), val = int32(-1)]; tensor var_3772_cast_fp16_0, tensor var_3772_cast_fp16_1 = split(axis = var_3772_axis_0, split_sizes = var_3772_split_sizes_0, x = normed_431_cast_fp16)[name = string("op_3772_cast_fp16")]; tensor var_3776_to_fp16 = const()[name = string("op_3776_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303225792)))]; tensor out_185_cast_fp16 = mul(x = var_3772_cast_fp16_0, y = var_3776_to_fp16)[name = string("out_185_cast_fp16")]; tensor var_3779 = mul(x = out_183_cast_fp16, y = cos_1_quantized)[name = string("op_3779")]; tensor var_3780_split_sizes_0 = const()[name = string("op_3780_split_sizes_0"), val = tensor([128, 128])]; int32 var_3780_axis_0 = const()[name = string("op_3780_axis_0"), val = int32(-1)]; tensor var_3780_0, tensor var_3780_1 = split(axis = var_3780_axis_0, split_sizes = var_3780_split_sizes_0, x = out_183_cast_fp16)[name = string("op_3780")]; fp16 const_216_promoted = const()[name = string("const_216_promoted"), val = fp16(-0x1p+0)]; tensor var_3782 = mul(x = var_3780_1, y = const_216_promoted)[name = string("op_3782")]; bool var_3784_interleave_0 = const()[name = string("op_3784_interleave_0"), val = bool(false)]; tensor var_3784 = concat(axis = var_23, interleave = var_3784_interleave_0, values = (var_3782, var_3780_0))[name = string("op_3784")]; tensor var_3785 = mul(x = var_3784, y = sin_1_quantized)[name = string("op_3785")]; tensor q_95 = add(x = var_3779, y = var_3785)[name = string("q_95")]; tensor var_3787 = mul(x = out_185_cast_fp16, y = cos_1_quantized)[name = string("op_3787")]; tensor var_3788_split_sizes_0 = const()[name = string("op_3788_split_sizes_0"), val = tensor([128, 128])]; int32 var_3788_axis_0 = const()[name = string("op_3788_axis_0"), val = int32(-1)]; tensor var_3788_0, tensor var_3788_1 = split(axis = var_3788_axis_0, split_sizes = var_3788_split_sizes_0, x = out_185_cast_fp16)[name = string("op_3788")]; fp16 const_217_promoted = const()[name = string("const_217_promoted"), val = fp16(-0x1p+0)]; tensor var_3790 = mul(x = var_3788_1, y = const_217_promoted)[name = string("op_3790")]; bool var_3792_interleave_0 = const()[name = string("op_3792_interleave_0"), val = bool(false)]; tensor var_3792 = concat(axis = var_23, interleave = var_3792_interleave_0, values = (var_3790, var_3788_0))[name = string("op_3792")]; tensor var_3793 = mul(x = var_3792, y = sin_1_quantized)[name = string("op_3793")]; tensor hidden_states_181 = add(x = var_3787, y = var_3793)[name = string("hidden_states_181")]; tensor hidden_states_183_axes_0 = const()[name = string("hidden_states_183_axes_0"), val = tensor([2])]; tensor hidden_states_183 = expand_dims(axes = hidden_states_183_axes_0, x = hidden_states_181)[name = string("hidden_states_183")]; tensor var_3796 = const()[name = string("op_3796"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_185 = tile(reps = var_3796, x = hidden_states_183)[name = string("hidden_states_185")]; tensor var_3798 = const()[name = string("op_3798"), val = tensor([1, 3, 128, 256])]; tensor k_95 = reshape(shape = var_3798, x = hidden_states_185)[name = string("k_95")]; tensor hidden_states_189_axes_0 = const()[name = string("hidden_states_189_axes_0"), val = tensor([2])]; tensor hidden_states_187 = transpose(perm = var_3745, x = var_3744)[name = string("transpose_77")]; tensor hidden_states_189 = expand_dims(axes = hidden_states_189_axes_0, x = hidden_states_187)[name = string("hidden_states_189")]; tensor var_3801 = const()[name = string("op_3801"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_191 = tile(reps = var_3801, x = hidden_states_189)[name = string("hidden_states_191")]; tensor var_3803 = const()[name = string("op_3803"), val = tensor([1, 3, 128, 256])]; tensor v_31 = reshape(shape = var_3803, x = hidden_states_191)[name = string("v_31")]; bool var_3808_transpose_x_1 = const()[name = string("op_3808_transpose_x_1"), val = bool(false)]; bool var_3808_transpose_y_1 = const()[name = string("op_3808_transpose_y_1"), val = bool(true)]; tensor var_3808_cast_fp16 = matmul(transpose_x = var_3808_transpose_x_1, transpose_y = var_3808_transpose_y_1, x = q_95, y = k_95)[name = string("op_3808_cast_fp16")]; fp16 var_3809_to_fp16 = const()[name = string("op_3809_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_91_cast_fp16 = mul(x = var_3808_cast_fp16, y = var_3809_to_fp16)[name = string("attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = attn_weights_91_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_93_cast_fp16")]; tensor var_3813_cast_fp16 = softmax(axis = var_23, x = attn_weights_93_cast_fp16)[name = string("op_3813_cast_fp16")]; bool var_3817_transpose_x_0 = const()[name = string("op_3817_transpose_x_0"), val = bool(false)]; bool var_3817_transpose_y_0 = const()[name = string("op_3817_transpose_y_0"), val = bool(false)]; tensor var_3817_cast_fp16 = matmul(transpose_x = var_3817_transpose_x_0, transpose_y = var_3817_transpose_y_0, x = var_3813_cast_fp16, y = v_31)[name = string("op_3817_cast_fp16")]; tensor var_3819 = const()[name = string("op_3819"), val = tensor([0, 2, 1, 3])]; tensor var_3822 = const()[name = string("op_3822"), val = tensor([1, 128, 768])]; tensor var_3820 = transpose(perm = var_3819, x = var_3817_cast_fp16)[name = string("transpose_76")]; tensor attn_out_93 = reshape(shape = var_3822, x = var_3820)[name = string("attn_out_93")]; tensor var_3824 = const()[name = string("op_3824"), val = tensor([0, 2, 1])]; tensor squeeze_15_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303226368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303816256))))[name = string("squeeze_15_quantized")]; string var_3833_pad_type_0 = const()[name = string("op_3833_pad_type_0"), val = string("valid")]; int32 var_3833_groups_0 = const()[name = string("op_3833_groups_0"), val = int32(1)]; tensor var_3833_strides_0 = const()[name = string("op_3833_strides_0"), val = tensor([1])]; tensor var_3833_pad_0 = const()[name = string("op_3833_pad_0"), val = tensor([0, 0])]; tensor var_3833_dilations_0 = const()[name = string("op_3833_dilations_0"), val = tensor([1])]; tensor var_3825 = transpose(perm = var_3824, x = attn_out_93)[name = string("transpose_75")]; tensor var_3833 = conv(dilations = var_3833_dilations_0, groups = var_3833_groups_0, pad = var_3833_pad_0, pad_type = var_3833_pad_type_0, strides = var_3833_strides_0, weight = squeeze_15_quantized, x = var_3825)[name = string("op_3833")]; tensor var_3834 = const()[name = string("op_3834"), val = tensor([0, 2, 1])]; fp16 const_218_promoted_to_fp16 = const()[name = string("const_218_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_249 = transpose(perm = var_3834, x = var_3833)[name = string("transpose_74")]; tensor var_3838_cast_fp16 = mul(x = x_249, y = const_218_promoted_to_fp16)[name = string("op_3838_cast_fp16")]; bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; tensor input_311_cast_fp16 = concat(axis = var_23, interleave = input_311_interleave_0, values = (x_249, var_3838_cast_fp16))[name = string("input_311_cast_fp16")]; tensor normed_435_axes_0 = const()[name = string("normed_435_axes_0"), val = tensor([-1])]; tensor normed_435_cast_fp16 = layer_norm(axes = normed_435_axes_0, epsilon = var_8_to_fp16, x = input_311_cast_fp16)[name = string("normed_435_cast_fp16")]; tensor var_3843_split_sizes_0 = const()[name = string("op_3843_split_sizes_0"), val = tensor([768, 768])]; int32 var_3843_axis_0 = const()[name = string("op_3843_axis_0"), val = int32(-1)]; tensor var_3843_cast_fp16_0, tensor var_3843_cast_fp16_1 = split(axis = var_3843_axis_0, split_sizes = var_3843_split_sizes_0, x = normed_435_cast_fp16)[name = string("op_3843_cast_fp16")]; tensor var_3847_to_fp16 = const()[name = string("op_3847_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303817856)))]; tensor out_187_cast_fp16 = mul(x = var_3843_cast_fp16_0, y = var_3847_to_fp16)[name = string("out_187_cast_fp16")]; tensor x_251_cast_fp16 = add(x = x_241_cast_fp16, y = out_187_cast_fp16)[name = string("x_251_cast_fp16")]; fp16 const_220_promoted_to_fp16 = const()[name = string("const_220_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3854_cast_fp16 = mul(x = x_251_cast_fp16, y = const_220_promoted_to_fp16)[name = string("op_3854_cast_fp16")]; bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; tensor input_313_cast_fp16 = concat(axis = var_23, interleave = input_313_interleave_0, values = (x_251_cast_fp16, var_3854_cast_fp16))[name = string("input_313_cast_fp16")]; tensor normed_439_axes_0 = const()[name = string("normed_439_axes_0"), val = tensor([-1])]; tensor normed_439_cast_fp16 = layer_norm(axes = normed_439_axes_0, epsilon = var_8_to_fp16, x = input_313_cast_fp16)[name = string("normed_439_cast_fp16")]; tensor var_3859_split_sizes_0 = const()[name = string("op_3859_split_sizes_0"), val = tensor([768, 768])]; int32 var_3859_axis_0 = const()[name = string("op_3859_axis_0"), val = int32(-1)]; tensor var_3859_cast_fp16_0, tensor var_3859_cast_fp16_1 = split(axis = var_3859_axis_0, split_sizes = var_3859_split_sizes_0, x = normed_439_cast_fp16)[name = string("op_3859_cast_fp16")]; tensor var_3863_to_fp16 = const()[name = string("op_3863_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303819456)))]; tensor out_189_cast_fp16 = mul(x = var_3859_cast_fp16_0, y = var_3863_to_fp16)[name = string("out_189_cast_fp16")]; tensor var_3870 = const()[name = string("op_3870"), val = tensor([0, 2, 1])]; tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; tensor var_3871 = transpose(perm = var_3870, x = out_189_cast_fp16)[name = string("transpose_73")]; tensor input_315 = expand_dims(axes = input_315_axes_0, x = var_3871)[name = string("input_315")]; string gate_61_pad_type_0 = const()[name = string("gate_61_pad_type_0"), val = string("valid")]; tensor gate_61_strides_0 = const()[name = string("gate_61_strides_0"), val = tensor([1, 1])]; tensor gate_61_pad_0 = const()[name = string("gate_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_61_dilations_0 = const()[name = string("gate_61_dilations_0"), val = tensor([1, 1])]; int32 gate_61_groups_0 = const()[name = string("gate_61_groups_0"), val = int32(1)]; tensor gate_61 = conv(dilations = gate_61_dilations_0, groups = gate_61_groups_0, pad = gate_61_pad_0, pad_type = gate_61_pad_type_0, strides = gate_61_strides_0, weight = encoder_layers_15_mlp_gate_proj_weight_quantized, x = input_315)[name = string("gate_61")]; string up_31_pad_type_0 = const()[name = string("up_31_pad_type_0"), val = string("valid")]; tensor up_31_strides_0 = const()[name = string("up_31_strides_0"), val = tensor([1, 1])]; tensor up_31_pad_0 = const()[name = string("up_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_31_dilations_0 = const()[name = string("up_31_dilations_0"), val = tensor([1, 1])]; int32 up_31_groups_0 = const()[name = string("up_31_groups_0"), val = int32(1)]; tensor up_31 = conv(dilations = up_31_dilations_0, groups = up_31_groups_0, pad = up_31_pad_0, pad_type = up_31_pad_type_0, strides = up_31_strides_0, weight = encoder_layers_15_mlp_up_proj_weight_quantized, x = input_315)[name = string("up_31")]; string gate_63_mode_0 = const()[name = string("gate_63_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_63 = gelu(mode = gate_63_mode_0, x = gate_61)[name = string("gate_63")]; tensor input_317 = mul(x = gate_63, y = up_31)[name = string("input_317")]; string var_3892_pad_type_0 = const()[name = string("op_3892_pad_type_0"), val = string("valid")]; tensor var_3892_strides_0 = const()[name = string("op_3892_strides_0"), val = tensor([1, 1])]; tensor var_3892_pad_0 = const()[name = string("op_3892_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3892_dilations_0 = const()[name = string("op_3892_dilations_0"), val = tensor([1, 1])]; int32 var_3892_groups_0 = const()[name = string("op_3892_groups_0"), val = int32(1)]; tensor var_3892 = conv(dilations = var_3892_dilations_0, groups = var_3892_groups_0, pad = var_3892_pad_0, pad_type = var_3892_pad_type_0, strides = var_3892_strides_0, weight = encoder_layers_15_mlp_down_proj_weight_quantized, x = input_317)[name = string("op_3892")]; tensor var_3893_axes_0 = const()[name = string("op_3893_axes_0"), val = tensor([2])]; tensor var_3893 = squeeze(axes = var_3893_axes_0, x = var_3892)[name = string("op_3893")]; tensor var_3894 = const()[name = string("op_3894"), val = tensor([0, 2, 1])]; fp16 const_222_promoted_to_fp16 = const()[name = string("const_222_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_255 = transpose(perm = var_3894, x = var_3893)[name = string("transpose_72")]; tensor var_3898_cast_fp16 = mul(x = x_255, y = const_222_promoted_to_fp16)[name = string("op_3898_cast_fp16")]; bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; tensor input_319_cast_fp16 = concat(axis = var_23, interleave = input_319_interleave_0, values = (x_255, var_3898_cast_fp16))[name = string("input_319_cast_fp16")]; tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_8_to_fp16, x = input_319_cast_fp16)[name = string("normed_445_cast_fp16")]; tensor var_3903_split_sizes_0 = const()[name = string("op_3903_split_sizes_0"), val = tensor([768, 768])]; int32 var_3903_axis_0 = const()[name = string("op_3903_axis_0"), val = int32(-1)]; tensor var_3903_cast_fp16_0, tensor var_3903_cast_fp16_1 = split(axis = var_3903_axis_0, split_sizes = var_3903_split_sizes_0, x = normed_445_cast_fp16)[name = string("op_3903_cast_fp16")]; tensor var_3907_to_fp16 = const()[name = string("op_3907_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303821056)))]; tensor out_191_cast_fp16 = mul(x = var_3903_cast_fp16_0, y = var_3907_to_fp16)[name = string("out_191_cast_fp16")]; tensor x_257_cast_fp16 = add(x = x_251_cast_fp16, y = out_191_cast_fp16)[name = string("x_257_cast_fp16")]; fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3936_cast_fp16 = mul(x = x_257_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_3936_cast_fp16")]; bool input_321_interleave_0 = const()[name = string("input_321_interleave_0"), val = bool(false)]; tensor input_321_cast_fp16 = concat(axis = var_23, interleave = input_321_interleave_0, values = (x_257_cast_fp16, var_3936_cast_fp16))[name = string("input_321_cast_fp16")]; tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_8_to_fp16, x = input_321_cast_fp16)[name = string("normed_449_cast_fp16")]; tensor var_3941_split_sizes_0 = const()[name = string("op_3941_split_sizes_0"), val = tensor([768, 768])]; int32 var_3941_axis_0 = const()[name = string("op_3941_axis_0"), val = int32(-1)]; tensor var_3941_cast_fp16_0, tensor var_3941_cast_fp16_1 = split(axis = var_3941_axis_0, split_sizes = var_3941_split_sizes_0, x = normed_449_cast_fp16)[name = string("op_3941_cast_fp16")]; tensor var_3945_to_fp16 = const()[name = string("op_3945_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303822656)))]; tensor out_193_cast_fp16 = mul(x = var_3941_cast_fp16_0, y = var_3945_to_fp16)[name = string("out_193_cast_fp16")]; tensor var_3951 = const()[name = string("op_3951"), val = tensor([0, 2, 1])]; tensor var_3953_axes_0 = const()[name = string("op_3953_axes_0"), val = tensor([2])]; tensor var_3952_cast_fp16 = transpose(perm = var_3951, x = out_193_cast_fp16)[name = string("transpose_71")]; tensor var_3953_cast_fp16 = expand_dims(axes = var_3953_axes_0, x = var_3952_cast_fp16)[name = string("op_3953_cast_fp16")]; string var_3960_pad_type_0 = const()[name = string("op_3960_pad_type_0"), val = string("valid")]; tensor var_3960_strides_0 = const()[name = string("op_3960_strides_0"), val = tensor([1, 1])]; tensor var_3960_pad_0 = const()[name = string("op_3960_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3960_dilations_0 = const()[name = string("op_3960_dilations_0"), val = tensor([1, 1])]; int32 var_3960_groups_0 = const()[name = string("op_3960_groups_0"), val = int32(1)]; tensor var_3960 = conv(dilations = var_3960_dilations_0, groups = var_3960_groups_0, pad = var_3960_pad_0, pad_type = var_3960_pad_type_0, strides = var_3960_strides_0, weight = encoder_layers_16_self_attn_q_proj_weight_quantized, x = var_3953_cast_fp16)[name = string("op_3960")]; tensor var_3961 = const()[name = string("op_3961"), val = tensor([1, 3, 256, 128])]; tensor var_3962 = reshape(shape = var_3961, x = var_3960)[name = string("op_3962")]; tensor var_3963 = const()[name = string("op_3963"), val = tensor([0, 1, 3, 2])]; string var_3970_pad_type_0 = const()[name = string("op_3970_pad_type_0"), val = string("valid")]; tensor var_3970_strides_0 = const()[name = string("op_3970_strides_0"), val = tensor([1, 1])]; tensor var_3970_pad_0 = const()[name = string("op_3970_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3970_dilations_0 = const()[name = string("op_3970_dilations_0"), val = tensor([1, 1])]; int32 var_3970_groups_0 = const()[name = string("op_3970_groups_0"), val = int32(1)]; tensor var_3970 = conv(dilations = var_3970_dilations_0, groups = var_3970_groups_0, pad = var_3970_pad_0, pad_type = var_3970_pad_type_0, strides = var_3970_strides_0, weight = encoder_layers_16_self_attn_k_proj_weight_quantized, x = var_3953_cast_fp16)[name = string("op_3970")]; tensor var_3971 = const()[name = string("op_3971"), val = tensor([1, 1, 256, 128])]; tensor var_3972 = reshape(shape = var_3971, x = var_3970)[name = string("op_3972")]; tensor var_3973 = const()[name = string("op_3973"), val = tensor([0, 1, 3, 2])]; string var_3980_pad_type_0 = const()[name = string("op_3980_pad_type_0"), val = string("valid")]; tensor var_3980_strides_0 = const()[name = string("op_3980_strides_0"), val = tensor([1, 1])]; tensor var_3980_pad_0 = const()[name = string("op_3980_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3980_dilations_0 = const()[name = string("op_3980_dilations_0"), val = tensor([1, 1])]; int32 var_3980_groups_0 = const()[name = string("op_3980_groups_0"), val = int32(1)]; tensor var_3980 = conv(dilations = var_3980_dilations_0, groups = var_3980_groups_0, pad = var_3980_pad_0, pad_type = var_3980_pad_type_0, strides = var_3980_strides_0, weight = encoder_layers_16_self_attn_v_proj_weight_quantized, x = var_3953_cast_fp16)[name = string("op_3980")]; tensor var_3981 = const()[name = string("op_3981"), val = tensor([1, 1, 256, 128])]; tensor var_3982 = reshape(shape = var_3981, x = var_3980)[name = string("op_3982")]; tensor var_3983 = const()[name = string("op_3983"), val = tensor([0, 1, 3, 2])]; fp16 const_226_promoted_to_fp16 = const()[name = string("const_226_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_97 = transpose(perm = var_3963, x = var_3962)[name = string("transpose_70")]; tensor var_3989_cast_fp16 = mul(x = q_97, y = const_226_promoted_to_fp16)[name = string("op_3989_cast_fp16")]; bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; tensor input_325_cast_fp16 = concat(axis = var_23, interleave = input_325_interleave_0, values = (q_97, var_3989_cast_fp16))[name = string("input_325_cast_fp16")]; tensor normed_455_axes_0 = const()[name = string("normed_455_axes_0"), val = tensor([-1])]; tensor normed_455_cast_fp16 = layer_norm(axes = normed_455_axes_0, epsilon = var_8_to_fp16, x = input_325_cast_fp16)[name = string("normed_455_cast_fp16")]; tensor var_3994_split_sizes_0 = const()[name = string("op_3994_split_sizes_0"), val = tensor([256, 256])]; int32 var_3994_axis_0 = const()[name = string("op_3994_axis_0"), val = int32(-1)]; tensor var_3994_cast_fp16_0, tensor var_3994_cast_fp16_1 = split(axis = var_3994_axis_0, split_sizes = var_3994_split_sizes_0, x = normed_455_cast_fp16)[name = string("op_3994_cast_fp16")]; tensor var_3998_to_fp16 = const()[name = string("op_3998_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303824256)))]; tensor out_195_cast_fp16 = mul(x = var_3994_cast_fp16_0, y = var_3998_to_fp16)[name = string("out_195_cast_fp16")]; fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_97 = transpose(perm = var_3973, x = var_3972)[name = string("transpose_69")]; tensor var_4005_cast_fp16 = mul(x = k_97, y = const_228_promoted_to_fp16)[name = string("op_4005_cast_fp16")]; bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; tensor input_327_cast_fp16 = concat(axis = var_23, interleave = input_327_interleave_0, values = (k_97, var_4005_cast_fp16))[name = string("input_327_cast_fp16")]; tensor normed_459_axes_0 = const()[name = string("normed_459_axes_0"), val = tensor([-1])]; tensor normed_459_cast_fp16 = layer_norm(axes = normed_459_axes_0, epsilon = var_8_to_fp16, x = input_327_cast_fp16)[name = string("normed_459_cast_fp16")]; tensor var_4010_split_sizes_0 = const()[name = string("op_4010_split_sizes_0"), val = tensor([256, 256])]; int32 var_4010_axis_0 = const()[name = string("op_4010_axis_0"), val = int32(-1)]; tensor var_4010_cast_fp16_0, tensor var_4010_cast_fp16_1 = split(axis = var_4010_axis_0, split_sizes = var_4010_split_sizes_0, x = normed_459_cast_fp16)[name = string("op_4010_cast_fp16")]; tensor var_4014_to_fp16 = const()[name = string("op_4014_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303824832)))]; tensor out_197_cast_fp16 = mul(x = var_4010_cast_fp16_0, y = var_4014_to_fp16)[name = string("out_197_cast_fp16")]; tensor var_4017 = mul(x = out_195_cast_fp16, y = cos_1_quantized)[name = string("op_4017")]; tensor var_4018_split_sizes_0 = const()[name = string("op_4018_split_sizes_0"), val = tensor([128, 128])]; int32 var_4018_axis_0 = const()[name = string("op_4018_axis_0"), val = int32(-1)]; tensor var_4018_0, tensor var_4018_1 = split(axis = var_4018_axis_0, split_sizes = var_4018_split_sizes_0, x = out_195_cast_fp16)[name = string("op_4018")]; fp16 const_230_promoted = const()[name = string("const_230_promoted"), val = fp16(-0x1p+0)]; tensor var_4020 = mul(x = var_4018_1, y = const_230_promoted)[name = string("op_4020")]; bool var_4022_interleave_0 = const()[name = string("op_4022_interleave_0"), val = bool(false)]; tensor var_4022 = concat(axis = var_23, interleave = var_4022_interleave_0, values = (var_4020, var_4018_0))[name = string("op_4022")]; tensor var_4023 = mul(x = var_4022, y = sin_1_quantized)[name = string("op_4023")]; tensor q_101 = add(x = var_4017, y = var_4023)[name = string("q_101")]; tensor var_4025 = mul(x = out_197_cast_fp16, y = cos_1_quantized)[name = string("op_4025")]; tensor var_4026_split_sizes_0 = const()[name = string("op_4026_split_sizes_0"), val = tensor([128, 128])]; int32 var_4026_axis_0 = const()[name = string("op_4026_axis_0"), val = int32(-1)]; tensor var_4026_0, tensor var_4026_1 = split(axis = var_4026_axis_0, split_sizes = var_4026_split_sizes_0, x = out_197_cast_fp16)[name = string("op_4026")]; fp16 const_231_promoted = const()[name = string("const_231_promoted"), val = fp16(-0x1p+0)]; tensor var_4028 = mul(x = var_4026_1, y = const_231_promoted)[name = string("op_4028")]; bool var_4030_interleave_0 = const()[name = string("op_4030_interleave_0"), val = bool(false)]; tensor var_4030 = concat(axis = var_23, interleave = var_4030_interleave_0, values = (var_4028, var_4026_0))[name = string("op_4030")]; tensor var_4031 = mul(x = var_4030, y = sin_1_quantized)[name = string("op_4031")]; tensor hidden_states_193 = add(x = var_4025, y = var_4031)[name = string("hidden_states_193")]; tensor hidden_states_195_axes_0 = const()[name = string("hidden_states_195_axes_0"), val = tensor([2])]; tensor hidden_states_195 = expand_dims(axes = hidden_states_195_axes_0, x = hidden_states_193)[name = string("hidden_states_195")]; tensor var_4034 = const()[name = string("op_4034"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_197 = tile(reps = var_4034, x = hidden_states_195)[name = string("hidden_states_197")]; tensor var_4036 = const()[name = string("op_4036"), val = tensor([1, 3, 128, 256])]; tensor k_101 = reshape(shape = var_4036, x = hidden_states_197)[name = string("k_101")]; tensor hidden_states_201_axes_0 = const()[name = string("hidden_states_201_axes_0"), val = tensor([2])]; tensor hidden_states_199 = transpose(perm = var_3983, x = var_3982)[name = string("transpose_68")]; tensor hidden_states_201 = expand_dims(axes = hidden_states_201_axes_0, x = hidden_states_199)[name = string("hidden_states_201")]; tensor var_4039 = const()[name = string("op_4039"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_203 = tile(reps = var_4039, x = hidden_states_201)[name = string("hidden_states_203")]; tensor var_4041 = const()[name = string("op_4041"), val = tensor([1, 3, 128, 256])]; tensor v_33 = reshape(shape = var_4041, x = hidden_states_203)[name = string("v_33")]; bool var_4046_transpose_x_1 = const()[name = string("op_4046_transpose_x_1"), val = bool(false)]; bool var_4046_transpose_y_1 = const()[name = string("op_4046_transpose_y_1"), val = bool(true)]; tensor var_4046_cast_fp16 = matmul(transpose_x = var_4046_transpose_x_1, transpose_y = var_4046_transpose_y_1, x = q_101, y = k_101)[name = string("op_4046_cast_fp16")]; fp16 var_4047_to_fp16 = const()[name = string("op_4047_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_97_cast_fp16 = mul(x = var_4046_cast_fp16, y = var_4047_to_fp16)[name = string("attn_weights_97_cast_fp16")]; tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_99_cast_fp16")]; tensor var_4051_cast_fp16 = softmax(axis = var_23, x = attn_weights_99_cast_fp16)[name = string("op_4051_cast_fp16")]; bool var_4055_transpose_x_0 = const()[name = string("op_4055_transpose_x_0"), val = bool(false)]; bool var_4055_transpose_y_0 = const()[name = string("op_4055_transpose_y_0"), val = bool(false)]; tensor var_4055_cast_fp16 = matmul(transpose_x = var_4055_transpose_x_0, transpose_y = var_4055_transpose_y_0, x = var_4051_cast_fp16, y = v_33)[name = string("op_4055_cast_fp16")]; tensor var_4057 = const()[name = string("op_4057"), val = tensor([0, 2, 1, 3])]; tensor var_4060 = const()[name = string("op_4060"), val = tensor([1, 128, 768])]; tensor var_4058 = transpose(perm = var_4057, x = var_4055_cast_fp16)[name = string("transpose_67")]; tensor attn_out_99 = reshape(shape = var_4060, x = var_4058)[name = string("attn_out_99")]; tensor var_4062 = const()[name = string("op_4062"), val = tensor([0, 2, 1])]; tensor squeeze_16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303825408))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304415296))))[name = string("squeeze_16_quantized")]; string var_4071_pad_type_0 = const()[name = string("op_4071_pad_type_0"), val = string("valid")]; int32 var_4071_groups_0 = const()[name = string("op_4071_groups_0"), val = int32(1)]; tensor var_4071_strides_0 = const()[name = string("op_4071_strides_0"), val = tensor([1])]; tensor var_4071_pad_0 = const()[name = string("op_4071_pad_0"), val = tensor([0, 0])]; tensor var_4071_dilations_0 = const()[name = string("op_4071_dilations_0"), val = tensor([1])]; tensor var_4063 = transpose(perm = var_4062, x = attn_out_99)[name = string("transpose_66")]; tensor var_4071 = conv(dilations = var_4071_dilations_0, groups = var_4071_groups_0, pad = var_4071_pad_0, pad_type = var_4071_pad_type_0, strides = var_4071_strides_0, weight = squeeze_16_quantized, x = var_4063)[name = string("op_4071")]; tensor var_4072 = const()[name = string("op_4072"), val = tensor([0, 2, 1])]; fp16 const_232_promoted_to_fp16 = const()[name = string("const_232_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_265 = transpose(perm = var_4072, x = var_4071)[name = string("transpose_65")]; tensor var_4076_cast_fp16 = mul(x = x_265, y = const_232_promoted_to_fp16)[name = string("op_4076_cast_fp16")]; bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; tensor input_331_cast_fp16 = concat(axis = var_23, interleave = input_331_interleave_0, values = (x_265, var_4076_cast_fp16))[name = string("input_331_cast_fp16")]; tensor normed_463_axes_0 = const()[name = string("normed_463_axes_0"), val = tensor([-1])]; tensor normed_463_cast_fp16 = layer_norm(axes = normed_463_axes_0, epsilon = var_8_to_fp16, x = input_331_cast_fp16)[name = string("normed_463_cast_fp16")]; tensor var_4081_split_sizes_0 = const()[name = string("op_4081_split_sizes_0"), val = tensor([768, 768])]; int32 var_4081_axis_0 = const()[name = string("op_4081_axis_0"), val = int32(-1)]; tensor var_4081_cast_fp16_0, tensor var_4081_cast_fp16_1 = split(axis = var_4081_axis_0, split_sizes = var_4081_split_sizes_0, x = normed_463_cast_fp16)[name = string("op_4081_cast_fp16")]; tensor var_4085_to_fp16 = const()[name = string("op_4085_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304416896)))]; tensor out_199_cast_fp16 = mul(x = var_4081_cast_fp16_0, y = var_4085_to_fp16)[name = string("out_199_cast_fp16")]; tensor x_267_cast_fp16 = add(x = x_257_cast_fp16, y = out_199_cast_fp16)[name = string("x_267_cast_fp16")]; fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4092_cast_fp16 = mul(x = x_267_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_4092_cast_fp16")]; bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; tensor input_333_cast_fp16 = concat(axis = var_23, interleave = input_333_interleave_0, values = (x_267_cast_fp16, var_4092_cast_fp16))[name = string("input_333_cast_fp16")]; tensor normed_467_axes_0 = const()[name = string("normed_467_axes_0"), val = tensor([-1])]; tensor normed_467_cast_fp16 = layer_norm(axes = normed_467_axes_0, epsilon = var_8_to_fp16, x = input_333_cast_fp16)[name = string("normed_467_cast_fp16")]; tensor var_4097_split_sizes_0 = const()[name = string("op_4097_split_sizes_0"), val = tensor([768, 768])]; int32 var_4097_axis_0 = const()[name = string("op_4097_axis_0"), val = int32(-1)]; tensor var_4097_cast_fp16_0, tensor var_4097_cast_fp16_1 = split(axis = var_4097_axis_0, split_sizes = var_4097_split_sizes_0, x = normed_467_cast_fp16)[name = string("op_4097_cast_fp16")]; tensor var_4101_to_fp16 = const()[name = string("op_4101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304418496)))]; tensor out_201_cast_fp16 = mul(x = var_4097_cast_fp16_0, y = var_4101_to_fp16)[name = string("out_201_cast_fp16")]; tensor var_4108 = const()[name = string("op_4108"), val = tensor([0, 2, 1])]; tensor input_335_axes_0 = const()[name = string("input_335_axes_0"), val = tensor([2])]; tensor var_4109 = transpose(perm = var_4108, x = out_201_cast_fp16)[name = string("transpose_64")]; tensor input_335 = expand_dims(axes = input_335_axes_0, x = var_4109)[name = string("input_335")]; string gate_65_pad_type_0 = const()[name = string("gate_65_pad_type_0"), val = string("valid")]; tensor gate_65_strides_0 = const()[name = string("gate_65_strides_0"), val = tensor([1, 1])]; tensor gate_65_pad_0 = const()[name = string("gate_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_65_dilations_0 = const()[name = string("gate_65_dilations_0"), val = tensor([1, 1])]; int32 gate_65_groups_0 = const()[name = string("gate_65_groups_0"), val = int32(1)]; tensor gate_65 = conv(dilations = gate_65_dilations_0, groups = gate_65_groups_0, pad = gate_65_pad_0, pad_type = gate_65_pad_type_0, strides = gate_65_strides_0, weight = encoder_layers_16_mlp_gate_proj_weight_quantized, x = input_335)[name = string("gate_65")]; string up_33_pad_type_0 = const()[name = string("up_33_pad_type_0"), val = string("valid")]; tensor up_33_strides_0 = const()[name = string("up_33_strides_0"), val = tensor([1, 1])]; tensor up_33_pad_0 = const()[name = string("up_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_33_dilations_0 = const()[name = string("up_33_dilations_0"), val = tensor([1, 1])]; int32 up_33_groups_0 = const()[name = string("up_33_groups_0"), val = int32(1)]; tensor up_33 = conv(dilations = up_33_dilations_0, groups = up_33_groups_0, pad = up_33_pad_0, pad_type = up_33_pad_type_0, strides = up_33_strides_0, weight = encoder_layers_16_mlp_up_proj_weight_quantized, x = input_335)[name = string("up_33")]; string gate_67_mode_0 = const()[name = string("gate_67_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_67 = gelu(mode = gate_67_mode_0, x = gate_65)[name = string("gate_67")]; tensor input_337 = mul(x = gate_67, y = up_33)[name = string("input_337")]; string var_4130_pad_type_0 = const()[name = string("op_4130_pad_type_0"), val = string("valid")]; tensor var_4130_strides_0 = const()[name = string("op_4130_strides_0"), val = tensor([1, 1])]; tensor var_4130_pad_0 = const()[name = string("op_4130_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4130_dilations_0 = const()[name = string("op_4130_dilations_0"), val = tensor([1, 1])]; int32 var_4130_groups_0 = const()[name = string("op_4130_groups_0"), val = int32(1)]; tensor var_4130 = conv(dilations = var_4130_dilations_0, groups = var_4130_groups_0, pad = var_4130_pad_0, pad_type = var_4130_pad_type_0, strides = var_4130_strides_0, weight = encoder_layers_16_mlp_down_proj_weight_quantized, x = input_337)[name = string("op_4130")]; tensor var_4131_axes_0 = const()[name = string("op_4131_axes_0"), val = tensor([2])]; tensor var_4131 = squeeze(axes = var_4131_axes_0, x = var_4130)[name = string("op_4131")]; tensor var_4132 = const()[name = string("op_4132"), val = tensor([0, 2, 1])]; fp16 const_236_promoted_to_fp16 = const()[name = string("const_236_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_271 = transpose(perm = var_4132, x = var_4131)[name = string("transpose_63")]; tensor var_4136_cast_fp16 = mul(x = x_271, y = const_236_promoted_to_fp16)[name = string("op_4136_cast_fp16")]; bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; tensor input_339_cast_fp16 = concat(axis = var_23, interleave = input_339_interleave_0, values = (x_271, var_4136_cast_fp16))[name = string("input_339_cast_fp16")]; tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_8_to_fp16, x = input_339_cast_fp16)[name = string("normed_473_cast_fp16")]; tensor var_4141_split_sizes_0 = const()[name = string("op_4141_split_sizes_0"), val = tensor([768, 768])]; int32 var_4141_axis_0 = const()[name = string("op_4141_axis_0"), val = int32(-1)]; tensor var_4141_cast_fp16_0, tensor var_4141_cast_fp16_1 = split(axis = var_4141_axis_0, split_sizes = var_4141_split_sizes_0, x = normed_473_cast_fp16)[name = string("op_4141_cast_fp16")]; tensor var_4145_to_fp16 = const()[name = string("op_4145_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304420096)))]; tensor out_203_cast_fp16 = mul(x = var_4141_cast_fp16_0, y = var_4145_to_fp16)[name = string("out_203_cast_fp16")]; tensor x_273_cast_fp16 = add(x = x_267_cast_fp16, y = out_203_cast_fp16)[name = string("x_273_cast_fp16")]; fp16 const_238_promoted_to_fp16 = const()[name = string("const_238_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4174_cast_fp16 = mul(x = x_273_cast_fp16, y = const_238_promoted_to_fp16)[name = string("op_4174_cast_fp16")]; bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; tensor input_341_cast_fp16 = concat(axis = var_23, interleave = input_341_interleave_0, values = (x_273_cast_fp16, var_4174_cast_fp16))[name = string("input_341_cast_fp16")]; tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_8_to_fp16, x = input_341_cast_fp16)[name = string("normed_477_cast_fp16")]; tensor var_4179_split_sizes_0 = const()[name = string("op_4179_split_sizes_0"), val = tensor([768, 768])]; int32 var_4179_axis_0 = const()[name = string("op_4179_axis_0"), val = int32(-1)]; tensor var_4179_cast_fp16_0, tensor var_4179_cast_fp16_1 = split(axis = var_4179_axis_0, split_sizes = var_4179_split_sizes_0, x = normed_477_cast_fp16)[name = string("op_4179_cast_fp16")]; tensor var_4183_to_fp16 = const()[name = string("op_4183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304421696)))]; tensor out_205_cast_fp16 = mul(x = var_4179_cast_fp16_0, y = var_4183_to_fp16)[name = string("out_205_cast_fp16")]; tensor var_4189 = const()[name = string("op_4189"), val = tensor([0, 2, 1])]; tensor var_4191_axes_0 = const()[name = string("op_4191_axes_0"), val = tensor([2])]; tensor var_4190_cast_fp16 = transpose(perm = var_4189, x = out_205_cast_fp16)[name = string("transpose_62")]; tensor var_4191_cast_fp16 = expand_dims(axes = var_4191_axes_0, x = var_4190_cast_fp16)[name = string("op_4191_cast_fp16")]; string var_4198_pad_type_0 = const()[name = string("op_4198_pad_type_0"), val = string("valid")]; tensor var_4198_strides_0 = const()[name = string("op_4198_strides_0"), val = tensor([1, 1])]; tensor var_4198_pad_0 = const()[name = string("op_4198_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4198_dilations_0 = const()[name = string("op_4198_dilations_0"), val = tensor([1, 1])]; int32 var_4198_groups_0 = const()[name = string("op_4198_groups_0"), val = int32(1)]; tensor var_4198 = conv(dilations = var_4198_dilations_0, groups = var_4198_groups_0, pad = var_4198_pad_0, pad_type = var_4198_pad_type_0, strides = var_4198_strides_0, weight = encoder_layers_17_self_attn_q_proj_weight_quantized, x = var_4191_cast_fp16)[name = string("op_4198")]; tensor var_4199 = const()[name = string("op_4199"), val = tensor([1, 3, 256, 128])]; tensor var_4200 = reshape(shape = var_4199, x = var_4198)[name = string("op_4200")]; tensor var_4201 = const()[name = string("op_4201"), val = tensor([0, 1, 3, 2])]; string var_4208_pad_type_0 = const()[name = string("op_4208_pad_type_0"), val = string("valid")]; tensor var_4208_strides_0 = const()[name = string("op_4208_strides_0"), val = tensor([1, 1])]; tensor var_4208_pad_0 = const()[name = string("op_4208_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4208_dilations_0 = const()[name = string("op_4208_dilations_0"), val = tensor([1, 1])]; int32 var_4208_groups_0 = const()[name = string("op_4208_groups_0"), val = int32(1)]; tensor var_4208 = conv(dilations = var_4208_dilations_0, groups = var_4208_groups_0, pad = var_4208_pad_0, pad_type = var_4208_pad_type_0, strides = var_4208_strides_0, weight = encoder_layers_17_self_attn_k_proj_weight_quantized, x = var_4191_cast_fp16)[name = string("op_4208")]; tensor var_4209 = const()[name = string("op_4209"), val = tensor([1, 1, 256, 128])]; tensor var_4210 = reshape(shape = var_4209, x = var_4208)[name = string("op_4210")]; tensor var_4211 = const()[name = string("op_4211"), val = tensor([0, 1, 3, 2])]; string var_4218_pad_type_0 = const()[name = string("op_4218_pad_type_0"), val = string("valid")]; tensor var_4218_strides_0 = const()[name = string("op_4218_strides_0"), val = tensor([1, 1])]; tensor var_4218_pad_0 = const()[name = string("op_4218_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4218_dilations_0 = const()[name = string("op_4218_dilations_0"), val = tensor([1, 1])]; int32 var_4218_groups_0 = const()[name = string("op_4218_groups_0"), val = int32(1)]; tensor var_4218 = conv(dilations = var_4218_dilations_0, groups = var_4218_groups_0, pad = var_4218_pad_0, pad_type = var_4218_pad_type_0, strides = var_4218_strides_0, weight = encoder_layers_17_self_attn_v_proj_weight_quantized, x = var_4191_cast_fp16)[name = string("op_4218")]; tensor var_4219 = const()[name = string("op_4219"), val = tensor([1, 1, 256, 128])]; tensor var_4220 = reshape(shape = var_4219, x = var_4218)[name = string("op_4220")]; tensor var_4221 = const()[name = string("op_4221"), val = tensor([0, 1, 3, 2])]; fp16 const_240_promoted_to_fp16 = const()[name = string("const_240_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_103 = transpose(perm = var_4201, x = var_4200)[name = string("transpose_61")]; tensor var_4227_cast_fp16 = mul(x = q_103, y = const_240_promoted_to_fp16)[name = string("op_4227_cast_fp16")]; bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; tensor input_345_cast_fp16 = concat(axis = var_23, interleave = input_345_interleave_0, values = (q_103, var_4227_cast_fp16))[name = string("input_345_cast_fp16")]; tensor normed_483_axes_0 = const()[name = string("normed_483_axes_0"), val = tensor([-1])]; tensor normed_483_cast_fp16 = layer_norm(axes = normed_483_axes_0, epsilon = var_8_to_fp16, x = input_345_cast_fp16)[name = string("normed_483_cast_fp16")]; tensor var_4232_split_sizes_0 = const()[name = string("op_4232_split_sizes_0"), val = tensor([256, 256])]; int32 var_4232_axis_0 = const()[name = string("op_4232_axis_0"), val = int32(-1)]; tensor var_4232_cast_fp16_0, tensor var_4232_cast_fp16_1 = split(axis = var_4232_axis_0, split_sizes = var_4232_split_sizes_0, x = normed_483_cast_fp16)[name = string("op_4232_cast_fp16")]; tensor var_4236_to_fp16 = const()[name = string("op_4236_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304423296)))]; tensor out_207_cast_fp16 = mul(x = var_4232_cast_fp16_0, y = var_4236_to_fp16)[name = string("out_207_cast_fp16")]; fp16 const_242_promoted_to_fp16 = const()[name = string("const_242_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_103 = transpose(perm = var_4211, x = var_4210)[name = string("transpose_60")]; tensor var_4243_cast_fp16 = mul(x = k_103, y = const_242_promoted_to_fp16)[name = string("op_4243_cast_fp16")]; bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; tensor input_347_cast_fp16 = concat(axis = var_23, interleave = input_347_interleave_0, values = (k_103, var_4243_cast_fp16))[name = string("input_347_cast_fp16")]; tensor normed_487_axes_0 = const()[name = string("normed_487_axes_0"), val = tensor([-1])]; tensor normed_487_cast_fp16 = layer_norm(axes = normed_487_axes_0, epsilon = var_8_to_fp16, x = input_347_cast_fp16)[name = string("normed_487_cast_fp16")]; tensor var_4248_split_sizes_0 = const()[name = string("op_4248_split_sizes_0"), val = tensor([256, 256])]; int32 var_4248_axis_0 = const()[name = string("op_4248_axis_0"), val = int32(-1)]; tensor var_4248_cast_fp16_0, tensor var_4248_cast_fp16_1 = split(axis = var_4248_axis_0, split_sizes = var_4248_split_sizes_0, x = normed_487_cast_fp16)[name = string("op_4248_cast_fp16")]; tensor var_4252_to_fp16 = const()[name = string("op_4252_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304423872)))]; tensor out_209_cast_fp16 = mul(x = var_4248_cast_fp16_0, y = var_4252_to_fp16)[name = string("out_209_cast_fp16")]; tensor var_4255 = mul(x = out_207_cast_fp16, y = cos_quantized)[name = string("op_4255")]; tensor var_4256_split_sizes_0 = const()[name = string("op_4256_split_sizes_0"), val = tensor([128, 128])]; int32 var_4256_axis_0 = const()[name = string("op_4256_axis_0"), val = int32(-1)]; tensor var_4256_0, tensor var_4256_1 = split(axis = var_4256_axis_0, split_sizes = var_4256_split_sizes_0, x = out_207_cast_fp16)[name = string("op_4256")]; fp16 const_244_promoted = const()[name = string("const_244_promoted"), val = fp16(-0x1p+0)]; tensor var_4258 = mul(x = var_4256_1, y = const_244_promoted)[name = string("op_4258")]; bool var_4260_interleave_0 = const()[name = string("op_4260_interleave_0"), val = bool(false)]; tensor var_4260 = concat(axis = var_23, interleave = var_4260_interleave_0, values = (var_4258, var_4256_0))[name = string("op_4260")]; tensor var_4261 = mul(x = var_4260, y = sin_quantized)[name = string("op_4261")]; tensor q_107 = add(x = var_4255, y = var_4261)[name = string("q_107")]; tensor var_4263 = mul(x = out_209_cast_fp16, y = cos_quantized)[name = string("op_4263")]; tensor var_4264_split_sizes_0 = const()[name = string("op_4264_split_sizes_0"), val = tensor([128, 128])]; int32 var_4264_axis_0 = const()[name = string("op_4264_axis_0"), val = int32(-1)]; tensor var_4264_0, tensor var_4264_1 = split(axis = var_4264_axis_0, split_sizes = var_4264_split_sizes_0, x = out_209_cast_fp16)[name = string("op_4264")]; fp16 const_245_promoted = const()[name = string("const_245_promoted"), val = fp16(-0x1p+0)]; tensor var_4266 = mul(x = var_4264_1, y = const_245_promoted)[name = string("op_4266")]; bool var_4268_interleave_0 = const()[name = string("op_4268_interleave_0"), val = bool(false)]; tensor var_4268 = concat(axis = var_23, interleave = var_4268_interleave_0, values = (var_4266, var_4264_0))[name = string("op_4268")]; tensor var_4269 = mul(x = var_4268, y = sin_quantized)[name = string("op_4269")]; tensor hidden_states_205 = add(x = var_4263, y = var_4269)[name = string("hidden_states_205")]; tensor hidden_states_207_axes_0 = const()[name = string("hidden_states_207_axes_0"), val = tensor([2])]; tensor hidden_states_207 = expand_dims(axes = hidden_states_207_axes_0, x = hidden_states_205)[name = string("hidden_states_207")]; tensor var_4272 = const()[name = string("op_4272"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_209 = tile(reps = var_4272, x = hidden_states_207)[name = string("hidden_states_209")]; tensor var_4274 = const()[name = string("op_4274"), val = tensor([1, 3, 128, 256])]; tensor k_107 = reshape(shape = var_4274, x = hidden_states_209)[name = string("k_107")]; tensor hidden_states_213_axes_0 = const()[name = string("hidden_states_213_axes_0"), val = tensor([2])]; tensor hidden_states_211 = transpose(perm = var_4221, x = var_4220)[name = string("transpose_59")]; tensor hidden_states_213 = expand_dims(axes = hidden_states_213_axes_0, x = hidden_states_211)[name = string("hidden_states_213")]; tensor var_4277 = const()[name = string("op_4277"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_215 = tile(reps = var_4277, x = hidden_states_213)[name = string("hidden_states_215")]; tensor var_4279 = const()[name = string("op_4279"), val = tensor([1, 3, 128, 256])]; tensor v_35 = reshape(shape = var_4279, x = hidden_states_215)[name = string("v_35")]; bool var_4284_transpose_x_1 = const()[name = string("op_4284_transpose_x_1"), val = bool(false)]; bool var_4284_transpose_y_1 = const()[name = string("op_4284_transpose_y_1"), val = bool(true)]; tensor var_4284_cast_fp16 = matmul(transpose_x = var_4284_transpose_x_1, transpose_y = var_4284_transpose_y_1, x = q_107, y = k_107)[name = string("op_4284_cast_fp16")]; fp16 var_4285_to_fp16 = const()[name = string("op_4285_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_103_cast_fp16 = mul(x = var_4284_cast_fp16, y = var_4285_to_fp16)[name = string("attn_weights_103_cast_fp16")]; tensor attn_weights_105_cast_fp16 = add(x = attn_weights_103_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_105_cast_fp16")]; tensor var_4289_cast_fp16 = softmax(axis = var_23, x = attn_weights_105_cast_fp16)[name = string("op_4289_cast_fp16")]; bool var_4293_transpose_x_0 = const()[name = string("op_4293_transpose_x_0"), val = bool(false)]; bool var_4293_transpose_y_0 = const()[name = string("op_4293_transpose_y_0"), val = bool(false)]; tensor var_4293_cast_fp16 = matmul(transpose_x = var_4293_transpose_x_0, transpose_y = var_4293_transpose_y_0, x = var_4289_cast_fp16, y = v_35)[name = string("op_4293_cast_fp16")]; tensor var_4295 = const()[name = string("op_4295"), val = tensor([0, 2, 1, 3])]; tensor var_4298 = const()[name = string("op_4298"), val = tensor([1, 128, 768])]; tensor var_4296 = transpose(perm = var_4295, x = var_4293_cast_fp16)[name = string("transpose_58")]; tensor attn_out_105 = reshape(shape = var_4298, x = var_4296)[name = string("attn_out_105")]; tensor var_4300 = const()[name = string("op_4300"), val = tensor([0, 2, 1])]; tensor squeeze_17_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304424448))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305014336))))[name = string("squeeze_17_quantized")]; string var_4309_pad_type_0 = const()[name = string("op_4309_pad_type_0"), val = string("valid")]; int32 var_4309_groups_0 = const()[name = string("op_4309_groups_0"), val = int32(1)]; tensor var_4309_strides_0 = const()[name = string("op_4309_strides_0"), val = tensor([1])]; tensor var_4309_pad_0 = const()[name = string("op_4309_pad_0"), val = tensor([0, 0])]; tensor var_4309_dilations_0 = const()[name = string("op_4309_dilations_0"), val = tensor([1])]; tensor var_4301 = transpose(perm = var_4300, x = attn_out_105)[name = string("transpose_57")]; tensor var_4309 = conv(dilations = var_4309_dilations_0, groups = var_4309_groups_0, pad = var_4309_pad_0, pad_type = var_4309_pad_type_0, strides = var_4309_strides_0, weight = squeeze_17_quantized, x = var_4301)[name = string("op_4309")]; tensor var_4310 = const()[name = string("op_4310"), val = tensor([0, 2, 1])]; fp16 const_246_promoted_to_fp16 = const()[name = string("const_246_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_281 = transpose(perm = var_4310, x = var_4309)[name = string("transpose_56")]; tensor var_4314_cast_fp16 = mul(x = x_281, y = const_246_promoted_to_fp16)[name = string("op_4314_cast_fp16")]; bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; tensor input_351_cast_fp16 = concat(axis = var_23, interleave = input_351_interleave_0, values = (x_281, var_4314_cast_fp16))[name = string("input_351_cast_fp16")]; tensor normed_491_axes_0 = const()[name = string("normed_491_axes_0"), val = tensor([-1])]; tensor normed_491_cast_fp16 = layer_norm(axes = normed_491_axes_0, epsilon = var_8_to_fp16, x = input_351_cast_fp16)[name = string("normed_491_cast_fp16")]; tensor var_4319_split_sizes_0 = const()[name = string("op_4319_split_sizes_0"), val = tensor([768, 768])]; int32 var_4319_axis_0 = const()[name = string("op_4319_axis_0"), val = int32(-1)]; tensor var_4319_cast_fp16_0, tensor var_4319_cast_fp16_1 = split(axis = var_4319_axis_0, split_sizes = var_4319_split_sizes_0, x = normed_491_cast_fp16)[name = string("op_4319_cast_fp16")]; tensor var_4323_to_fp16 = const()[name = string("op_4323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305015936)))]; tensor out_211_cast_fp16 = mul(x = var_4319_cast_fp16_0, y = var_4323_to_fp16)[name = string("out_211_cast_fp16")]; tensor x_283_cast_fp16 = add(x = x_273_cast_fp16, y = out_211_cast_fp16)[name = string("x_283_cast_fp16")]; fp16 const_248_promoted_to_fp16 = const()[name = string("const_248_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4330_cast_fp16 = mul(x = x_283_cast_fp16, y = const_248_promoted_to_fp16)[name = string("op_4330_cast_fp16")]; bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; tensor input_353_cast_fp16 = concat(axis = var_23, interleave = input_353_interleave_0, values = (x_283_cast_fp16, var_4330_cast_fp16))[name = string("input_353_cast_fp16")]; tensor normed_495_axes_0 = const()[name = string("normed_495_axes_0"), val = tensor([-1])]; tensor normed_495_cast_fp16 = layer_norm(axes = normed_495_axes_0, epsilon = var_8_to_fp16, x = input_353_cast_fp16)[name = string("normed_495_cast_fp16")]; tensor var_4335_split_sizes_0 = const()[name = string("op_4335_split_sizes_0"), val = tensor([768, 768])]; int32 var_4335_axis_0 = const()[name = string("op_4335_axis_0"), val = int32(-1)]; tensor var_4335_cast_fp16_0, tensor var_4335_cast_fp16_1 = split(axis = var_4335_axis_0, split_sizes = var_4335_split_sizes_0, x = normed_495_cast_fp16)[name = string("op_4335_cast_fp16")]; tensor var_4339_to_fp16 = const()[name = string("op_4339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305017536)))]; tensor out_213_cast_fp16 = mul(x = var_4335_cast_fp16_0, y = var_4339_to_fp16)[name = string("out_213_cast_fp16")]; tensor var_4346 = const()[name = string("op_4346"), val = tensor([0, 2, 1])]; tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; tensor var_4347 = transpose(perm = var_4346, x = out_213_cast_fp16)[name = string("transpose_55")]; tensor input_355 = expand_dims(axes = input_355_axes_0, x = var_4347)[name = string("input_355")]; string gate_69_pad_type_0 = const()[name = string("gate_69_pad_type_0"), val = string("valid")]; tensor gate_69_strides_0 = const()[name = string("gate_69_strides_0"), val = tensor([1, 1])]; tensor gate_69_pad_0 = const()[name = string("gate_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_69_dilations_0 = const()[name = string("gate_69_dilations_0"), val = tensor([1, 1])]; int32 gate_69_groups_0 = const()[name = string("gate_69_groups_0"), val = int32(1)]; tensor gate_69 = conv(dilations = gate_69_dilations_0, groups = gate_69_groups_0, pad = gate_69_pad_0, pad_type = gate_69_pad_type_0, strides = gate_69_strides_0, weight = encoder_layers_17_mlp_gate_proj_weight_quantized, x = input_355)[name = string("gate_69")]; string up_35_pad_type_0 = const()[name = string("up_35_pad_type_0"), val = string("valid")]; tensor up_35_strides_0 = const()[name = string("up_35_strides_0"), val = tensor([1, 1])]; tensor up_35_pad_0 = const()[name = string("up_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_35_dilations_0 = const()[name = string("up_35_dilations_0"), val = tensor([1, 1])]; int32 up_35_groups_0 = const()[name = string("up_35_groups_0"), val = int32(1)]; tensor up_35 = conv(dilations = up_35_dilations_0, groups = up_35_groups_0, pad = up_35_pad_0, pad_type = up_35_pad_type_0, strides = up_35_strides_0, weight = encoder_layers_17_mlp_up_proj_weight_quantized, x = input_355)[name = string("up_35")]; string gate_71_mode_0 = const()[name = string("gate_71_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_71 = gelu(mode = gate_71_mode_0, x = gate_69)[name = string("gate_71")]; tensor input_357 = mul(x = gate_71, y = up_35)[name = string("input_357")]; string var_4368_pad_type_0 = const()[name = string("op_4368_pad_type_0"), val = string("valid")]; tensor var_4368_strides_0 = const()[name = string("op_4368_strides_0"), val = tensor([1, 1])]; tensor var_4368_pad_0 = const()[name = string("op_4368_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4368_dilations_0 = const()[name = string("op_4368_dilations_0"), val = tensor([1, 1])]; int32 var_4368_groups_0 = const()[name = string("op_4368_groups_0"), val = int32(1)]; tensor var_4368 = conv(dilations = var_4368_dilations_0, groups = var_4368_groups_0, pad = var_4368_pad_0, pad_type = var_4368_pad_type_0, strides = var_4368_strides_0, weight = encoder_layers_17_mlp_down_proj_weight_quantized, x = input_357)[name = string("op_4368")]; tensor var_4369_axes_0 = const()[name = string("op_4369_axes_0"), val = tensor([2])]; tensor var_4369 = squeeze(axes = var_4369_axes_0, x = var_4368)[name = string("op_4369")]; tensor var_4370 = const()[name = string("op_4370"), val = tensor([0, 2, 1])]; fp16 const_250_promoted_to_fp16 = const()[name = string("const_250_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_287 = transpose(perm = var_4370, x = var_4369)[name = string("transpose_54")]; tensor var_4374_cast_fp16 = mul(x = x_287, y = const_250_promoted_to_fp16)[name = string("op_4374_cast_fp16")]; bool input_359_interleave_0 = const()[name = string("input_359_interleave_0"), val = bool(false)]; tensor input_359_cast_fp16 = concat(axis = var_23, interleave = input_359_interleave_0, values = (x_287, var_4374_cast_fp16))[name = string("input_359_cast_fp16")]; tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_8_to_fp16, x = input_359_cast_fp16)[name = string("normed_501_cast_fp16")]; tensor var_4379_split_sizes_0 = const()[name = string("op_4379_split_sizes_0"), val = tensor([768, 768])]; int32 var_4379_axis_0 = const()[name = string("op_4379_axis_0"), val = int32(-1)]; tensor var_4379_cast_fp16_0, tensor var_4379_cast_fp16_1 = split(axis = var_4379_axis_0, split_sizes = var_4379_split_sizes_0, x = normed_501_cast_fp16)[name = string("op_4379_cast_fp16")]; tensor var_4383_to_fp16 = const()[name = string("op_4383_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305019136)))]; tensor out_215_cast_fp16 = mul(x = var_4379_cast_fp16_0, y = var_4383_to_fp16)[name = string("out_215_cast_fp16")]; tensor x_289_cast_fp16 = add(x = x_283_cast_fp16, y = out_215_cast_fp16)[name = string("x_289_cast_fp16")]; fp16 const_252_promoted_to_fp16 = const()[name = string("const_252_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4412_cast_fp16 = mul(x = x_289_cast_fp16, y = const_252_promoted_to_fp16)[name = string("op_4412_cast_fp16")]; bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; tensor input_361_cast_fp16 = concat(axis = var_23, interleave = input_361_interleave_0, values = (x_289_cast_fp16, var_4412_cast_fp16))[name = string("input_361_cast_fp16")]; tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_8_to_fp16, x = input_361_cast_fp16)[name = string("normed_505_cast_fp16")]; tensor var_4417_split_sizes_0 = const()[name = string("op_4417_split_sizes_0"), val = tensor([768, 768])]; int32 var_4417_axis_0 = const()[name = string("op_4417_axis_0"), val = int32(-1)]; tensor var_4417_cast_fp16_0, tensor var_4417_cast_fp16_1 = split(axis = var_4417_axis_0, split_sizes = var_4417_split_sizes_0, x = normed_505_cast_fp16)[name = string("op_4417_cast_fp16")]; tensor var_4421_to_fp16 = const()[name = string("op_4421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305020736)))]; tensor out_217_cast_fp16 = mul(x = var_4417_cast_fp16_0, y = var_4421_to_fp16)[name = string("out_217_cast_fp16")]; tensor var_4427 = const()[name = string("op_4427"), val = tensor([0, 2, 1])]; tensor var_4429_axes_0 = const()[name = string("op_4429_axes_0"), val = tensor([2])]; tensor var_4428_cast_fp16 = transpose(perm = var_4427, x = out_217_cast_fp16)[name = string("transpose_53")]; tensor var_4429_cast_fp16 = expand_dims(axes = var_4429_axes_0, x = var_4428_cast_fp16)[name = string("op_4429_cast_fp16")]; string var_4436_pad_type_0 = const()[name = string("op_4436_pad_type_0"), val = string("valid")]; tensor var_4436_strides_0 = const()[name = string("op_4436_strides_0"), val = tensor([1, 1])]; tensor var_4436_pad_0 = const()[name = string("op_4436_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4436_dilations_0 = const()[name = string("op_4436_dilations_0"), val = tensor([1, 1])]; int32 var_4436_groups_0 = const()[name = string("op_4436_groups_0"), val = int32(1)]; tensor var_4436 = conv(dilations = var_4436_dilations_0, groups = var_4436_groups_0, pad = var_4436_pad_0, pad_type = var_4436_pad_type_0, strides = var_4436_strides_0, weight = encoder_layers_18_self_attn_q_proj_weight_quantized, x = var_4429_cast_fp16)[name = string("op_4436")]; tensor var_4437 = const()[name = string("op_4437"), val = tensor([1, 3, 256, 128])]; tensor var_4438 = reshape(shape = var_4437, x = var_4436)[name = string("op_4438")]; tensor var_4439 = const()[name = string("op_4439"), val = tensor([0, 1, 3, 2])]; string var_4446_pad_type_0 = const()[name = string("op_4446_pad_type_0"), val = string("valid")]; tensor var_4446_strides_0 = const()[name = string("op_4446_strides_0"), val = tensor([1, 1])]; tensor var_4446_pad_0 = const()[name = string("op_4446_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4446_dilations_0 = const()[name = string("op_4446_dilations_0"), val = tensor([1, 1])]; int32 var_4446_groups_0 = const()[name = string("op_4446_groups_0"), val = int32(1)]; tensor var_4446 = conv(dilations = var_4446_dilations_0, groups = var_4446_groups_0, pad = var_4446_pad_0, pad_type = var_4446_pad_type_0, strides = var_4446_strides_0, weight = encoder_layers_18_self_attn_k_proj_weight_quantized, x = var_4429_cast_fp16)[name = string("op_4446")]; tensor var_4447 = const()[name = string("op_4447"), val = tensor([1, 1, 256, 128])]; tensor var_4448 = reshape(shape = var_4447, x = var_4446)[name = string("op_4448")]; tensor var_4449 = const()[name = string("op_4449"), val = tensor([0, 1, 3, 2])]; string var_4456_pad_type_0 = const()[name = string("op_4456_pad_type_0"), val = string("valid")]; tensor var_4456_strides_0 = const()[name = string("op_4456_strides_0"), val = tensor([1, 1])]; tensor var_4456_pad_0 = const()[name = string("op_4456_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4456_dilations_0 = const()[name = string("op_4456_dilations_0"), val = tensor([1, 1])]; int32 var_4456_groups_0 = const()[name = string("op_4456_groups_0"), val = int32(1)]; tensor var_4456 = conv(dilations = var_4456_dilations_0, groups = var_4456_groups_0, pad = var_4456_pad_0, pad_type = var_4456_pad_type_0, strides = var_4456_strides_0, weight = encoder_layers_18_self_attn_v_proj_weight_quantized, x = var_4429_cast_fp16)[name = string("op_4456")]; tensor var_4457 = const()[name = string("op_4457"), val = tensor([1, 1, 256, 128])]; tensor var_4458 = reshape(shape = var_4457, x = var_4456)[name = string("op_4458")]; tensor var_4459 = const()[name = string("op_4459"), val = tensor([0, 1, 3, 2])]; fp16 const_254_promoted_to_fp16 = const()[name = string("const_254_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_109 = transpose(perm = var_4439, x = var_4438)[name = string("transpose_52")]; tensor var_4465_cast_fp16 = mul(x = q_109, y = const_254_promoted_to_fp16)[name = string("op_4465_cast_fp16")]; bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; tensor input_365_cast_fp16 = concat(axis = var_23, interleave = input_365_interleave_0, values = (q_109, var_4465_cast_fp16))[name = string("input_365_cast_fp16")]; tensor normed_511_axes_0 = const()[name = string("normed_511_axes_0"), val = tensor([-1])]; tensor normed_511_cast_fp16 = layer_norm(axes = normed_511_axes_0, epsilon = var_8_to_fp16, x = input_365_cast_fp16)[name = string("normed_511_cast_fp16")]; tensor var_4470_split_sizes_0 = const()[name = string("op_4470_split_sizes_0"), val = tensor([256, 256])]; int32 var_4470_axis_0 = const()[name = string("op_4470_axis_0"), val = int32(-1)]; tensor var_4470_cast_fp16_0, tensor var_4470_cast_fp16_1 = split(axis = var_4470_axis_0, split_sizes = var_4470_split_sizes_0, x = normed_511_cast_fp16)[name = string("op_4470_cast_fp16")]; tensor var_4474_to_fp16 = const()[name = string("op_4474_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305022336)))]; tensor out_219_cast_fp16 = mul(x = var_4470_cast_fp16_0, y = var_4474_to_fp16)[name = string("out_219_cast_fp16")]; fp16 const_256_promoted_to_fp16 = const()[name = string("const_256_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_109 = transpose(perm = var_4449, x = var_4448)[name = string("transpose_51")]; tensor var_4481_cast_fp16 = mul(x = k_109, y = const_256_promoted_to_fp16)[name = string("op_4481_cast_fp16")]; bool input_367_interleave_0 = const()[name = string("input_367_interleave_0"), val = bool(false)]; tensor input_367_cast_fp16 = concat(axis = var_23, interleave = input_367_interleave_0, values = (k_109, var_4481_cast_fp16))[name = string("input_367_cast_fp16")]; tensor normed_515_axes_0 = const()[name = string("normed_515_axes_0"), val = tensor([-1])]; tensor normed_515_cast_fp16 = layer_norm(axes = normed_515_axes_0, epsilon = var_8_to_fp16, x = input_367_cast_fp16)[name = string("normed_515_cast_fp16")]; tensor var_4486_split_sizes_0 = const()[name = string("op_4486_split_sizes_0"), val = tensor([256, 256])]; int32 var_4486_axis_0 = const()[name = string("op_4486_axis_0"), val = int32(-1)]; tensor var_4486_cast_fp16_0, tensor var_4486_cast_fp16_1 = split(axis = var_4486_axis_0, split_sizes = var_4486_split_sizes_0, x = normed_515_cast_fp16)[name = string("op_4486_cast_fp16")]; tensor var_4490_to_fp16 = const()[name = string("op_4490_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305022912)))]; tensor out_221_cast_fp16 = mul(x = var_4486_cast_fp16_0, y = var_4490_to_fp16)[name = string("out_221_cast_fp16")]; tensor var_4493 = mul(x = out_219_cast_fp16, y = cos_1_quantized)[name = string("op_4493")]; tensor var_4494_split_sizes_0 = const()[name = string("op_4494_split_sizes_0"), val = tensor([128, 128])]; int32 var_4494_axis_0 = const()[name = string("op_4494_axis_0"), val = int32(-1)]; tensor var_4494_0, tensor var_4494_1 = split(axis = var_4494_axis_0, split_sizes = var_4494_split_sizes_0, x = out_219_cast_fp16)[name = string("op_4494")]; fp16 const_258_promoted = const()[name = string("const_258_promoted"), val = fp16(-0x1p+0)]; tensor var_4496 = mul(x = var_4494_1, y = const_258_promoted)[name = string("op_4496")]; bool var_4498_interleave_0 = const()[name = string("op_4498_interleave_0"), val = bool(false)]; tensor var_4498 = concat(axis = var_23, interleave = var_4498_interleave_0, values = (var_4496, var_4494_0))[name = string("op_4498")]; tensor var_4499 = mul(x = var_4498, y = sin_1_quantized)[name = string("op_4499")]; tensor q_113 = add(x = var_4493, y = var_4499)[name = string("q_113")]; tensor var_4501 = mul(x = out_221_cast_fp16, y = cos_1_quantized)[name = string("op_4501")]; tensor var_4502_split_sizes_0 = const()[name = string("op_4502_split_sizes_0"), val = tensor([128, 128])]; int32 var_4502_axis_0 = const()[name = string("op_4502_axis_0"), val = int32(-1)]; tensor var_4502_0, tensor var_4502_1 = split(axis = var_4502_axis_0, split_sizes = var_4502_split_sizes_0, x = out_221_cast_fp16)[name = string("op_4502")]; fp16 const_259_promoted = const()[name = string("const_259_promoted"), val = fp16(-0x1p+0)]; tensor var_4504 = mul(x = var_4502_1, y = const_259_promoted)[name = string("op_4504")]; bool var_4506_interleave_0 = const()[name = string("op_4506_interleave_0"), val = bool(false)]; tensor var_4506 = concat(axis = var_23, interleave = var_4506_interleave_0, values = (var_4504, var_4502_0))[name = string("op_4506")]; tensor var_4507 = mul(x = var_4506, y = sin_1_quantized)[name = string("op_4507")]; tensor hidden_states_217 = add(x = var_4501, y = var_4507)[name = string("hidden_states_217")]; tensor hidden_states_219_axes_0 = const()[name = string("hidden_states_219_axes_0"), val = tensor([2])]; tensor hidden_states_219 = expand_dims(axes = hidden_states_219_axes_0, x = hidden_states_217)[name = string("hidden_states_219")]; tensor var_4510 = const()[name = string("op_4510"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_221 = tile(reps = var_4510, x = hidden_states_219)[name = string("hidden_states_221")]; tensor var_4512 = const()[name = string("op_4512"), val = tensor([1, 3, 128, 256])]; tensor k_113 = reshape(shape = var_4512, x = hidden_states_221)[name = string("k_113")]; tensor hidden_states_225_axes_0 = const()[name = string("hidden_states_225_axes_0"), val = tensor([2])]; tensor hidden_states_223 = transpose(perm = var_4459, x = var_4458)[name = string("transpose_50")]; tensor hidden_states_225 = expand_dims(axes = hidden_states_225_axes_0, x = hidden_states_223)[name = string("hidden_states_225")]; tensor var_4515 = const()[name = string("op_4515"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_227 = tile(reps = var_4515, x = hidden_states_225)[name = string("hidden_states_227")]; tensor var_4517 = const()[name = string("op_4517"), val = tensor([1, 3, 128, 256])]; tensor v_37 = reshape(shape = var_4517, x = hidden_states_227)[name = string("v_37")]; bool var_4522_transpose_x_1 = const()[name = string("op_4522_transpose_x_1"), val = bool(false)]; bool var_4522_transpose_y_1 = const()[name = string("op_4522_transpose_y_1"), val = bool(true)]; tensor var_4522_cast_fp16 = matmul(transpose_x = var_4522_transpose_x_1, transpose_y = var_4522_transpose_y_1, x = q_113, y = k_113)[name = string("op_4522_cast_fp16")]; fp16 var_4523_to_fp16 = const()[name = string("op_4523_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_109_cast_fp16 = mul(x = var_4522_cast_fp16, y = var_4523_to_fp16)[name = string("attn_weights_109_cast_fp16")]; tensor attn_weights_111_cast_fp16 = add(x = attn_weights_109_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_111_cast_fp16")]; tensor var_4527_cast_fp16 = softmax(axis = var_23, x = attn_weights_111_cast_fp16)[name = string("op_4527_cast_fp16")]; bool var_4531_transpose_x_0 = const()[name = string("op_4531_transpose_x_0"), val = bool(false)]; bool var_4531_transpose_y_0 = const()[name = string("op_4531_transpose_y_0"), val = bool(false)]; tensor var_4531_cast_fp16 = matmul(transpose_x = var_4531_transpose_x_0, transpose_y = var_4531_transpose_y_0, x = var_4527_cast_fp16, y = v_37)[name = string("op_4531_cast_fp16")]; tensor var_4533 = const()[name = string("op_4533"), val = tensor([0, 2, 1, 3])]; tensor var_4536 = const()[name = string("op_4536"), val = tensor([1, 128, 768])]; tensor var_4534 = transpose(perm = var_4533, x = var_4531_cast_fp16)[name = string("transpose_49")]; tensor attn_out_111 = reshape(shape = var_4536, x = var_4534)[name = string("attn_out_111")]; tensor var_4538 = const()[name = string("op_4538"), val = tensor([0, 2, 1])]; tensor squeeze_18_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305023488))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305613376))))[name = string("squeeze_18_quantized")]; string var_4547_pad_type_0 = const()[name = string("op_4547_pad_type_0"), val = string("valid")]; int32 var_4547_groups_0 = const()[name = string("op_4547_groups_0"), val = int32(1)]; tensor var_4547_strides_0 = const()[name = string("op_4547_strides_0"), val = tensor([1])]; tensor var_4547_pad_0 = const()[name = string("op_4547_pad_0"), val = tensor([0, 0])]; tensor var_4547_dilations_0 = const()[name = string("op_4547_dilations_0"), val = tensor([1])]; tensor var_4539 = transpose(perm = var_4538, x = attn_out_111)[name = string("transpose_48")]; tensor var_4547 = conv(dilations = var_4547_dilations_0, groups = var_4547_groups_0, pad = var_4547_pad_0, pad_type = var_4547_pad_type_0, strides = var_4547_strides_0, weight = squeeze_18_quantized, x = var_4539)[name = string("op_4547")]; tensor var_4548 = const()[name = string("op_4548"), val = tensor([0, 2, 1])]; fp16 const_260_promoted_to_fp16 = const()[name = string("const_260_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_297 = transpose(perm = var_4548, x = var_4547)[name = string("transpose_47")]; tensor var_4552_cast_fp16 = mul(x = x_297, y = const_260_promoted_to_fp16)[name = string("op_4552_cast_fp16")]; bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; tensor input_371_cast_fp16 = concat(axis = var_23, interleave = input_371_interleave_0, values = (x_297, var_4552_cast_fp16))[name = string("input_371_cast_fp16")]; tensor normed_519_axes_0 = const()[name = string("normed_519_axes_0"), val = tensor([-1])]; tensor normed_519_cast_fp16 = layer_norm(axes = normed_519_axes_0, epsilon = var_8_to_fp16, x = input_371_cast_fp16)[name = string("normed_519_cast_fp16")]; tensor var_4557_split_sizes_0 = const()[name = string("op_4557_split_sizes_0"), val = tensor([768, 768])]; int32 var_4557_axis_0 = const()[name = string("op_4557_axis_0"), val = int32(-1)]; tensor var_4557_cast_fp16_0, tensor var_4557_cast_fp16_1 = split(axis = var_4557_axis_0, split_sizes = var_4557_split_sizes_0, x = normed_519_cast_fp16)[name = string("op_4557_cast_fp16")]; tensor var_4561_to_fp16 = const()[name = string("op_4561_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305614976)))]; tensor out_223_cast_fp16 = mul(x = var_4557_cast_fp16_0, y = var_4561_to_fp16)[name = string("out_223_cast_fp16")]; tensor x_299_cast_fp16 = add(x = x_289_cast_fp16, y = out_223_cast_fp16)[name = string("x_299_cast_fp16")]; fp16 const_262_promoted_to_fp16 = const()[name = string("const_262_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4568_cast_fp16 = mul(x = x_299_cast_fp16, y = const_262_promoted_to_fp16)[name = string("op_4568_cast_fp16")]; bool input_373_interleave_0 = const()[name = string("input_373_interleave_0"), val = bool(false)]; tensor input_373_cast_fp16 = concat(axis = var_23, interleave = input_373_interleave_0, values = (x_299_cast_fp16, var_4568_cast_fp16))[name = string("input_373_cast_fp16")]; tensor normed_523_axes_0 = const()[name = string("normed_523_axes_0"), val = tensor([-1])]; tensor normed_523_cast_fp16 = layer_norm(axes = normed_523_axes_0, epsilon = var_8_to_fp16, x = input_373_cast_fp16)[name = string("normed_523_cast_fp16")]; tensor var_4573_split_sizes_0 = const()[name = string("op_4573_split_sizes_0"), val = tensor([768, 768])]; int32 var_4573_axis_0 = const()[name = string("op_4573_axis_0"), val = int32(-1)]; tensor var_4573_cast_fp16_0, tensor var_4573_cast_fp16_1 = split(axis = var_4573_axis_0, split_sizes = var_4573_split_sizes_0, x = normed_523_cast_fp16)[name = string("op_4573_cast_fp16")]; tensor var_4577_to_fp16 = const()[name = string("op_4577_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305616576)))]; tensor out_225_cast_fp16 = mul(x = var_4573_cast_fp16_0, y = var_4577_to_fp16)[name = string("out_225_cast_fp16")]; tensor var_4584 = const()[name = string("op_4584"), val = tensor([0, 2, 1])]; tensor input_375_axes_0 = const()[name = string("input_375_axes_0"), val = tensor([2])]; tensor var_4585 = transpose(perm = var_4584, x = out_225_cast_fp16)[name = string("transpose_46")]; tensor input_375 = expand_dims(axes = input_375_axes_0, x = var_4585)[name = string("input_375")]; string gate_73_pad_type_0 = const()[name = string("gate_73_pad_type_0"), val = string("valid")]; tensor gate_73_strides_0 = const()[name = string("gate_73_strides_0"), val = tensor([1, 1])]; tensor gate_73_pad_0 = const()[name = string("gate_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_73_dilations_0 = const()[name = string("gate_73_dilations_0"), val = tensor([1, 1])]; int32 gate_73_groups_0 = const()[name = string("gate_73_groups_0"), val = int32(1)]; tensor gate_73 = conv(dilations = gate_73_dilations_0, groups = gate_73_groups_0, pad = gate_73_pad_0, pad_type = gate_73_pad_type_0, strides = gate_73_strides_0, weight = encoder_layers_18_mlp_gate_proj_weight_quantized, x = input_375)[name = string("gate_73")]; string up_37_pad_type_0 = const()[name = string("up_37_pad_type_0"), val = string("valid")]; tensor up_37_strides_0 = const()[name = string("up_37_strides_0"), val = tensor([1, 1])]; tensor up_37_pad_0 = const()[name = string("up_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_37_dilations_0 = const()[name = string("up_37_dilations_0"), val = tensor([1, 1])]; int32 up_37_groups_0 = const()[name = string("up_37_groups_0"), val = int32(1)]; tensor up_37 = conv(dilations = up_37_dilations_0, groups = up_37_groups_0, pad = up_37_pad_0, pad_type = up_37_pad_type_0, strides = up_37_strides_0, weight = encoder_layers_18_mlp_up_proj_weight_quantized, x = input_375)[name = string("up_37")]; string gate_75_mode_0 = const()[name = string("gate_75_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_75 = gelu(mode = gate_75_mode_0, x = gate_73)[name = string("gate_75")]; tensor input_377 = mul(x = gate_75, y = up_37)[name = string("input_377")]; string var_4606_pad_type_0 = const()[name = string("op_4606_pad_type_0"), val = string("valid")]; tensor var_4606_strides_0 = const()[name = string("op_4606_strides_0"), val = tensor([1, 1])]; tensor var_4606_pad_0 = const()[name = string("op_4606_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4606_dilations_0 = const()[name = string("op_4606_dilations_0"), val = tensor([1, 1])]; int32 var_4606_groups_0 = const()[name = string("op_4606_groups_0"), val = int32(1)]; tensor var_4606 = conv(dilations = var_4606_dilations_0, groups = var_4606_groups_0, pad = var_4606_pad_0, pad_type = var_4606_pad_type_0, strides = var_4606_strides_0, weight = encoder_layers_18_mlp_down_proj_weight_quantized, x = input_377)[name = string("op_4606")]; tensor var_4607_axes_0 = const()[name = string("op_4607_axes_0"), val = tensor([2])]; tensor var_4607 = squeeze(axes = var_4607_axes_0, x = var_4606)[name = string("op_4607")]; tensor var_4608 = const()[name = string("op_4608"), val = tensor([0, 2, 1])]; fp16 const_264_promoted_to_fp16 = const()[name = string("const_264_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_303 = transpose(perm = var_4608, x = var_4607)[name = string("transpose_45")]; tensor var_4612_cast_fp16 = mul(x = x_303, y = const_264_promoted_to_fp16)[name = string("op_4612_cast_fp16")]; bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; tensor input_379_cast_fp16 = concat(axis = var_23, interleave = input_379_interleave_0, values = (x_303, var_4612_cast_fp16))[name = string("input_379_cast_fp16")]; tensor normed_529_axes_0 = const()[name = string("normed_529_axes_0"), val = tensor([-1])]; tensor normed_529_cast_fp16 = layer_norm(axes = normed_529_axes_0, epsilon = var_8_to_fp16, x = input_379_cast_fp16)[name = string("normed_529_cast_fp16")]; tensor var_4617_split_sizes_0 = const()[name = string("op_4617_split_sizes_0"), val = tensor([768, 768])]; int32 var_4617_axis_0 = const()[name = string("op_4617_axis_0"), val = int32(-1)]; tensor var_4617_cast_fp16_0, tensor var_4617_cast_fp16_1 = split(axis = var_4617_axis_0, split_sizes = var_4617_split_sizes_0, x = normed_529_cast_fp16)[name = string("op_4617_cast_fp16")]; tensor var_4621_to_fp16 = const()[name = string("op_4621_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305618176)))]; tensor out_227_cast_fp16 = mul(x = var_4617_cast_fp16_0, y = var_4621_to_fp16)[name = string("out_227_cast_fp16")]; tensor x_305_cast_fp16 = add(x = x_299_cast_fp16, y = out_227_cast_fp16)[name = string("x_305_cast_fp16")]; fp16 const_266_promoted_to_fp16 = const()[name = string("const_266_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4650_cast_fp16 = mul(x = x_305_cast_fp16, y = const_266_promoted_to_fp16)[name = string("op_4650_cast_fp16")]; bool input_381_interleave_0 = const()[name = string("input_381_interleave_0"), val = bool(false)]; tensor input_381_cast_fp16 = concat(axis = var_23, interleave = input_381_interleave_0, values = (x_305_cast_fp16, var_4650_cast_fp16))[name = string("input_381_cast_fp16")]; tensor normed_533_axes_0 = const()[name = string("normed_533_axes_0"), val = tensor([-1])]; tensor normed_533_cast_fp16 = layer_norm(axes = normed_533_axes_0, epsilon = var_8_to_fp16, x = input_381_cast_fp16)[name = string("normed_533_cast_fp16")]; tensor var_4655_split_sizes_0 = const()[name = string("op_4655_split_sizes_0"), val = tensor([768, 768])]; int32 var_4655_axis_0 = const()[name = string("op_4655_axis_0"), val = int32(-1)]; tensor var_4655_cast_fp16_0, tensor var_4655_cast_fp16_1 = split(axis = var_4655_axis_0, split_sizes = var_4655_split_sizes_0, x = normed_533_cast_fp16)[name = string("op_4655_cast_fp16")]; tensor var_4659_to_fp16 = const()[name = string("op_4659_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305619776)))]; tensor out_229_cast_fp16 = mul(x = var_4655_cast_fp16_0, y = var_4659_to_fp16)[name = string("out_229_cast_fp16")]; tensor var_4665 = const()[name = string("op_4665"), val = tensor([0, 2, 1])]; tensor var_4667_axes_0 = const()[name = string("op_4667_axes_0"), val = tensor([2])]; tensor var_4666_cast_fp16 = transpose(perm = var_4665, x = out_229_cast_fp16)[name = string("transpose_44")]; tensor var_4667_cast_fp16 = expand_dims(axes = var_4667_axes_0, x = var_4666_cast_fp16)[name = string("op_4667_cast_fp16")]; string var_4674_pad_type_0 = const()[name = string("op_4674_pad_type_0"), val = string("valid")]; tensor var_4674_strides_0 = const()[name = string("op_4674_strides_0"), val = tensor([1, 1])]; tensor var_4674_pad_0 = const()[name = string("op_4674_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4674_dilations_0 = const()[name = string("op_4674_dilations_0"), val = tensor([1, 1])]; int32 var_4674_groups_0 = const()[name = string("op_4674_groups_0"), val = int32(1)]; tensor var_4674 = conv(dilations = var_4674_dilations_0, groups = var_4674_groups_0, pad = var_4674_pad_0, pad_type = var_4674_pad_type_0, strides = var_4674_strides_0, weight = encoder_layers_19_self_attn_q_proj_weight_quantized, x = var_4667_cast_fp16)[name = string("op_4674")]; tensor var_4675 = const()[name = string("op_4675"), val = tensor([1, 3, 256, 128])]; tensor var_4676 = reshape(shape = var_4675, x = var_4674)[name = string("op_4676")]; tensor var_4677 = const()[name = string("op_4677"), val = tensor([0, 1, 3, 2])]; string var_4684_pad_type_0 = const()[name = string("op_4684_pad_type_0"), val = string("valid")]; tensor var_4684_strides_0 = const()[name = string("op_4684_strides_0"), val = tensor([1, 1])]; tensor var_4684_pad_0 = const()[name = string("op_4684_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4684_dilations_0 = const()[name = string("op_4684_dilations_0"), val = tensor([1, 1])]; int32 var_4684_groups_0 = const()[name = string("op_4684_groups_0"), val = int32(1)]; tensor var_4684 = conv(dilations = var_4684_dilations_0, groups = var_4684_groups_0, pad = var_4684_pad_0, pad_type = var_4684_pad_type_0, strides = var_4684_strides_0, weight = encoder_layers_19_self_attn_k_proj_weight_quantized, x = var_4667_cast_fp16)[name = string("op_4684")]; tensor var_4685 = const()[name = string("op_4685"), val = tensor([1, 1, 256, 128])]; tensor var_4686 = reshape(shape = var_4685, x = var_4684)[name = string("op_4686")]; tensor var_4687 = const()[name = string("op_4687"), val = tensor([0, 1, 3, 2])]; string var_4694_pad_type_0 = const()[name = string("op_4694_pad_type_0"), val = string("valid")]; tensor var_4694_strides_0 = const()[name = string("op_4694_strides_0"), val = tensor([1, 1])]; tensor var_4694_pad_0 = const()[name = string("op_4694_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4694_dilations_0 = const()[name = string("op_4694_dilations_0"), val = tensor([1, 1])]; int32 var_4694_groups_0 = const()[name = string("op_4694_groups_0"), val = int32(1)]; tensor var_4694 = conv(dilations = var_4694_dilations_0, groups = var_4694_groups_0, pad = var_4694_pad_0, pad_type = var_4694_pad_type_0, strides = var_4694_strides_0, weight = encoder_layers_19_self_attn_v_proj_weight_quantized, x = var_4667_cast_fp16)[name = string("op_4694")]; tensor var_4695 = const()[name = string("op_4695"), val = tensor([1, 1, 256, 128])]; tensor var_4696 = reshape(shape = var_4695, x = var_4694)[name = string("op_4696")]; tensor var_4697 = const()[name = string("op_4697"), val = tensor([0, 1, 3, 2])]; fp16 const_268_promoted_to_fp16 = const()[name = string("const_268_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_115 = transpose(perm = var_4677, x = var_4676)[name = string("transpose_43")]; tensor var_4703_cast_fp16 = mul(x = q_115, y = const_268_promoted_to_fp16)[name = string("op_4703_cast_fp16")]; bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; tensor input_385_cast_fp16 = concat(axis = var_23, interleave = input_385_interleave_0, values = (q_115, var_4703_cast_fp16))[name = string("input_385_cast_fp16")]; tensor normed_539_axes_0 = const()[name = string("normed_539_axes_0"), val = tensor([-1])]; tensor normed_539_cast_fp16 = layer_norm(axes = normed_539_axes_0, epsilon = var_8_to_fp16, x = input_385_cast_fp16)[name = string("normed_539_cast_fp16")]; tensor var_4708_split_sizes_0 = const()[name = string("op_4708_split_sizes_0"), val = tensor([256, 256])]; int32 var_4708_axis_0 = const()[name = string("op_4708_axis_0"), val = int32(-1)]; tensor var_4708_cast_fp16_0, tensor var_4708_cast_fp16_1 = split(axis = var_4708_axis_0, split_sizes = var_4708_split_sizes_0, x = normed_539_cast_fp16)[name = string("op_4708_cast_fp16")]; tensor var_4712_to_fp16 = const()[name = string("op_4712_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305621376)))]; tensor out_231_cast_fp16 = mul(x = var_4708_cast_fp16_0, y = var_4712_to_fp16)[name = string("out_231_cast_fp16")]; fp16 const_270_promoted_to_fp16 = const()[name = string("const_270_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_115 = transpose(perm = var_4687, x = var_4686)[name = string("transpose_42")]; tensor var_4719_cast_fp16 = mul(x = k_115, y = const_270_promoted_to_fp16)[name = string("op_4719_cast_fp16")]; bool input_387_interleave_0 = const()[name = string("input_387_interleave_0"), val = bool(false)]; tensor input_387_cast_fp16 = concat(axis = var_23, interleave = input_387_interleave_0, values = (k_115, var_4719_cast_fp16))[name = string("input_387_cast_fp16")]; tensor normed_543_axes_0 = const()[name = string("normed_543_axes_0"), val = tensor([-1])]; tensor normed_543_cast_fp16 = layer_norm(axes = normed_543_axes_0, epsilon = var_8_to_fp16, x = input_387_cast_fp16)[name = string("normed_543_cast_fp16")]; tensor var_4724_split_sizes_0 = const()[name = string("op_4724_split_sizes_0"), val = tensor([256, 256])]; int32 var_4724_axis_0 = const()[name = string("op_4724_axis_0"), val = int32(-1)]; tensor var_4724_cast_fp16_0, tensor var_4724_cast_fp16_1 = split(axis = var_4724_axis_0, split_sizes = var_4724_split_sizes_0, x = normed_543_cast_fp16)[name = string("op_4724_cast_fp16")]; tensor var_4728_to_fp16 = const()[name = string("op_4728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305621952)))]; tensor out_233_cast_fp16 = mul(x = var_4724_cast_fp16_0, y = var_4728_to_fp16)[name = string("out_233_cast_fp16")]; tensor var_4731 = mul(x = out_231_cast_fp16, y = cos_1_quantized)[name = string("op_4731")]; tensor var_4732_split_sizes_0 = const()[name = string("op_4732_split_sizes_0"), val = tensor([128, 128])]; int32 var_4732_axis_0 = const()[name = string("op_4732_axis_0"), val = int32(-1)]; tensor var_4732_0, tensor var_4732_1 = split(axis = var_4732_axis_0, split_sizes = var_4732_split_sizes_0, x = out_231_cast_fp16)[name = string("op_4732")]; fp16 const_272_promoted = const()[name = string("const_272_promoted"), val = fp16(-0x1p+0)]; tensor var_4734 = mul(x = var_4732_1, y = const_272_promoted)[name = string("op_4734")]; bool var_4736_interleave_0 = const()[name = string("op_4736_interleave_0"), val = bool(false)]; tensor var_4736 = concat(axis = var_23, interleave = var_4736_interleave_0, values = (var_4734, var_4732_0))[name = string("op_4736")]; tensor var_4737 = mul(x = var_4736, y = sin_1_quantized)[name = string("op_4737")]; tensor q_119 = add(x = var_4731, y = var_4737)[name = string("q_119")]; tensor var_4739 = mul(x = out_233_cast_fp16, y = cos_1_quantized)[name = string("op_4739")]; tensor var_4740_split_sizes_0 = const()[name = string("op_4740_split_sizes_0"), val = tensor([128, 128])]; int32 var_4740_axis_0 = const()[name = string("op_4740_axis_0"), val = int32(-1)]; tensor var_4740_0, tensor var_4740_1 = split(axis = var_4740_axis_0, split_sizes = var_4740_split_sizes_0, x = out_233_cast_fp16)[name = string("op_4740")]; fp16 const_273_promoted = const()[name = string("const_273_promoted"), val = fp16(-0x1p+0)]; tensor var_4742 = mul(x = var_4740_1, y = const_273_promoted)[name = string("op_4742")]; bool var_4744_interleave_0 = const()[name = string("op_4744_interleave_0"), val = bool(false)]; tensor var_4744 = concat(axis = var_23, interleave = var_4744_interleave_0, values = (var_4742, var_4740_0))[name = string("op_4744")]; tensor var_4745 = mul(x = var_4744, y = sin_1_quantized)[name = string("op_4745")]; tensor hidden_states_229 = add(x = var_4739, y = var_4745)[name = string("hidden_states_229")]; tensor hidden_states_231_axes_0 = const()[name = string("hidden_states_231_axes_0"), val = tensor([2])]; tensor hidden_states_231 = expand_dims(axes = hidden_states_231_axes_0, x = hidden_states_229)[name = string("hidden_states_231")]; tensor var_4748 = const()[name = string("op_4748"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_233 = tile(reps = var_4748, x = hidden_states_231)[name = string("hidden_states_233")]; tensor var_4750 = const()[name = string("op_4750"), val = tensor([1, 3, 128, 256])]; tensor k_119 = reshape(shape = var_4750, x = hidden_states_233)[name = string("k_119")]; tensor hidden_states_237_axes_0 = const()[name = string("hidden_states_237_axes_0"), val = tensor([2])]; tensor hidden_states_235 = transpose(perm = var_4697, x = var_4696)[name = string("transpose_41")]; tensor hidden_states_237 = expand_dims(axes = hidden_states_237_axes_0, x = hidden_states_235)[name = string("hidden_states_237")]; tensor var_4753 = const()[name = string("op_4753"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_239 = tile(reps = var_4753, x = hidden_states_237)[name = string("hidden_states_239")]; tensor var_4755 = const()[name = string("op_4755"), val = tensor([1, 3, 128, 256])]; tensor v_39 = reshape(shape = var_4755, x = hidden_states_239)[name = string("v_39")]; bool var_4760_transpose_x_1 = const()[name = string("op_4760_transpose_x_1"), val = bool(false)]; bool var_4760_transpose_y_1 = const()[name = string("op_4760_transpose_y_1"), val = bool(true)]; tensor var_4760_cast_fp16 = matmul(transpose_x = var_4760_transpose_x_1, transpose_y = var_4760_transpose_y_1, x = q_119, y = k_119)[name = string("op_4760_cast_fp16")]; fp16 var_4761_to_fp16 = const()[name = string("op_4761_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_115_cast_fp16 = mul(x = var_4760_cast_fp16, y = var_4761_to_fp16)[name = string("attn_weights_115_cast_fp16")]; tensor attn_weights_117_cast_fp16 = add(x = attn_weights_115_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_117_cast_fp16")]; tensor var_4765_cast_fp16 = softmax(axis = var_23, x = attn_weights_117_cast_fp16)[name = string("op_4765_cast_fp16")]; bool var_4769_transpose_x_0 = const()[name = string("op_4769_transpose_x_0"), val = bool(false)]; bool var_4769_transpose_y_0 = const()[name = string("op_4769_transpose_y_0"), val = bool(false)]; tensor var_4769_cast_fp16 = matmul(transpose_x = var_4769_transpose_x_0, transpose_y = var_4769_transpose_y_0, x = var_4765_cast_fp16, y = v_39)[name = string("op_4769_cast_fp16")]; tensor var_4771 = const()[name = string("op_4771"), val = tensor([0, 2, 1, 3])]; tensor var_4774 = const()[name = string("op_4774"), val = tensor([1, 128, 768])]; tensor var_4772 = transpose(perm = var_4771, x = var_4769_cast_fp16)[name = string("transpose_40")]; tensor attn_out_117 = reshape(shape = var_4774, x = var_4772)[name = string("attn_out_117")]; tensor var_4776 = const()[name = string("op_4776"), val = tensor([0, 2, 1])]; tensor squeeze_19_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305622528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306212416))))[name = string("squeeze_19_quantized")]; string var_4785_pad_type_0 = const()[name = string("op_4785_pad_type_0"), val = string("valid")]; int32 var_4785_groups_0 = const()[name = string("op_4785_groups_0"), val = int32(1)]; tensor var_4785_strides_0 = const()[name = string("op_4785_strides_0"), val = tensor([1])]; tensor var_4785_pad_0 = const()[name = string("op_4785_pad_0"), val = tensor([0, 0])]; tensor var_4785_dilations_0 = const()[name = string("op_4785_dilations_0"), val = tensor([1])]; tensor var_4777 = transpose(perm = var_4776, x = attn_out_117)[name = string("transpose_39")]; tensor var_4785 = conv(dilations = var_4785_dilations_0, groups = var_4785_groups_0, pad = var_4785_pad_0, pad_type = var_4785_pad_type_0, strides = var_4785_strides_0, weight = squeeze_19_quantized, x = var_4777)[name = string("op_4785")]; tensor var_4786 = const()[name = string("op_4786"), val = tensor([0, 2, 1])]; fp16 const_274_promoted_to_fp16 = const()[name = string("const_274_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_313 = transpose(perm = var_4786, x = var_4785)[name = string("transpose_38")]; tensor var_4790_cast_fp16 = mul(x = x_313, y = const_274_promoted_to_fp16)[name = string("op_4790_cast_fp16")]; bool input_391_interleave_0 = const()[name = string("input_391_interleave_0"), val = bool(false)]; tensor input_391_cast_fp16 = concat(axis = var_23, interleave = input_391_interleave_0, values = (x_313, var_4790_cast_fp16))[name = string("input_391_cast_fp16")]; tensor normed_547_axes_0 = const()[name = string("normed_547_axes_0"), val = tensor([-1])]; tensor normed_547_cast_fp16 = layer_norm(axes = normed_547_axes_0, epsilon = var_8_to_fp16, x = input_391_cast_fp16)[name = string("normed_547_cast_fp16")]; tensor var_4795_split_sizes_0 = const()[name = string("op_4795_split_sizes_0"), val = tensor([768, 768])]; int32 var_4795_axis_0 = const()[name = string("op_4795_axis_0"), val = int32(-1)]; tensor var_4795_cast_fp16_0, tensor var_4795_cast_fp16_1 = split(axis = var_4795_axis_0, split_sizes = var_4795_split_sizes_0, x = normed_547_cast_fp16)[name = string("op_4795_cast_fp16")]; tensor var_4799_to_fp16 = const()[name = string("op_4799_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306214016)))]; tensor out_235_cast_fp16 = mul(x = var_4795_cast_fp16_0, y = var_4799_to_fp16)[name = string("out_235_cast_fp16")]; tensor x_315_cast_fp16 = add(x = x_305_cast_fp16, y = out_235_cast_fp16)[name = string("x_315_cast_fp16")]; fp16 const_276_promoted_to_fp16 = const()[name = string("const_276_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4806_cast_fp16 = mul(x = x_315_cast_fp16, y = const_276_promoted_to_fp16)[name = string("op_4806_cast_fp16")]; bool input_393_interleave_0 = const()[name = string("input_393_interleave_0"), val = bool(false)]; tensor input_393_cast_fp16 = concat(axis = var_23, interleave = input_393_interleave_0, values = (x_315_cast_fp16, var_4806_cast_fp16))[name = string("input_393_cast_fp16")]; tensor normed_551_axes_0 = const()[name = string("normed_551_axes_0"), val = tensor([-1])]; tensor normed_551_cast_fp16 = layer_norm(axes = normed_551_axes_0, epsilon = var_8_to_fp16, x = input_393_cast_fp16)[name = string("normed_551_cast_fp16")]; tensor var_4811_split_sizes_0 = const()[name = string("op_4811_split_sizes_0"), val = tensor([768, 768])]; int32 var_4811_axis_0 = const()[name = string("op_4811_axis_0"), val = int32(-1)]; tensor var_4811_cast_fp16_0, tensor var_4811_cast_fp16_1 = split(axis = var_4811_axis_0, split_sizes = var_4811_split_sizes_0, x = normed_551_cast_fp16)[name = string("op_4811_cast_fp16")]; tensor var_4815_to_fp16 = const()[name = string("op_4815_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306215616)))]; tensor out_237_cast_fp16 = mul(x = var_4811_cast_fp16_0, y = var_4815_to_fp16)[name = string("out_237_cast_fp16")]; tensor var_4822 = const()[name = string("op_4822"), val = tensor([0, 2, 1])]; tensor input_395_axes_0 = const()[name = string("input_395_axes_0"), val = tensor([2])]; tensor var_4823 = transpose(perm = var_4822, x = out_237_cast_fp16)[name = string("transpose_37")]; tensor input_395 = expand_dims(axes = input_395_axes_0, x = var_4823)[name = string("input_395")]; string gate_77_pad_type_0 = const()[name = string("gate_77_pad_type_0"), val = string("valid")]; tensor gate_77_strides_0 = const()[name = string("gate_77_strides_0"), val = tensor([1, 1])]; tensor gate_77_pad_0 = const()[name = string("gate_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_77_dilations_0 = const()[name = string("gate_77_dilations_0"), val = tensor([1, 1])]; int32 gate_77_groups_0 = const()[name = string("gate_77_groups_0"), val = int32(1)]; tensor gate_77 = conv(dilations = gate_77_dilations_0, groups = gate_77_groups_0, pad = gate_77_pad_0, pad_type = gate_77_pad_type_0, strides = gate_77_strides_0, weight = encoder_layers_19_mlp_gate_proj_weight_quantized, x = input_395)[name = string("gate_77")]; string up_39_pad_type_0 = const()[name = string("up_39_pad_type_0"), val = string("valid")]; tensor up_39_strides_0 = const()[name = string("up_39_strides_0"), val = tensor([1, 1])]; tensor up_39_pad_0 = const()[name = string("up_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_39_dilations_0 = const()[name = string("up_39_dilations_0"), val = tensor([1, 1])]; int32 up_39_groups_0 = const()[name = string("up_39_groups_0"), val = int32(1)]; tensor up_39 = conv(dilations = up_39_dilations_0, groups = up_39_groups_0, pad = up_39_pad_0, pad_type = up_39_pad_type_0, strides = up_39_strides_0, weight = encoder_layers_19_mlp_up_proj_weight_quantized, x = input_395)[name = string("up_39")]; string gate_79_mode_0 = const()[name = string("gate_79_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_79 = gelu(mode = gate_79_mode_0, x = gate_77)[name = string("gate_79")]; tensor input_397 = mul(x = gate_79, y = up_39)[name = string("input_397")]; string var_4844_pad_type_0 = const()[name = string("op_4844_pad_type_0"), val = string("valid")]; tensor var_4844_strides_0 = const()[name = string("op_4844_strides_0"), val = tensor([1, 1])]; tensor var_4844_pad_0 = const()[name = string("op_4844_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4844_dilations_0 = const()[name = string("op_4844_dilations_0"), val = tensor([1, 1])]; int32 var_4844_groups_0 = const()[name = string("op_4844_groups_0"), val = int32(1)]; tensor var_4844 = conv(dilations = var_4844_dilations_0, groups = var_4844_groups_0, pad = var_4844_pad_0, pad_type = var_4844_pad_type_0, strides = var_4844_strides_0, weight = encoder_layers_19_mlp_down_proj_weight_quantized, x = input_397)[name = string("op_4844")]; tensor var_4845_axes_0 = const()[name = string("op_4845_axes_0"), val = tensor([2])]; tensor var_4845 = squeeze(axes = var_4845_axes_0, x = var_4844)[name = string("op_4845")]; tensor var_4846 = const()[name = string("op_4846"), val = tensor([0, 2, 1])]; fp16 const_278_promoted_to_fp16 = const()[name = string("const_278_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_319 = transpose(perm = var_4846, x = var_4845)[name = string("transpose_36")]; tensor var_4850_cast_fp16 = mul(x = x_319, y = const_278_promoted_to_fp16)[name = string("op_4850_cast_fp16")]; bool input_399_interleave_0 = const()[name = string("input_399_interleave_0"), val = bool(false)]; tensor input_399_cast_fp16 = concat(axis = var_23, interleave = input_399_interleave_0, values = (x_319, var_4850_cast_fp16))[name = string("input_399_cast_fp16")]; tensor normed_557_axes_0 = const()[name = string("normed_557_axes_0"), val = tensor([-1])]; tensor normed_557_cast_fp16 = layer_norm(axes = normed_557_axes_0, epsilon = var_8_to_fp16, x = input_399_cast_fp16)[name = string("normed_557_cast_fp16")]; tensor var_4855_split_sizes_0 = const()[name = string("op_4855_split_sizes_0"), val = tensor([768, 768])]; int32 var_4855_axis_0 = const()[name = string("op_4855_axis_0"), val = int32(-1)]; tensor var_4855_cast_fp16_0, tensor var_4855_cast_fp16_1 = split(axis = var_4855_axis_0, split_sizes = var_4855_split_sizes_0, x = normed_557_cast_fp16)[name = string("op_4855_cast_fp16")]; tensor var_4859_to_fp16 = const()[name = string("op_4859_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306217216)))]; tensor out_239_cast_fp16 = mul(x = var_4855_cast_fp16_0, y = var_4859_to_fp16)[name = string("out_239_cast_fp16")]; tensor x_321_cast_fp16 = add(x = x_315_cast_fp16, y = out_239_cast_fp16)[name = string("x_321_cast_fp16")]; fp16 const_280_promoted_to_fp16 = const()[name = string("const_280_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4888_cast_fp16 = mul(x = x_321_cast_fp16, y = const_280_promoted_to_fp16)[name = string("op_4888_cast_fp16")]; bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; tensor input_401_cast_fp16 = concat(axis = var_23, interleave = input_401_interleave_0, values = (x_321_cast_fp16, var_4888_cast_fp16))[name = string("input_401_cast_fp16")]; tensor normed_561_axes_0 = const()[name = string("normed_561_axes_0"), val = tensor([-1])]; tensor normed_561_cast_fp16 = layer_norm(axes = normed_561_axes_0, epsilon = var_8_to_fp16, x = input_401_cast_fp16)[name = string("normed_561_cast_fp16")]; tensor var_4893_split_sizes_0 = const()[name = string("op_4893_split_sizes_0"), val = tensor([768, 768])]; int32 var_4893_axis_0 = const()[name = string("op_4893_axis_0"), val = int32(-1)]; tensor var_4893_cast_fp16_0, tensor var_4893_cast_fp16_1 = split(axis = var_4893_axis_0, split_sizes = var_4893_split_sizes_0, x = normed_561_cast_fp16)[name = string("op_4893_cast_fp16")]; tensor var_4897_to_fp16 = const()[name = string("op_4897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306218816)))]; tensor out_241_cast_fp16 = mul(x = var_4893_cast_fp16_0, y = var_4897_to_fp16)[name = string("out_241_cast_fp16")]; tensor var_4903 = const()[name = string("op_4903"), val = tensor([0, 2, 1])]; tensor var_4905_axes_0 = const()[name = string("op_4905_axes_0"), val = tensor([2])]; tensor var_4904_cast_fp16 = transpose(perm = var_4903, x = out_241_cast_fp16)[name = string("transpose_35")]; tensor var_4905_cast_fp16 = expand_dims(axes = var_4905_axes_0, x = var_4904_cast_fp16)[name = string("op_4905_cast_fp16")]; string var_4912_pad_type_0 = const()[name = string("op_4912_pad_type_0"), val = string("valid")]; tensor var_4912_strides_0 = const()[name = string("op_4912_strides_0"), val = tensor([1, 1])]; tensor var_4912_pad_0 = const()[name = string("op_4912_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4912_dilations_0 = const()[name = string("op_4912_dilations_0"), val = tensor([1, 1])]; int32 var_4912_groups_0 = const()[name = string("op_4912_groups_0"), val = int32(1)]; tensor var_4912 = conv(dilations = var_4912_dilations_0, groups = var_4912_groups_0, pad = var_4912_pad_0, pad_type = var_4912_pad_type_0, strides = var_4912_strides_0, weight = encoder_layers_20_self_attn_q_proj_weight_quantized, x = var_4905_cast_fp16)[name = string("op_4912")]; tensor var_4913 = const()[name = string("op_4913"), val = tensor([1, 3, 256, 128])]; tensor var_4914 = reshape(shape = var_4913, x = var_4912)[name = string("op_4914")]; tensor var_4915 = const()[name = string("op_4915"), val = tensor([0, 1, 3, 2])]; string var_4922_pad_type_0 = const()[name = string("op_4922_pad_type_0"), val = string("valid")]; tensor var_4922_strides_0 = const()[name = string("op_4922_strides_0"), val = tensor([1, 1])]; tensor var_4922_pad_0 = const()[name = string("op_4922_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4922_dilations_0 = const()[name = string("op_4922_dilations_0"), val = tensor([1, 1])]; int32 var_4922_groups_0 = const()[name = string("op_4922_groups_0"), val = int32(1)]; tensor var_4922 = conv(dilations = var_4922_dilations_0, groups = var_4922_groups_0, pad = var_4922_pad_0, pad_type = var_4922_pad_type_0, strides = var_4922_strides_0, weight = encoder_layers_20_self_attn_k_proj_weight_quantized, x = var_4905_cast_fp16)[name = string("op_4922")]; tensor var_4923 = const()[name = string("op_4923"), val = tensor([1, 1, 256, 128])]; tensor var_4924 = reshape(shape = var_4923, x = var_4922)[name = string("op_4924")]; tensor var_4925 = const()[name = string("op_4925"), val = tensor([0, 1, 3, 2])]; string var_4932_pad_type_0 = const()[name = string("op_4932_pad_type_0"), val = string("valid")]; tensor var_4932_strides_0 = const()[name = string("op_4932_strides_0"), val = tensor([1, 1])]; tensor var_4932_pad_0 = const()[name = string("op_4932_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4932_dilations_0 = const()[name = string("op_4932_dilations_0"), val = tensor([1, 1])]; int32 var_4932_groups_0 = const()[name = string("op_4932_groups_0"), val = int32(1)]; tensor var_4932 = conv(dilations = var_4932_dilations_0, groups = var_4932_groups_0, pad = var_4932_pad_0, pad_type = var_4932_pad_type_0, strides = var_4932_strides_0, weight = encoder_layers_20_self_attn_v_proj_weight_quantized, x = var_4905_cast_fp16)[name = string("op_4932")]; tensor var_4933 = const()[name = string("op_4933"), val = tensor([1, 1, 256, 128])]; tensor var_4934 = reshape(shape = var_4933, x = var_4932)[name = string("op_4934")]; tensor var_4935 = const()[name = string("op_4935"), val = tensor([0, 1, 3, 2])]; fp16 const_282_promoted_to_fp16 = const()[name = string("const_282_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_121 = transpose(perm = var_4915, x = var_4914)[name = string("transpose_34")]; tensor var_4941_cast_fp16 = mul(x = q_121, y = const_282_promoted_to_fp16)[name = string("op_4941_cast_fp16")]; bool input_405_interleave_0 = const()[name = string("input_405_interleave_0"), val = bool(false)]; tensor input_405_cast_fp16 = concat(axis = var_23, interleave = input_405_interleave_0, values = (q_121, var_4941_cast_fp16))[name = string("input_405_cast_fp16")]; tensor normed_567_axes_0 = const()[name = string("normed_567_axes_0"), val = tensor([-1])]; tensor normed_567_cast_fp16 = layer_norm(axes = normed_567_axes_0, epsilon = var_8_to_fp16, x = input_405_cast_fp16)[name = string("normed_567_cast_fp16")]; tensor var_4946_split_sizes_0 = const()[name = string("op_4946_split_sizes_0"), val = tensor([256, 256])]; int32 var_4946_axis_0 = const()[name = string("op_4946_axis_0"), val = int32(-1)]; tensor var_4946_cast_fp16_0, tensor var_4946_cast_fp16_1 = split(axis = var_4946_axis_0, split_sizes = var_4946_split_sizes_0, x = normed_567_cast_fp16)[name = string("op_4946_cast_fp16")]; tensor var_4950_to_fp16 = const()[name = string("op_4950_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306220416)))]; tensor out_243_cast_fp16 = mul(x = var_4946_cast_fp16_0, y = var_4950_to_fp16)[name = string("out_243_cast_fp16")]; fp16 const_284_promoted_to_fp16 = const()[name = string("const_284_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_121 = transpose(perm = var_4925, x = var_4924)[name = string("transpose_33")]; tensor var_4957_cast_fp16 = mul(x = k_121, y = const_284_promoted_to_fp16)[name = string("op_4957_cast_fp16")]; bool input_407_interleave_0 = const()[name = string("input_407_interleave_0"), val = bool(false)]; tensor input_407_cast_fp16 = concat(axis = var_23, interleave = input_407_interleave_0, values = (k_121, var_4957_cast_fp16))[name = string("input_407_cast_fp16")]; tensor normed_571_axes_0 = const()[name = string("normed_571_axes_0"), val = tensor([-1])]; tensor normed_571_cast_fp16 = layer_norm(axes = normed_571_axes_0, epsilon = var_8_to_fp16, x = input_407_cast_fp16)[name = string("normed_571_cast_fp16")]; tensor var_4962_split_sizes_0 = const()[name = string("op_4962_split_sizes_0"), val = tensor([256, 256])]; int32 var_4962_axis_0 = const()[name = string("op_4962_axis_0"), val = int32(-1)]; tensor var_4962_cast_fp16_0, tensor var_4962_cast_fp16_1 = split(axis = var_4962_axis_0, split_sizes = var_4962_split_sizes_0, x = normed_571_cast_fp16)[name = string("op_4962_cast_fp16")]; tensor var_4966_to_fp16 = const()[name = string("op_4966_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306220992)))]; tensor out_245_cast_fp16 = mul(x = var_4962_cast_fp16_0, y = var_4966_to_fp16)[name = string("out_245_cast_fp16")]; tensor var_4969 = mul(x = out_243_cast_fp16, y = cos_1_quantized)[name = string("op_4969")]; tensor var_4970_split_sizes_0 = const()[name = string("op_4970_split_sizes_0"), val = tensor([128, 128])]; int32 var_4970_axis_0 = const()[name = string("op_4970_axis_0"), val = int32(-1)]; tensor var_4970_0, tensor var_4970_1 = split(axis = var_4970_axis_0, split_sizes = var_4970_split_sizes_0, x = out_243_cast_fp16)[name = string("op_4970")]; fp16 const_286_promoted = const()[name = string("const_286_promoted"), val = fp16(-0x1p+0)]; tensor var_4972 = mul(x = var_4970_1, y = const_286_promoted)[name = string("op_4972")]; bool var_4974_interleave_0 = const()[name = string("op_4974_interleave_0"), val = bool(false)]; tensor var_4974 = concat(axis = var_23, interleave = var_4974_interleave_0, values = (var_4972, var_4970_0))[name = string("op_4974")]; tensor var_4975 = mul(x = var_4974, y = sin_1_quantized)[name = string("op_4975")]; tensor q_125 = add(x = var_4969, y = var_4975)[name = string("q_125")]; tensor var_4977 = mul(x = out_245_cast_fp16, y = cos_1_quantized)[name = string("op_4977")]; tensor var_4978_split_sizes_0 = const()[name = string("op_4978_split_sizes_0"), val = tensor([128, 128])]; int32 var_4978_axis_0 = const()[name = string("op_4978_axis_0"), val = int32(-1)]; tensor var_4978_0, tensor var_4978_1 = split(axis = var_4978_axis_0, split_sizes = var_4978_split_sizes_0, x = out_245_cast_fp16)[name = string("op_4978")]; fp16 const_287_promoted = const()[name = string("const_287_promoted"), val = fp16(-0x1p+0)]; tensor var_4980 = mul(x = var_4978_1, y = const_287_promoted)[name = string("op_4980")]; bool var_4982_interleave_0 = const()[name = string("op_4982_interleave_0"), val = bool(false)]; tensor var_4982 = concat(axis = var_23, interleave = var_4982_interleave_0, values = (var_4980, var_4978_0))[name = string("op_4982")]; tensor var_4983 = mul(x = var_4982, y = sin_1_quantized)[name = string("op_4983")]; tensor hidden_states_241 = add(x = var_4977, y = var_4983)[name = string("hidden_states_241")]; tensor hidden_states_243_axes_0 = const()[name = string("hidden_states_243_axes_0"), val = tensor([2])]; tensor hidden_states_243 = expand_dims(axes = hidden_states_243_axes_0, x = hidden_states_241)[name = string("hidden_states_243")]; tensor var_4986 = const()[name = string("op_4986"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_245 = tile(reps = var_4986, x = hidden_states_243)[name = string("hidden_states_245")]; tensor var_4988 = const()[name = string("op_4988"), val = tensor([1, 3, 128, 256])]; tensor k_125 = reshape(shape = var_4988, x = hidden_states_245)[name = string("k_125")]; tensor hidden_states_249_axes_0 = const()[name = string("hidden_states_249_axes_0"), val = tensor([2])]; tensor hidden_states_247 = transpose(perm = var_4935, x = var_4934)[name = string("transpose_32")]; tensor hidden_states_249 = expand_dims(axes = hidden_states_249_axes_0, x = hidden_states_247)[name = string("hidden_states_249")]; tensor var_4991 = const()[name = string("op_4991"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_251 = tile(reps = var_4991, x = hidden_states_249)[name = string("hidden_states_251")]; tensor var_4993 = const()[name = string("op_4993"), val = tensor([1, 3, 128, 256])]; tensor v_41 = reshape(shape = var_4993, x = hidden_states_251)[name = string("v_41")]; bool var_4998_transpose_x_1 = const()[name = string("op_4998_transpose_x_1"), val = bool(false)]; bool var_4998_transpose_y_1 = const()[name = string("op_4998_transpose_y_1"), val = bool(true)]; tensor var_4998_cast_fp16 = matmul(transpose_x = var_4998_transpose_x_1, transpose_y = var_4998_transpose_y_1, x = q_125, y = k_125)[name = string("op_4998_cast_fp16")]; fp16 var_4999_to_fp16 = const()[name = string("op_4999_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_121_cast_fp16 = mul(x = var_4998_cast_fp16, y = var_4999_to_fp16)[name = string("attn_weights_121_cast_fp16")]; tensor attn_weights_123_cast_fp16 = add(x = attn_weights_121_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_123_cast_fp16")]; tensor var_5003_cast_fp16 = softmax(axis = var_23, x = attn_weights_123_cast_fp16)[name = string("op_5003_cast_fp16")]; bool var_5007_transpose_x_0 = const()[name = string("op_5007_transpose_x_0"), val = bool(false)]; bool var_5007_transpose_y_0 = const()[name = string("op_5007_transpose_y_0"), val = bool(false)]; tensor var_5007_cast_fp16 = matmul(transpose_x = var_5007_transpose_x_0, transpose_y = var_5007_transpose_y_0, x = var_5003_cast_fp16, y = v_41)[name = string("op_5007_cast_fp16")]; tensor var_5009 = const()[name = string("op_5009"), val = tensor([0, 2, 1, 3])]; tensor var_5012 = const()[name = string("op_5012"), val = tensor([1, 128, 768])]; tensor var_5010 = transpose(perm = var_5009, x = var_5007_cast_fp16)[name = string("transpose_31")]; tensor attn_out_123 = reshape(shape = var_5012, x = var_5010)[name = string("attn_out_123")]; tensor var_5014 = const()[name = string("op_5014"), val = tensor([0, 2, 1])]; tensor squeeze_20_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306221568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306811456))))[name = string("squeeze_20_quantized")]; string var_5023_pad_type_0 = const()[name = string("op_5023_pad_type_0"), val = string("valid")]; int32 var_5023_groups_0 = const()[name = string("op_5023_groups_0"), val = int32(1)]; tensor var_5023_strides_0 = const()[name = string("op_5023_strides_0"), val = tensor([1])]; tensor var_5023_pad_0 = const()[name = string("op_5023_pad_0"), val = tensor([0, 0])]; tensor var_5023_dilations_0 = const()[name = string("op_5023_dilations_0"), val = tensor([1])]; tensor var_5015 = transpose(perm = var_5014, x = attn_out_123)[name = string("transpose_30")]; tensor var_5023 = conv(dilations = var_5023_dilations_0, groups = var_5023_groups_0, pad = var_5023_pad_0, pad_type = var_5023_pad_type_0, strides = var_5023_strides_0, weight = squeeze_20_quantized, x = var_5015)[name = string("op_5023")]; tensor var_5024 = const()[name = string("op_5024"), val = tensor([0, 2, 1])]; fp16 const_288_promoted_to_fp16 = const()[name = string("const_288_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_329 = transpose(perm = var_5024, x = var_5023)[name = string("transpose_29")]; tensor var_5028_cast_fp16 = mul(x = x_329, y = const_288_promoted_to_fp16)[name = string("op_5028_cast_fp16")]; bool input_411_interleave_0 = const()[name = string("input_411_interleave_0"), val = bool(false)]; tensor input_411_cast_fp16 = concat(axis = var_23, interleave = input_411_interleave_0, values = (x_329, var_5028_cast_fp16))[name = string("input_411_cast_fp16")]; tensor normed_575_axes_0 = const()[name = string("normed_575_axes_0"), val = tensor([-1])]; tensor normed_575_cast_fp16 = layer_norm(axes = normed_575_axes_0, epsilon = var_8_to_fp16, x = input_411_cast_fp16)[name = string("normed_575_cast_fp16")]; tensor var_5033_split_sizes_0 = const()[name = string("op_5033_split_sizes_0"), val = tensor([768, 768])]; int32 var_5033_axis_0 = const()[name = string("op_5033_axis_0"), val = int32(-1)]; tensor var_5033_cast_fp16_0, tensor var_5033_cast_fp16_1 = split(axis = var_5033_axis_0, split_sizes = var_5033_split_sizes_0, x = normed_575_cast_fp16)[name = string("op_5033_cast_fp16")]; tensor var_5037_to_fp16 = const()[name = string("op_5037_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306813056)))]; tensor out_247_cast_fp16 = mul(x = var_5033_cast_fp16_0, y = var_5037_to_fp16)[name = string("out_247_cast_fp16")]; tensor x_331_cast_fp16 = add(x = x_321_cast_fp16, y = out_247_cast_fp16)[name = string("x_331_cast_fp16")]; fp16 const_290_promoted_to_fp16 = const()[name = string("const_290_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5044_cast_fp16 = mul(x = x_331_cast_fp16, y = const_290_promoted_to_fp16)[name = string("op_5044_cast_fp16")]; bool input_413_interleave_0 = const()[name = string("input_413_interleave_0"), val = bool(false)]; tensor input_413_cast_fp16 = concat(axis = var_23, interleave = input_413_interleave_0, values = (x_331_cast_fp16, var_5044_cast_fp16))[name = string("input_413_cast_fp16")]; tensor normed_579_axes_0 = const()[name = string("normed_579_axes_0"), val = tensor([-1])]; tensor normed_579_cast_fp16 = layer_norm(axes = normed_579_axes_0, epsilon = var_8_to_fp16, x = input_413_cast_fp16)[name = string("normed_579_cast_fp16")]; tensor var_5049_split_sizes_0 = const()[name = string("op_5049_split_sizes_0"), val = tensor([768, 768])]; int32 var_5049_axis_0 = const()[name = string("op_5049_axis_0"), val = int32(-1)]; tensor var_5049_cast_fp16_0, tensor var_5049_cast_fp16_1 = split(axis = var_5049_axis_0, split_sizes = var_5049_split_sizes_0, x = normed_579_cast_fp16)[name = string("op_5049_cast_fp16")]; tensor var_5053_to_fp16 = const()[name = string("op_5053_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306814656)))]; tensor out_249_cast_fp16 = mul(x = var_5049_cast_fp16_0, y = var_5053_to_fp16)[name = string("out_249_cast_fp16")]; tensor var_5060 = const()[name = string("op_5060"), val = tensor([0, 2, 1])]; tensor input_415_axes_0 = const()[name = string("input_415_axes_0"), val = tensor([2])]; tensor var_5061 = transpose(perm = var_5060, x = out_249_cast_fp16)[name = string("transpose_28")]; tensor input_415 = expand_dims(axes = input_415_axes_0, x = var_5061)[name = string("input_415")]; string gate_81_pad_type_0 = const()[name = string("gate_81_pad_type_0"), val = string("valid")]; tensor gate_81_strides_0 = const()[name = string("gate_81_strides_0"), val = tensor([1, 1])]; tensor gate_81_pad_0 = const()[name = string("gate_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_81_dilations_0 = const()[name = string("gate_81_dilations_0"), val = tensor([1, 1])]; int32 gate_81_groups_0 = const()[name = string("gate_81_groups_0"), val = int32(1)]; tensor gate_81 = conv(dilations = gate_81_dilations_0, groups = gate_81_groups_0, pad = gate_81_pad_0, pad_type = gate_81_pad_type_0, strides = gate_81_strides_0, weight = encoder_layers_20_mlp_gate_proj_weight_quantized, x = input_415)[name = string("gate_81")]; string up_41_pad_type_0 = const()[name = string("up_41_pad_type_0"), val = string("valid")]; tensor up_41_strides_0 = const()[name = string("up_41_strides_0"), val = tensor([1, 1])]; tensor up_41_pad_0 = const()[name = string("up_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_41_dilations_0 = const()[name = string("up_41_dilations_0"), val = tensor([1, 1])]; int32 up_41_groups_0 = const()[name = string("up_41_groups_0"), val = int32(1)]; tensor up_41 = conv(dilations = up_41_dilations_0, groups = up_41_groups_0, pad = up_41_pad_0, pad_type = up_41_pad_type_0, strides = up_41_strides_0, weight = encoder_layers_20_mlp_up_proj_weight_quantized, x = input_415)[name = string("up_41")]; string gate_83_mode_0 = const()[name = string("gate_83_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_83 = gelu(mode = gate_83_mode_0, x = gate_81)[name = string("gate_83")]; tensor input_417 = mul(x = gate_83, y = up_41)[name = string("input_417")]; string var_5082_pad_type_0 = const()[name = string("op_5082_pad_type_0"), val = string("valid")]; tensor var_5082_strides_0 = const()[name = string("op_5082_strides_0"), val = tensor([1, 1])]; tensor var_5082_pad_0 = const()[name = string("op_5082_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5082_dilations_0 = const()[name = string("op_5082_dilations_0"), val = tensor([1, 1])]; int32 var_5082_groups_0 = const()[name = string("op_5082_groups_0"), val = int32(1)]; tensor var_5082 = conv(dilations = var_5082_dilations_0, groups = var_5082_groups_0, pad = var_5082_pad_0, pad_type = var_5082_pad_type_0, strides = var_5082_strides_0, weight = encoder_layers_20_mlp_down_proj_weight_quantized, x = input_417)[name = string("op_5082")]; tensor var_5083_axes_0 = const()[name = string("op_5083_axes_0"), val = tensor([2])]; tensor var_5083 = squeeze(axes = var_5083_axes_0, x = var_5082)[name = string("op_5083")]; tensor var_5084 = const()[name = string("op_5084"), val = tensor([0, 2, 1])]; fp16 const_292_promoted_to_fp16 = const()[name = string("const_292_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_335 = transpose(perm = var_5084, x = var_5083)[name = string("transpose_27")]; tensor var_5088_cast_fp16 = mul(x = x_335, y = const_292_promoted_to_fp16)[name = string("op_5088_cast_fp16")]; bool input_419_interleave_0 = const()[name = string("input_419_interleave_0"), val = bool(false)]; tensor input_419_cast_fp16 = concat(axis = var_23, interleave = input_419_interleave_0, values = (x_335, var_5088_cast_fp16))[name = string("input_419_cast_fp16")]; tensor normed_585_axes_0 = const()[name = string("normed_585_axes_0"), val = tensor([-1])]; tensor normed_585_cast_fp16 = layer_norm(axes = normed_585_axes_0, epsilon = var_8_to_fp16, x = input_419_cast_fp16)[name = string("normed_585_cast_fp16")]; tensor var_5093_split_sizes_0 = const()[name = string("op_5093_split_sizes_0"), val = tensor([768, 768])]; int32 var_5093_axis_0 = const()[name = string("op_5093_axis_0"), val = int32(-1)]; tensor var_5093_cast_fp16_0, tensor var_5093_cast_fp16_1 = split(axis = var_5093_axis_0, split_sizes = var_5093_split_sizes_0, x = normed_585_cast_fp16)[name = string("op_5093_cast_fp16")]; tensor var_5097_to_fp16 = const()[name = string("op_5097_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306816256)))]; tensor out_251_cast_fp16 = mul(x = var_5093_cast_fp16_0, y = var_5097_to_fp16)[name = string("out_251_cast_fp16")]; tensor x_337_cast_fp16 = add(x = x_331_cast_fp16, y = out_251_cast_fp16)[name = string("x_337_cast_fp16")]; fp16 const_294_promoted_to_fp16 = const()[name = string("const_294_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5126_cast_fp16 = mul(x = x_337_cast_fp16, y = const_294_promoted_to_fp16)[name = string("op_5126_cast_fp16")]; bool input_421_interleave_0 = const()[name = string("input_421_interleave_0"), val = bool(false)]; tensor input_421_cast_fp16 = concat(axis = var_23, interleave = input_421_interleave_0, values = (x_337_cast_fp16, var_5126_cast_fp16))[name = string("input_421_cast_fp16")]; tensor normed_589_axes_0 = const()[name = string("normed_589_axes_0"), val = tensor([-1])]; tensor normed_589_cast_fp16 = layer_norm(axes = normed_589_axes_0, epsilon = var_8_to_fp16, x = input_421_cast_fp16)[name = string("normed_589_cast_fp16")]; tensor var_5131_split_sizes_0 = const()[name = string("op_5131_split_sizes_0"), val = tensor([768, 768])]; int32 var_5131_axis_0 = const()[name = string("op_5131_axis_0"), val = int32(-1)]; tensor var_5131_cast_fp16_0, tensor var_5131_cast_fp16_1 = split(axis = var_5131_axis_0, split_sizes = var_5131_split_sizes_0, x = normed_589_cast_fp16)[name = string("op_5131_cast_fp16")]; tensor var_5135_to_fp16 = const()[name = string("op_5135_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306817856)))]; tensor out_253_cast_fp16 = mul(x = var_5131_cast_fp16_0, y = var_5135_to_fp16)[name = string("out_253_cast_fp16")]; tensor var_5141 = const()[name = string("op_5141"), val = tensor([0, 2, 1])]; tensor var_5143_axes_0 = const()[name = string("op_5143_axes_0"), val = tensor([2])]; tensor var_5142_cast_fp16 = transpose(perm = var_5141, x = out_253_cast_fp16)[name = string("transpose_26")]; tensor var_5143_cast_fp16 = expand_dims(axes = var_5143_axes_0, x = var_5142_cast_fp16)[name = string("op_5143_cast_fp16")]; string var_5150_pad_type_0 = const()[name = string("op_5150_pad_type_0"), val = string("valid")]; tensor var_5150_strides_0 = const()[name = string("op_5150_strides_0"), val = tensor([1, 1])]; tensor var_5150_pad_0 = const()[name = string("op_5150_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5150_dilations_0 = const()[name = string("op_5150_dilations_0"), val = tensor([1, 1])]; int32 var_5150_groups_0 = const()[name = string("op_5150_groups_0"), val = int32(1)]; tensor var_5150 = conv(dilations = var_5150_dilations_0, groups = var_5150_groups_0, pad = var_5150_pad_0, pad_type = var_5150_pad_type_0, strides = var_5150_strides_0, weight = encoder_layers_21_self_attn_q_proj_weight_quantized, x = var_5143_cast_fp16)[name = string("op_5150")]; tensor var_5151 = const()[name = string("op_5151"), val = tensor([1, 3, 256, 128])]; tensor var_5152 = reshape(shape = var_5151, x = var_5150)[name = string("op_5152")]; tensor var_5153 = const()[name = string("op_5153"), val = tensor([0, 1, 3, 2])]; string var_5160_pad_type_0 = const()[name = string("op_5160_pad_type_0"), val = string("valid")]; tensor var_5160_strides_0 = const()[name = string("op_5160_strides_0"), val = tensor([1, 1])]; tensor var_5160_pad_0 = const()[name = string("op_5160_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5160_dilations_0 = const()[name = string("op_5160_dilations_0"), val = tensor([1, 1])]; int32 var_5160_groups_0 = const()[name = string("op_5160_groups_0"), val = int32(1)]; tensor var_5160 = conv(dilations = var_5160_dilations_0, groups = var_5160_groups_0, pad = var_5160_pad_0, pad_type = var_5160_pad_type_0, strides = var_5160_strides_0, weight = encoder_layers_21_self_attn_k_proj_weight_quantized, x = var_5143_cast_fp16)[name = string("op_5160")]; tensor var_5161 = const()[name = string("op_5161"), val = tensor([1, 1, 256, 128])]; tensor var_5162 = reshape(shape = var_5161, x = var_5160)[name = string("op_5162")]; tensor var_5163 = const()[name = string("op_5163"), val = tensor([0, 1, 3, 2])]; string var_5170_pad_type_0 = const()[name = string("op_5170_pad_type_0"), val = string("valid")]; tensor var_5170_strides_0 = const()[name = string("op_5170_strides_0"), val = tensor([1, 1])]; tensor var_5170_pad_0 = const()[name = string("op_5170_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5170_dilations_0 = const()[name = string("op_5170_dilations_0"), val = tensor([1, 1])]; int32 var_5170_groups_0 = const()[name = string("op_5170_groups_0"), val = int32(1)]; tensor var_5170 = conv(dilations = var_5170_dilations_0, groups = var_5170_groups_0, pad = var_5170_pad_0, pad_type = var_5170_pad_type_0, strides = var_5170_strides_0, weight = encoder_layers_21_self_attn_v_proj_weight_quantized, x = var_5143_cast_fp16)[name = string("op_5170")]; tensor var_5171 = const()[name = string("op_5171"), val = tensor([1, 1, 256, 128])]; tensor var_5172 = reshape(shape = var_5171, x = var_5170)[name = string("op_5172")]; tensor var_5173 = const()[name = string("op_5173"), val = tensor([0, 1, 3, 2])]; fp16 const_296_promoted_to_fp16 = const()[name = string("const_296_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_127 = transpose(perm = var_5153, x = var_5152)[name = string("transpose_25")]; tensor var_5179_cast_fp16 = mul(x = q_127, y = const_296_promoted_to_fp16)[name = string("op_5179_cast_fp16")]; bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; tensor input_425_cast_fp16 = concat(axis = var_23, interleave = input_425_interleave_0, values = (q_127, var_5179_cast_fp16))[name = string("input_425_cast_fp16")]; tensor normed_595_axes_0 = const()[name = string("normed_595_axes_0"), val = tensor([-1])]; tensor normed_595_cast_fp16 = layer_norm(axes = normed_595_axes_0, epsilon = var_8_to_fp16, x = input_425_cast_fp16)[name = string("normed_595_cast_fp16")]; tensor var_5184_split_sizes_0 = const()[name = string("op_5184_split_sizes_0"), val = tensor([256, 256])]; int32 var_5184_axis_0 = const()[name = string("op_5184_axis_0"), val = int32(-1)]; tensor var_5184_cast_fp16_0, tensor var_5184_cast_fp16_1 = split(axis = var_5184_axis_0, split_sizes = var_5184_split_sizes_0, x = normed_595_cast_fp16)[name = string("op_5184_cast_fp16")]; tensor var_5188_to_fp16 = const()[name = string("op_5188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306819456)))]; tensor out_255_cast_fp16 = mul(x = var_5184_cast_fp16_0, y = var_5188_to_fp16)[name = string("out_255_cast_fp16")]; fp16 const_298_promoted_to_fp16 = const()[name = string("const_298_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_127 = transpose(perm = var_5163, x = var_5162)[name = string("transpose_24")]; tensor var_5195_cast_fp16 = mul(x = k_127, y = const_298_promoted_to_fp16)[name = string("op_5195_cast_fp16")]; bool input_427_interleave_0 = const()[name = string("input_427_interleave_0"), val = bool(false)]; tensor input_427_cast_fp16 = concat(axis = var_23, interleave = input_427_interleave_0, values = (k_127, var_5195_cast_fp16))[name = string("input_427_cast_fp16")]; tensor normed_599_axes_0 = const()[name = string("normed_599_axes_0"), val = tensor([-1])]; tensor normed_599_cast_fp16 = layer_norm(axes = normed_599_axes_0, epsilon = var_8_to_fp16, x = input_427_cast_fp16)[name = string("normed_599_cast_fp16")]; tensor var_5200_split_sizes_0 = const()[name = string("op_5200_split_sizes_0"), val = tensor([256, 256])]; int32 var_5200_axis_0 = const()[name = string("op_5200_axis_0"), val = int32(-1)]; tensor var_5200_cast_fp16_0, tensor var_5200_cast_fp16_1 = split(axis = var_5200_axis_0, split_sizes = var_5200_split_sizes_0, x = normed_599_cast_fp16)[name = string("op_5200_cast_fp16")]; tensor var_5204_to_fp16 = const()[name = string("op_5204_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306820032)))]; tensor out_257_cast_fp16 = mul(x = var_5200_cast_fp16_0, y = var_5204_to_fp16)[name = string("out_257_cast_fp16")]; tensor var_5207 = mul(x = out_255_cast_fp16, y = cos_1_quantized)[name = string("op_5207")]; tensor var_5208_split_sizes_0 = const()[name = string("op_5208_split_sizes_0"), val = tensor([128, 128])]; int32 var_5208_axis_0 = const()[name = string("op_5208_axis_0"), val = int32(-1)]; tensor var_5208_0, tensor var_5208_1 = split(axis = var_5208_axis_0, split_sizes = var_5208_split_sizes_0, x = out_255_cast_fp16)[name = string("op_5208")]; fp16 const_300_promoted = const()[name = string("const_300_promoted"), val = fp16(-0x1p+0)]; tensor var_5210 = mul(x = var_5208_1, y = const_300_promoted)[name = string("op_5210")]; bool var_5212_interleave_0 = const()[name = string("op_5212_interleave_0"), val = bool(false)]; tensor var_5212 = concat(axis = var_23, interleave = var_5212_interleave_0, values = (var_5210, var_5208_0))[name = string("op_5212")]; tensor var_5213 = mul(x = var_5212, y = sin_1_quantized)[name = string("op_5213")]; tensor q_131 = add(x = var_5207, y = var_5213)[name = string("q_131")]; tensor var_5215 = mul(x = out_257_cast_fp16, y = cos_1_quantized)[name = string("op_5215")]; tensor var_5216_split_sizes_0 = const()[name = string("op_5216_split_sizes_0"), val = tensor([128, 128])]; int32 var_5216_axis_0 = const()[name = string("op_5216_axis_0"), val = int32(-1)]; tensor var_5216_0, tensor var_5216_1 = split(axis = var_5216_axis_0, split_sizes = var_5216_split_sizes_0, x = out_257_cast_fp16)[name = string("op_5216")]; fp16 const_301_promoted = const()[name = string("const_301_promoted"), val = fp16(-0x1p+0)]; tensor var_5218 = mul(x = var_5216_1, y = const_301_promoted)[name = string("op_5218")]; bool var_5220_interleave_0 = const()[name = string("op_5220_interleave_0"), val = bool(false)]; tensor var_5220 = concat(axis = var_23, interleave = var_5220_interleave_0, values = (var_5218, var_5216_0))[name = string("op_5220")]; tensor var_5221 = mul(x = var_5220, y = sin_1_quantized)[name = string("op_5221")]; tensor hidden_states_253 = add(x = var_5215, y = var_5221)[name = string("hidden_states_253")]; tensor hidden_states_255_axes_0 = const()[name = string("hidden_states_255_axes_0"), val = tensor([2])]; tensor hidden_states_255 = expand_dims(axes = hidden_states_255_axes_0, x = hidden_states_253)[name = string("hidden_states_255")]; tensor var_5224 = const()[name = string("op_5224"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_257 = tile(reps = var_5224, x = hidden_states_255)[name = string("hidden_states_257")]; tensor var_5226 = const()[name = string("op_5226"), val = tensor([1, 3, 128, 256])]; tensor k_131 = reshape(shape = var_5226, x = hidden_states_257)[name = string("k_131")]; tensor hidden_states_261_axes_0 = const()[name = string("hidden_states_261_axes_0"), val = tensor([2])]; tensor hidden_states_259 = transpose(perm = var_5173, x = var_5172)[name = string("transpose_23")]; tensor hidden_states_261 = expand_dims(axes = hidden_states_261_axes_0, x = hidden_states_259)[name = string("hidden_states_261")]; tensor var_5229 = const()[name = string("op_5229"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_263 = tile(reps = var_5229, x = hidden_states_261)[name = string("hidden_states_263")]; tensor var_5231 = const()[name = string("op_5231"), val = tensor([1, 3, 128, 256])]; tensor v_43 = reshape(shape = var_5231, x = hidden_states_263)[name = string("v_43")]; bool var_5236_transpose_x_1 = const()[name = string("op_5236_transpose_x_1"), val = bool(false)]; bool var_5236_transpose_y_1 = const()[name = string("op_5236_transpose_y_1"), val = bool(true)]; tensor var_5236_cast_fp16 = matmul(transpose_x = var_5236_transpose_x_1, transpose_y = var_5236_transpose_y_1, x = q_131, y = k_131)[name = string("op_5236_cast_fp16")]; fp16 var_5237_to_fp16 = const()[name = string("op_5237_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_127_cast_fp16 = mul(x = var_5236_cast_fp16, y = var_5237_to_fp16)[name = string("attn_weights_127_cast_fp16")]; tensor attn_weights_129_cast_fp16 = add(x = attn_weights_127_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_129_cast_fp16")]; tensor var_5241_cast_fp16 = softmax(axis = var_23, x = attn_weights_129_cast_fp16)[name = string("op_5241_cast_fp16")]; bool var_5245_transpose_x_0 = const()[name = string("op_5245_transpose_x_0"), val = bool(false)]; bool var_5245_transpose_y_0 = const()[name = string("op_5245_transpose_y_0"), val = bool(false)]; tensor var_5245_cast_fp16 = matmul(transpose_x = var_5245_transpose_x_0, transpose_y = var_5245_transpose_y_0, x = var_5241_cast_fp16, y = v_43)[name = string("op_5245_cast_fp16")]; tensor var_5247 = const()[name = string("op_5247"), val = tensor([0, 2, 1, 3])]; tensor var_5250 = const()[name = string("op_5250"), val = tensor([1, 128, 768])]; tensor var_5248 = transpose(perm = var_5247, x = var_5245_cast_fp16)[name = string("transpose_22")]; tensor attn_out_129 = reshape(shape = var_5250, x = var_5248)[name = string("attn_out_129")]; tensor var_5252 = const()[name = string("op_5252"), val = tensor([0, 2, 1])]; tensor squeeze_21_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306820608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307410496))))[name = string("squeeze_21_quantized")]; string var_5261_pad_type_0 = const()[name = string("op_5261_pad_type_0"), val = string("valid")]; int32 var_5261_groups_0 = const()[name = string("op_5261_groups_0"), val = int32(1)]; tensor var_5261_strides_0 = const()[name = string("op_5261_strides_0"), val = tensor([1])]; tensor var_5261_pad_0 = const()[name = string("op_5261_pad_0"), val = tensor([0, 0])]; tensor var_5261_dilations_0 = const()[name = string("op_5261_dilations_0"), val = tensor([1])]; tensor var_5253 = transpose(perm = var_5252, x = attn_out_129)[name = string("transpose_21")]; tensor var_5261 = conv(dilations = var_5261_dilations_0, groups = var_5261_groups_0, pad = var_5261_pad_0, pad_type = var_5261_pad_type_0, strides = var_5261_strides_0, weight = squeeze_21_quantized, x = var_5253)[name = string("op_5261")]; tensor var_5262 = const()[name = string("op_5262"), val = tensor([0, 2, 1])]; fp16 const_302_promoted_to_fp16 = const()[name = string("const_302_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_345 = transpose(perm = var_5262, x = var_5261)[name = string("transpose_20")]; tensor var_5266_cast_fp16 = mul(x = x_345, y = const_302_promoted_to_fp16)[name = string("op_5266_cast_fp16")]; bool input_431_interleave_0 = const()[name = string("input_431_interleave_0"), val = bool(false)]; tensor input_431_cast_fp16 = concat(axis = var_23, interleave = input_431_interleave_0, values = (x_345, var_5266_cast_fp16))[name = string("input_431_cast_fp16")]; tensor normed_603_axes_0 = const()[name = string("normed_603_axes_0"), val = tensor([-1])]; tensor normed_603_cast_fp16 = layer_norm(axes = normed_603_axes_0, epsilon = var_8_to_fp16, x = input_431_cast_fp16)[name = string("normed_603_cast_fp16")]; tensor var_5271_split_sizes_0 = const()[name = string("op_5271_split_sizes_0"), val = tensor([768, 768])]; int32 var_5271_axis_0 = const()[name = string("op_5271_axis_0"), val = int32(-1)]; tensor var_5271_cast_fp16_0, tensor var_5271_cast_fp16_1 = split(axis = var_5271_axis_0, split_sizes = var_5271_split_sizes_0, x = normed_603_cast_fp16)[name = string("op_5271_cast_fp16")]; tensor var_5275_to_fp16 = const()[name = string("op_5275_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307412096)))]; tensor out_259_cast_fp16 = mul(x = var_5271_cast_fp16_0, y = var_5275_to_fp16)[name = string("out_259_cast_fp16")]; tensor x_347_cast_fp16 = add(x = x_337_cast_fp16, y = out_259_cast_fp16)[name = string("x_347_cast_fp16")]; fp16 const_304_promoted_to_fp16 = const()[name = string("const_304_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5282_cast_fp16 = mul(x = x_347_cast_fp16, y = const_304_promoted_to_fp16)[name = string("op_5282_cast_fp16")]; bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; tensor input_433_cast_fp16 = concat(axis = var_23, interleave = input_433_interleave_0, values = (x_347_cast_fp16, var_5282_cast_fp16))[name = string("input_433_cast_fp16")]; tensor normed_607_axes_0 = const()[name = string("normed_607_axes_0"), val = tensor([-1])]; tensor normed_607_cast_fp16 = layer_norm(axes = normed_607_axes_0, epsilon = var_8_to_fp16, x = input_433_cast_fp16)[name = string("normed_607_cast_fp16")]; tensor var_5287_split_sizes_0 = const()[name = string("op_5287_split_sizes_0"), val = tensor([768, 768])]; int32 var_5287_axis_0 = const()[name = string("op_5287_axis_0"), val = int32(-1)]; tensor var_5287_cast_fp16_0, tensor var_5287_cast_fp16_1 = split(axis = var_5287_axis_0, split_sizes = var_5287_split_sizes_0, x = normed_607_cast_fp16)[name = string("op_5287_cast_fp16")]; tensor var_5291_to_fp16 = const()[name = string("op_5291_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307413696)))]; tensor out_261_cast_fp16 = mul(x = var_5287_cast_fp16_0, y = var_5291_to_fp16)[name = string("out_261_cast_fp16")]; tensor var_5298 = const()[name = string("op_5298"), val = tensor([0, 2, 1])]; tensor input_435_axes_0 = const()[name = string("input_435_axes_0"), val = tensor([2])]; tensor var_5299 = transpose(perm = var_5298, x = out_261_cast_fp16)[name = string("transpose_19")]; tensor input_435 = expand_dims(axes = input_435_axes_0, x = var_5299)[name = string("input_435")]; string gate_85_pad_type_0 = const()[name = string("gate_85_pad_type_0"), val = string("valid")]; tensor gate_85_strides_0 = const()[name = string("gate_85_strides_0"), val = tensor([1, 1])]; tensor gate_85_pad_0 = const()[name = string("gate_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_85_dilations_0 = const()[name = string("gate_85_dilations_0"), val = tensor([1, 1])]; int32 gate_85_groups_0 = const()[name = string("gate_85_groups_0"), val = int32(1)]; tensor gate_85 = conv(dilations = gate_85_dilations_0, groups = gate_85_groups_0, pad = gate_85_pad_0, pad_type = gate_85_pad_type_0, strides = gate_85_strides_0, weight = encoder_layers_21_mlp_gate_proj_weight_quantized, x = input_435)[name = string("gate_85")]; string up_43_pad_type_0 = const()[name = string("up_43_pad_type_0"), val = string("valid")]; tensor up_43_strides_0 = const()[name = string("up_43_strides_0"), val = tensor([1, 1])]; tensor up_43_pad_0 = const()[name = string("up_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_43_dilations_0 = const()[name = string("up_43_dilations_0"), val = tensor([1, 1])]; int32 up_43_groups_0 = const()[name = string("up_43_groups_0"), val = int32(1)]; tensor up_43 = conv(dilations = up_43_dilations_0, groups = up_43_groups_0, pad = up_43_pad_0, pad_type = up_43_pad_type_0, strides = up_43_strides_0, weight = encoder_layers_21_mlp_up_proj_weight_quantized, x = input_435)[name = string("up_43")]; string gate_87_mode_0 = const()[name = string("gate_87_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_87 = gelu(mode = gate_87_mode_0, x = gate_85)[name = string("gate_87")]; tensor input_437 = mul(x = gate_87, y = up_43)[name = string("input_437")]; string var_5320_pad_type_0 = const()[name = string("op_5320_pad_type_0"), val = string("valid")]; tensor var_5320_strides_0 = const()[name = string("op_5320_strides_0"), val = tensor([1, 1])]; tensor var_5320_pad_0 = const()[name = string("op_5320_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5320_dilations_0 = const()[name = string("op_5320_dilations_0"), val = tensor([1, 1])]; int32 var_5320_groups_0 = const()[name = string("op_5320_groups_0"), val = int32(1)]; tensor var_5320 = conv(dilations = var_5320_dilations_0, groups = var_5320_groups_0, pad = var_5320_pad_0, pad_type = var_5320_pad_type_0, strides = var_5320_strides_0, weight = encoder_layers_21_mlp_down_proj_weight_quantized, x = input_437)[name = string("op_5320")]; tensor var_5321_axes_0 = const()[name = string("op_5321_axes_0"), val = tensor([2])]; tensor var_5321 = squeeze(axes = var_5321_axes_0, x = var_5320)[name = string("op_5321")]; tensor var_5322 = const()[name = string("op_5322"), val = tensor([0, 2, 1])]; fp16 const_306_promoted_to_fp16 = const()[name = string("const_306_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_351 = transpose(perm = var_5322, x = var_5321)[name = string("transpose_18")]; tensor var_5326_cast_fp16 = mul(x = x_351, y = const_306_promoted_to_fp16)[name = string("op_5326_cast_fp16")]; bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; tensor input_439_cast_fp16 = concat(axis = var_23, interleave = input_439_interleave_0, values = (x_351, var_5326_cast_fp16))[name = string("input_439_cast_fp16")]; tensor normed_613_axes_0 = const()[name = string("normed_613_axes_0"), val = tensor([-1])]; tensor normed_613_cast_fp16 = layer_norm(axes = normed_613_axes_0, epsilon = var_8_to_fp16, x = input_439_cast_fp16)[name = string("normed_613_cast_fp16")]; tensor var_5331_split_sizes_0 = const()[name = string("op_5331_split_sizes_0"), val = tensor([768, 768])]; int32 var_5331_axis_0 = const()[name = string("op_5331_axis_0"), val = int32(-1)]; tensor var_5331_cast_fp16_0, tensor var_5331_cast_fp16_1 = split(axis = var_5331_axis_0, split_sizes = var_5331_split_sizes_0, x = normed_613_cast_fp16)[name = string("op_5331_cast_fp16")]; tensor var_5335_to_fp16 = const()[name = string("op_5335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307415296)))]; tensor out_263_cast_fp16 = mul(x = var_5331_cast_fp16_0, y = var_5335_to_fp16)[name = string("out_263_cast_fp16")]; tensor x_353_cast_fp16 = add(x = x_347_cast_fp16, y = out_263_cast_fp16)[name = string("x_353_cast_fp16")]; fp16 const_308_promoted_to_fp16 = const()[name = string("const_308_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5364_cast_fp16 = mul(x = x_353_cast_fp16, y = const_308_promoted_to_fp16)[name = string("op_5364_cast_fp16")]; bool input_441_interleave_0 = const()[name = string("input_441_interleave_0"), val = bool(false)]; tensor input_441_cast_fp16 = concat(axis = var_23, interleave = input_441_interleave_0, values = (x_353_cast_fp16, var_5364_cast_fp16))[name = string("input_441_cast_fp16")]; tensor normed_617_axes_0 = const()[name = string("normed_617_axes_0"), val = tensor([-1])]; tensor normed_617_cast_fp16 = layer_norm(axes = normed_617_axes_0, epsilon = var_8_to_fp16, x = input_441_cast_fp16)[name = string("normed_617_cast_fp16")]; tensor var_5369_split_sizes_0 = const()[name = string("op_5369_split_sizes_0"), val = tensor([768, 768])]; int32 var_5369_axis_0 = const()[name = string("op_5369_axis_0"), val = int32(-1)]; tensor var_5369_cast_fp16_0, tensor var_5369_cast_fp16_1 = split(axis = var_5369_axis_0, split_sizes = var_5369_split_sizes_0, x = normed_617_cast_fp16)[name = string("op_5369_cast_fp16")]; tensor var_5373_to_fp16 = const()[name = string("op_5373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307416896)))]; tensor out_265_cast_fp16 = mul(x = var_5369_cast_fp16_0, y = var_5373_to_fp16)[name = string("out_265_cast_fp16")]; tensor var_5379 = const()[name = string("op_5379"), val = tensor([0, 2, 1])]; tensor var_5381_axes_0 = const()[name = string("op_5381_axes_0"), val = tensor([2])]; tensor var_5380_cast_fp16 = transpose(perm = var_5379, x = out_265_cast_fp16)[name = string("transpose_17")]; tensor var_5381_cast_fp16 = expand_dims(axes = var_5381_axes_0, x = var_5380_cast_fp16)[name = string("op_5381_cast_fp16")]; string var_5388_pad_type_0 = const()[name = string("op_5388_pad_type_0"), val = string("valid")]; tensor var_5388_strides_0 = const()[name = string("op_5388_strides_0"), val = tensor([1, 1])]; tensor var_5388_pad_0 = const()[name = string("op_5388_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5388_dilations_0 = const()[name = string("op_5388_dilations_0"), val = tensor([1, 1])]; int32 var_5388_groups_0 = const()[name = string("op_5388_groups_0"), val = int32(1)]; tensor var_5388 = conv(dilations = var_5388_dilations_0, groups = var_5388_groups_0, pad = var_5388_pad_0, pad_type = var_5388_pad_type_0, strides = var_5388_strides_0, weight = encoder_layers_22_self_attn_q_proj_weight_quantized, x = var_5381_cast_fp16)[name = string("op_5388")]; tensor var_5389 = const()[name = string("op_5389"), val = tensor([1, 3, 256, 128])]; tensor var_5390 = reshape(shape = var_5389, x = var_5388)[name = string("op_5390")]; tensor var_5391 = const()[name = string("op_5391"), val = tensor([0, 1, 3, 2])]; string var_5398_pad_type_0 = const()[name = string("op_5398_pad_type_0"), val = string("valid")]; tensor var_5398_strides_0 = const()[name = string("op_5398_strides_0"), val = tensor([1, 1])]; tensor var_5398_pad_0 = const()[name = string("op_5398_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5398_dilations_0 = const()[name = string("op_5398_dilations_0"), val = tensor([1, 1])]; int32 var_5398_groups_0 = const()[name = string("op_5398_groups_0"), val = int32(1)]; tensor var_5398 = conv(dilations = var_5398_dilations_0, groups = var_5398_groups_0, pad = var_5398_pad_0, pad_type = var_5398_pad_type_0, strides = var_5398_strides_0, weight = encoder_layers_22_self_attn_k_proj_weight_quantized, x = var_5381_cast_fp16)[name = string("op_5398")]; tensor var_5399 = const()[name = string("op_5399"), val = tensor([1, 1, 256, 128])]; tensor var_5400 = reshape(shape = var_5399, x = var_5398)[name = string("op_5400")]; tensor var_5401 = const()[name = string("op_5401"), val = tensor([0, 1, 3, 2])]; string var_5408_pad_type_0 = const()[name = string("op_5408_pad_type_0"), val = string("valid")]; tensor var_5408_strides_0 = const()[name = string("op_5408_strides_0"), val = tensor([1, 1])]; tensor var_5408_pad_0 = const()[name = string("op_5408_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5408_dilations_0 = const()[name = string("op_5408_dilations_0"), val = tensor([1, 1])]; int32 var_5408_groups_0 = const()[name = string("op_5408_groups_0"), val = int32(1)]; tensor var_5408 = conv(dilations = var_5408_dilations_0, groups = var_5408_groups_0, pad = var_5408_pad_0, pad_type = var_5408_pad_type_0, strides = var_5408_strides_0, weight = encoder_layers_22_self_attn_v_proj_weight_quantized, x = var_5381_cast_fp16)[name = string("op_5408")]; tensor var_5409 = const()[name = string("op_5409"), val = tensor([1, 1, 256, 128])]; tensor var_5410 = reshape(shape = var_5409, x = var_5408)[name = string("op_5410")]; tensor var_5411 = const()[name = string("op_5411"), val = tensor([0, 1, 3, 2])]; fp16 const_310_promoted_to_fp16 = const()[name = string("const_310_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_133 = transpose(perm = var_5391, x = var_5390)[name = string("transpose_16")]; tensor var_5417_cast_fp16 = mul(x = q_133, y = const_310_promoted_to_fp16)[name = string("op_5417_cast_fp16")]; bool input_445_interleave_0 = const()[name = string("input_445_interleave_0"), val = bool(false)]; tensor input_445_cast_fp16 = concat(axis = var_23, interleave = input_445_interleave_0, values = (q_133, var_5417_cast_fp16))[name = string("input_445_cast_fp16")]; tensor normed_623_axes_0 = const()[name = string("normed_623_axes_0"), val = tensor([-1])]; tensor normed_623_cast_fp16 = layer_norm(axes = normed_623_axes_0, epsilon = var_8_to_fp16, x = input_445_cast_fp16)[name = string("normed_623_cast_fp16")]; tensor var_5422_split_sizes_0 = const()[name = string("op_5422_split_sizes_0"), val = tensor([256, 256])]; int32 var_5422_axis_0 = const()[name = string("op_5422_axis_0"), val = int32(-1)]; tensor var_5422_cast_fp16_0, tensor var_5422_cast_fp16_1 = split(axis = var_5422_axis_0, split_sizes = var_5422_split_sizes_0, x = normed_623_cast_fp16)[name = string("op_5422_cast_fp16")]; tensor var_5426_to_fp16 = const()[name = string("op_5426_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307418496)))]; tensor out_267_cast_fp16 = mul(x = var_5422_cast_fp16_0, y = var_5426_to_fp16)[name = string("out_267_cast_fp16")]; fp16 const_312_promoted_to_fp16 = const()[name = string("const_312_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_133 = transpose(perm = var_5401, x = var_5400)[name = string("transpose_15")]; tensor var_5433_cast_fp16 = mul(x = k_133, y = const_312_promoted_to_fp16)[name = string("op_5433_cast_fp16")]; bool input_447_interleave_0 = const()[name = string("input_447_interleave_0"), val = bool(false)]; tensor input_447_cast_fp16 = concat(axis = var_23, interleave = input_447_interleave_0, values = (k_133, var_5433_cast_fp16))[name = string("input_447_cast_fp16")]; tensor normed_627_axes_0 = const()[name = string("normed_627_axes_0"), val = tensor([-1])]; tensor normed_627_cast_fp16 = layer_norm(axes = normed_627_axes_0, epsilon = var_8_to_fp16, x = input_447_cast_fp16)[name = string("normed_627_cast_fp16")]; tensor var_5438_split_sizes_0 = const()[name = string("op_5438_split_sizes_0"), val = tensor([256, 256])]; int32 var_5438_axis_0 = const()[name = string("op_5438_axis_0"), val = int32(-1)]; tensor var_5438_cast_fp16_0, tensor var_5438_cast_fp16_1 = split(axis = var_5438_axis_0, split_sizes = var_5438_split_sizes_0, x = normed_627_cast_fp16)[name = string("op_5438_cast_fp16")]; tensor var_5442_to_fp16 = const()[name = string("op_5442_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307419072)))]; tensor out_269_cast_fp16 = mul(x = var_5438_cast_fp16_0, y = var_5442_to_fp16)[name = string("out_269_cast_fp16")]; tensor var_5445 = mul(x = out_267_cast_fp16, y = cos_1_quantized)[name = string("op_5445")]; tensor var_5446_split_sizes_0 = const()[name = string("op_5446_split_sizes_0"), val = tensor([128, 128])]; int32 var_5446_axis_0 = const()[name = string("op_5446_axis_0"), val = int32(-1)]; tensor var_5446_0, tensor var_5446_1 = split(axis = var_5446_axis_0, split_sizes = var_5446_split_sizes_0, x = out_267_cast_fp16)[name = string("op_5446")]; fp16 const_314_promoted = const()[name = string("const_314_promoted"), val = fp16(-0x1p+0)]; tensor var_5448 = mul(x = var_5446_1, y = const_314_promoted)[name = string("op_5448")]; bool var_5450_interleave_0 = const()[name = string("op_5450_interleave_0"), val = bool(false)]; tensor var_5450 = concat(axis = var_23, interleave = var_5450_interleave_0, values = (var_5448, var_5446_0))[name = string("op_5450")]; tensor var_5451 = mul(x = var_5450, y = sin_1_quantized)[name = string("op_5451")]; tensor q_137 = add(x = var_5445, y = var_5451)[name = string("q_137")]; tensor var_5453 = mul(x = out_269_cast_fp16, y = cos_1_quantized)[name = string("op_5453")]; tensor var_5454_split_sizes_0 = const()[name = string("op_5454_split_sizes_0"), val = tensor([128, 128])]; int32 var_5454_axis_0 = const()[name = string("op_5454_axis_0"), val = int32(-1)]; tensor var_5454_0, tensor var_5454_1 = split(axis = var_5454_axis_0, split_sizes = var_5454_split_sizes_0, x = out_269_cast_fp16)[name = string("op_5454")]; fp16 const_315_promoted = const()[name = string("const_315_promoted"), val = fp16(-0x1p+0)]; tensor var_5456 = mul(x = var_5454_1, y = const_315_promoted)[name = string("op_5456")]; bool var_5458_interleave_0 = const()[name = string("op_5458_interleave_0"), val = bool(false)]; tensor var_5458 = concat(axis = var_23, interleave = var_5458_interleave_0, values = (var_5456, var_5454_0))[name = string("op_5458")]; tensor var_5459 = mul(x = var_5458, y = sin_1_quantized)[name = string("op_5459")]; tensor hidden_states_265 = add(x = var_5453, y = var_5459)[name = string("hidden_states_265")]; tensor hidden_states_267_axes_0 = const()[name = string("hidden_states_267_axes_0"), val = tensor([2])]; tensor hidden_states_267 = expand_dims(axes = hidden_states_267_axes_0, x = hidden_states_265)[name = string("hidden_states_267")]; tensor var_5462 = const()[name = string("op_5462"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_269 = tile(reps = var_5462, x = hidden_states_267)[name = string("hidden_states_269")]; tensor var_5464 = const()[name = string("op_5464"), val = tensor([1, 3, 128, 256])]; tensor k_137 = reshape(shape = var_5464, x = hidden_states_269)[name = string("k_137")]; tensor hidden_states_273_axes_0 = const()[name = string("hidden_states_273_axes_0"), val = tensor([2])]; tensor hidden_states_271 = transpose(perm = var_5411, x = var_5410)[name = string("transpose_14")]; tensor hidden_states_273 = expand_dims(axes = hidden_states_273_axes_0, x = hidden_states_271)[name = string("hidden_states_273")]; tensor var_5467 = const()[name = string("op_5467"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_275 = tile(reps = var_5467, x = hidden_states_273)[name = string("hidden_states_275")]; tensor var_5469 = const()[name = string("op_5469"), val = tensor([1, 3, 128, 256])]; tensor v_45 = reshape(shape = var_5469, x = hidden_states_275)[name = string("v_45")]; bool var_5474_transpose_x_1 = const()[name = string("op_5474_transpose_x_1"), val = bool(false)]; bool var_5474_transpose_y_1 = const()[name = string("op_5474_transpose_y_1"), val = bool(true)]; tensor var_5474_cast_fp16 = matmul(transpose_x = var_5474_transpose_x_1, transpose_y = var_5474_transpose_y_1, x = q_137, y = k_137)[name = string("op_5474_cast_fp16")]; fp16 var_5475_to_fp16 = const()[name = string("op_5475_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_133_cast_fp16 = mul(x = var_5474_cast_fp16, y = var_5475_to_fp16)[name = string("attn_weights_133_cast_fp16")]; tensor attn_weights_135_cast_fp16 = add(x = attn_weights_133_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_135_cast_fp16")]; tensor var_5479_cast_fp16 = softmax(axis = var_23, x = attn_weights_135_cast_fp16)[name = string("op_5479_cast_fp16")]; bool var_5483_transpose_x_0 = const()[name = string("op_5483_transpose_x_0"), val = bool(false)]; bool var_5483_transpose_y_0 = const()[name = string("op_5483_transpose_y_0"), val = bool(false)]; tensor var_5483_cast_fp16 = matmul(transpose_x = var_5483_transpose_x_0, transpose_y = var_5483_transpose_y_0, x = var_5479_cast_fp16, y = v_45)[name = string("op_5483_cast_fp16")]; tensor var_5485 = const()[name = string("op_5485"), val = tensor([0, 2, 1, 3])]; tensor var_5488 = const()[name = string("op_5488"), val = tensor([1, 128, 768])]; tensor var_5486 = transpose(perm = var_5485, x = var_5483_cast_fp16)[name = string("transpose_13")]; tensor attn_out_135 = reshape(shape = var_5488, x = var_5486)[name = string("attn_out_135")]; tensor var_5490 = const()[name = string("op_5490"), val = tensor([0, 2, 1])]; tensor squeeze_22_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307419648))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308009536))))[name = string("squeeze_22_quantized")]; string var_5499_pad_type_0 = const()[name = string("op_5499_pad_type_0"), val = string("valid")]; int32 var_5499_groups_0 = const()[name = string("op_5499_groups_0"), val = int32(1)]; tensor var_5499_strides_0 = const()[name = string("op_5499_strides_0"), val = tensor([1])]; tensor var_5499_pad_0 = const()[name = string("op_5499_pad_0"), val = tensor([0, 0])]; tensor var_5499_dilations_0 = const()[name = string("op_5499_dilations_0"), val = tensor([1])]; tensor var_5491 = transpose(perm = var_5490, x = attn_out_135)[name = string("transpose_12")]; tensor var_5499 = conv(dilations = var_5499_dilations_0, groups = var_5499_groups_0, pad = var_5499_pad_0, pad_type = var_5499_pad_type_0, strides = var_5499_strides_0, weight = squeeze_22_quantized, x = var_5491)[name = string("op_5499")]; tensor var_5500 = const()[name = string("op_5500"), val = tensor([0, 2, 1])]; fp16 const_316_promoted_to_fp16 = const()[name = string("const_316_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_361 = transpose(perm = var_5500, x = var_5499)[name = string("transpose_11")]; tensor var_5504_cast_fp16 = mul(x = x_361, y = const_316_promoted_to_fp16)[name = string("op_5504_cast_fp16")]; bool input_451_interleave_0 = const()[name = string("input_451_interleave_0"), val = bool(false)]; tensor input_451_cast_fp16 = concat(axis = var_23, interleave = input_451_interleave_0, values = (x_361, var_5504_cast_fp16))[name = string("input_451_cast_fp16")]; tensor normed_631_axes_0 = const()[name = string("normed_631_axes_0"), val = tensor([-1])]; tensor normed_631_cast_fp16 = layer_norm(axes = normed_631_axes_0, epsilon = var_8_to_fp16, x = input_451_cast_fp16)[name = string("normed_631_cast_fp16")]; tensor var_5509_split_sizes_0 = const()[name = string("op_5509_split_sizes_0"), val = tensor([768, 768])]; int32 var_5509_axis_0 = const()[name = string("op_5509_axis_0"), val = int32(-1)]; tensor var_5509_cast_fp16_0, tensor var_5509_cast_fp16_1 = split(axis = var_5509_axis_0, split_sizes = var_5509_split_sizes_0, x = normed_631_cast_fp16)[name = string("op_5509_cast_fp16")]; tensor var_5513_to_fp16 = const()[name = string("op_5513_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308011136)))]; tensor out_271_cast_fp16 = mul(x = var_5509_cast_fp16_0, y = var_5513_to_fp16)[name = string("out_271_cast_fp16")]; tensor x_363_cast_fp16 = add(x = x_353_cast_fp16, y = out_271_cast_fp16)[name = string("x_363_cast_fp16")]; fp16 const_318_promoted_to_fp16 = const()[name = string("const_318_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5520_cast_fp16 = mul(x = x_363_cast_fp16, y = const_318_promoted_to_fp16)[name = string("op_5520_cast_fp16")]; bool input_453_interleave_0 = const()[name = string("input_453_interleave_0"), val = bool(false)]; tensor input_453_cast_fp16 = concat(axis = var_23, interleave = input_453_interleave_0, values = (x_363_cast_fp16, var_5520_cast_fp16))[name = string("input_453_cast_fp16")]; tensor normed_635_axes_0 = const()[name = string("normed_635_axes_0"), val = tensor([-1])]; tensor normed_635_cast_fp16 = layer_norm(axes = normed_635_axes_0, epsilon = var_8_to_fp16, x = input_453_cast_fp16)[name = string("normed_635_cast_fp16")]; tensor var_5525_split_sizes_0 = const()[name = string("op_5525_split_sizes_0"), val = tensor([768, 768])]; int32 var_5525_axis_0 = const()[name = string("op_5525_axis_0"), val = int32(-1)]; tensor var_5525_cast_fp16_0, tensor var_5525_cast_fp16_1 = split(axis = var_5525_axis_0, split_sizes = var_5525_split_sizes_0, x = normed_635_cast_fp16)[name = string("op_5525_cast_fp16")]; tensor var_5529_to_fp16 = const()[name = string("op_5529_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308012736)))]; tensor out_273_cast_fp16 = mul(x = var_5525_cast_fp16_0, y = var_5529_to_fp16)[name = string("out_273_cast_fp16")]; tensor var_5536 = const()[name = string("op_5536"), val = tensor([0, 2, 1])]; tensor input_455_axes_0 = const()[name = string("input_455_axes_0"), val = tensor([2])]; tensor var_5537 = transpose(perm = var_5536, x = out_273_cast_fp16)[name = string("transpose_10")]; tensor input_455 = expand_dims(axes = input_455_axes_0, x = var_5537)[name = string("input_455")]; string gate_89_pad_type_0 = const()[name = string("gate_89_pad_type_0"), val = string("valid")]; tensor gate_89_strides_0 = const()[name = string("gate_89_strides_0"), val = tensor([1, 1])]; tensor gate_89_pad_0 = const()[name = string("gate_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_89_dilations_0 = const()[name = string("gate_89_dilations_0"), val = tensor([1, 1])]; int32 gate_89_groups_0 = const()[name = string("gate_89_groups_0"), val = int32(1)]; tensor gate_89 = conv(dilations = gate_89_dilations_0, groups = gate_89_groups_0, pad = gate_89_pad_0, pad_type = gate_89_pad_type_0, strides = gate_89_strides_0, weight = encoder_layers_22_mlp_gate_proj_weight_quantized, x = input_455)[name = string("gate_89")]; string up_45_pad_type_0 = const()[name = string("up_45_pad_type_0"), val = string("valid")]; tensor up_45_strides_0 = const()[name = string("up_45_strides_0"), val = tensor([1, 1])]; tensor up_45_pad_0 = const()[name = string("up_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_45_dilations_0 = const()[name = string("up_45_dilations_0"), val = tensor([1, 1])]; int32 up_45_groups_0 = const()[name = string("up_45_groups_0"), val = int32(1)]; tensor up_45 = conv(dilations = up_45_dilations_0, groups = up_45_groups_0, pad = up_45_pad_0, pad_type = up_45_pad_type_0, strides = up_45_strides_0, weight = encoder_layers_22_mlp_up_proj_weight_quantized, x = input_455)[name = string("up_45")]; string gate_91_mode_0 = const()[name = string("gate_91_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_91 = gelu(mode = gate_91_mode_0, x = gate_89)[name = string("gate_91")]; tensor input_457 = mul(x = gate_91, y = up_45)[name = string("input_457")]; string var_5558_pad_type_0 = const()[name = string("op_5558_pad_type_0"), val = string("valid")]; tensor var_5558_strides_0 = const()[name = string("op_5558_strides_0"), val = tensor([1, 1])]; tensor var_5558_pad_0 = const()[name = string("op_5558_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5558_dilations_0 = const()[name = string("op_5558_dilations_0"), val = tensor([1, 1])]; int32 var_5558_groups_0 = const()[name = string("op_5558_groups_0"), val = int32(1)]; tensor var_5558 = conv(dilations = var_5558_dilations_0, groups = var_5558_groups_0, pad = var_5558_pad_0, pad_type = var_5558_pad_type_0, strides = var_5558_strides_0, weight = encoder_layers_22_mlp_down_proj_weight_quantized, x = input_457)[name = string("op_5558")]; tensor var_5559_axes_0 = const()[name = string("op_5559_axes_0"), val = tensor([2])]; tensor var_5559 = squeeze(axes = var_5559_axes_0, x = var_5558)[name = string("op_5559")]; tensor var_5560 = const()[name = string("op_5560"), val = tensor([0, 2, 1])]; fp16 const_320_promoted_to_fp16 = const()[name = string("const_320_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_367 = transpose(perm = var_5560, x = var_5559)[name = string("transpose_9")]; tensor var_5564_cast_fp16 = mul(x = x_367, y = const_320_promoted_to_fp16)[name = string("op_5564_cast_fp16")]; bool input_459_interleave_0 = const()[name = string("input_459_interleave_0"), val = bool(false)]; tensor input_459_cast_fp16 = concat(axis = var_23, interleave = input_459_interleave_0, values = (x_367, var_5564_cast_fp16))[name = string("input_459_cast_fp16")]; tensor normed_641_axes_0 = const()[name = string("normed_641_axes_0"), val = tensor([-1])]; tensor normed_641_cast_fp16 = layer_norm(axes = normed_641_axes_0, epsilon = var_8_to_fp16, x = input_459_cast_fp16)[name = string("normed_641_cast_fp16")]; tensor var_5569_split_sizes_0 = const()[name = string("op_5569_split_sizes_0"), val = tensor([768, 768])]; int32 var_5569_axis_0 = const()[name = string("op_5569_axis_0"), val = int32(-1)]; tensor var_5569_cast_fp16_0, tensor var_5569_cast_fp16_1 = split(axis = var_5569_axis_0, split_sizes = var_5569_split_sizes_0, x = normed_641_cast_fp16)[name = string("op_5569_cast_fp16")]; tensor var_5573_to_fp16 = const()[name = string("op_5573_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308014336)))]; tensor out_275_cast_fp16 = mul(x = var_5569_cast_fp16_0, y = var_5573_to_fp16)[name = string("out_275_cast_fp16")]; tensor x_369_cast_fp16 = add(x = x_363_cast_fp16, y = out_275_cast_fp16)[name = string("x_369_cast_fp16")]; fp16 const_322_promoted_to_fp16 = const()[name = string("const_322_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5602_cast_fp16 = mul(x = x_369_cast_fp16, y = const_322_promoted_to_fp16)[name = string("op_5602_cast_fp16")]; bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; tensor input_461_cast_fp16 = concat(axis = var_23, interleave = input_461_interleave_0, values = (x_369_cast_fp16, var_5602_cast_fp16))[name = string("input_461_cast_fp16")]; tensor normed_645_axes_0 = const()[name = string("normed_645_axes_0"), val = tensor([-1])]; tensor normed_645_cast_fp16 = layer_norm(axes = normed_645_axes_0, epsilon = var_8_to_fp16, x = input_461_cast_fp16)[name = string("normed_645_cast_fp16")]; tensor var_5607_split_sizes_0 = const()[name = string("op_5607_split_sizes_0"), val = tensor([768, 768])]; int32 var_5607_axis_0 = const()[name = string("op_5607_axis_0"), val = int32(-1)]; tensor var_5607_cast_fp16_0, tensor var_5607_cast_fp16_1 = split(axis = var_5607_axis_0, split_sizes = var_5607_split_sizes_0, x = normed_645_cast_fp16)[name = string("op_5607_cast_fp16")]; tensor var_5611_to_fp16 = const()[name = string("op_5611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308015936)))]; tensor out_277_cast_fp16 = mul(x = var_5607_cast_fp16_0, y = var_5611_to_fp16)[name = string("out_277_cast_fp16")]; tensor var_5617 = const()[name = string("op_5617"), val = tensor([0, 2, 1])]; tensor var_5619_axes_0 = const()[name = string("op_5619_axes_0"), val = tensor([2])]; tensor var_5618_cast_fp16 = transpose(perm = var_5617, x = out_277_cast_fp16)[name = string("transpose_8")]; tensor var_5619_cast_fp16 = expand_dims(axes = var_5619_axes_0, x = var_5618_cast_fp16)[name = string("op_5619_cast_fp16")]; string var_5626_pad_type_0 = const()[name = string("op_5626_pad_type_0"), val = string("valid")]; tensor var_5626_strides_0 = const()[name = string("op_5626_strides_0"), val = tensor([1, 1])]; tensor var_5626_pad_0 = const()[name = string("op_5626_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5626_dilations_0 = const()[name = string("op_5626_dilations_0"), val = tensor([1, 1])]; int32 var_5626_groups_0 = const()[name = string("op_5626_groups_0"), val = int32(1)]; tensor var_5626 = conv(dilations = var_5626_dilations_0, groups = var_5626_groups_0, pad = var_5626_pad_0, pad_type = var_5626_pad_type_0, strides = var_5626_strides_0, weight = encoder_layers_23_self_attn_q_proj_weight_quantized, x = var_5619_cast_fp16)[name = string("op_5626")]; tensor var_5627 = const()[name = string("op_5627"), val = tensor([1, 3, 256, 128])]; tensor var_5628 = reshape(shape = var_5627, x = var_5626)[name = string("op_5628")]; tensor var_5629 = const()[name = string("op_5629"), val = tensor([0, 1, 3, 2])]; string var_5636_pad_type_0 = const()[name = string("op_5636_pad_type_0"), val = string("valid")]; tensor var_5636_strides_0 = const()[name = string("op_5636_strides_0"), val = tensor([1, 1])]; tensor var_5636_pad_0 = const()[name = string("op_5636_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5636_dilations_0 = const()[name = string("op_5636_dilations_0"), val = tensor([1, 1])]; int32 var_5636_groups_0 = const()[name = string("op_5636_groups_0"), val = int32(1)]; tensor var_5636 = conv(dilations = var_5636_dilations_0, groups = var_5636_groups_0, pad = var_5636_pad_0, pad_type = var_5636_pad_type_0, strides = var_5636_strides_0, weight = encoder_layers_23_self_attn_k_proj_weight_quantized, x = var_5619_cast_fp16)[name = string("op_5636")]; tensor var_5637 = const()[name = string("op_5637"), val = tensor([1, 1, 256, 128])]; tensor var_5638 = reshape(shape = var_5637, x = var_5636)[name = string("op_5638")]; tensor var_5639 = const()[name = string("op_5639"), val = tensor([0, 1, 3, 2])]; string var_5646_pad_type_0 = const()[name = string("op_5646_pad_type_0"), val = string("valid")]; tensor var_5646_strides_0 = const()[name = string("op_5646_strides_0"), val = tensor([1, 1])]; tensor var_5646_pad_0 = const()[name = string("op_5646_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5646_dilations_0 = const()[name = string("op_5646_dilations_0"), val = tensor([1, 1])]; int32 var_5646_groups_0 = const()[name = string("op_5646_groups_0"), val = int32(1)]; tensor var_5646 = conv(dilations = var_5646_dilations_0, groups = var_5646_groups_0, pad = var_5646_pad_0, pad_type = var_5646_pad_type_0, strides = var_5646_strides_0, weight = encoder_layers_23_self_attn_v_proj_weight_quantized, x = var_5619_cast_fp16)[name = string("op_5646")]; tensor var_5647 = const()[name = string("op_5647"), val = tensor([1, 1, 256, 128])]; tensor var_5648 = reshape(shape = var_5647, x = var_5646)[name = string("op_5648")]; tensor var_5649 = const()[name = string("op_5649"), val = tensor([0, 1, 3, 2])]; fp16 const_324_promoted_to_fp16 = const()[name = string("const_324_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_139 = transpose(perm = var_5629, x = var_5628)[name = string("transpose_7")]; tensor var_5655_cast_fp16 = mul(x = q_139, y = const_324_promoted_to_fp16)[name = string("op_5655_cast_fp16")]; bool input_465_interleave_0 = const()[name = string("input_465_interleave_0"), val = bool(false)]; tensor input_465_cast_fp16 = concat(axis = var_23, interleave = input_465_interleave_0, values = (q_139, var_5655_cast_fp16))[name = string("input_465_cast_fp16")]; tensor normed_651_axes_0 = const()[name = string("normed_651_axes_0"), val = tensor([-1])]; tensor normed_651_cast_fp16 = layer_norm(axes = normed_651_axes_0, epsilon = var_8_to_fp16, x = input_465_cast_fp16)[name = string("normed_651_cast_fp16")]; tensor var_5660_split_sizes_0 = const()[name = string("op_5660_split_sizes_0"), val = tensor([256, 256])]; int32 var_5660_axis_0 = const()[name = string("op_5660_axis_0"), val = int32(-1)]; tensor var_5660_cast_fp16_0, tensor var_5660_cast_fp16_1 = split(axis = var_5660_axis_0, split_sizes = var_5660_split_sizes_0, x = normed_651_cast_fp16)[name = string("op_5660_cast_fp16")]; tensor var_5664_to_fp16 = const()[name = string("op_5664_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308017536)))]; tensor out_279_cast_fp16 = mul(x = var_5660_cast_fp16_0, y = var_5664_to_fp16)[name = string("out_279_cast_fp16")]; fp16 const_326_promoted_to_fp16 = const()[name = string("const_326_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_139 = transpose(perm = var_5639, x = var_5638)[name = string("transpose_6")]; tensor var_5671_cast_fp16 = mul(x = k_139, y = const_326_promoted_to_fp16)[name = string("op_5671_cast_fp16")]; bool input_467_interleave_0 = const()[name = string("input_467_interleave_0"), val = bool(false)]; tensor input_467_cast_fp16 = concat(axis = var_23, interleave = input_467_interleave_0, values = (k_139, var_5671_cast_fp16))[name = string("input_467_cast_fp16")]; tensor normed_655_axes_0 = const()[name = string("normed_655_axes_0"), val = tensor([-1])]; tensor normed_655_cast_fp16 = layer_norm(axes = normed_655_axes_0, epsilon = var_8_to_fp16, x = input_467_cast_fp16)[name = string("normed_655_cast_fp16")]; tensor var_5676_split_sizes_0 = const()[name = string("op_5676_split_sizes_0"), val = tensor([256, 256])]; int32 var_5676_axis_0 = const()[name = string("op_5676_axis_0"), val = int32(-1)]; tensor var_5676_cast_fp16_0, tensor var_5676_cast_fp16_1 = split(axis = var_5676_axis_0, split_sizes = var_5676_split_sizes_0, x = normed_655_cast_fp16)[name = string("op_5676_cast_fp16")]; tensor var_5680_to_fp16 = const()[name = string("op_5680_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308018112)))]; tensor out_281_cast_fp16 = mul(x = var_5676_cast_fp16_0, y = var_5680_to_fp16)[name = string("out_281_cast_fp16")]; tensor var_5683 = mul(x = out_279_cast_fp16, y = cos_quantized)[name = string("op_5683")]; tensor var_5684_split_sizes_0 = const()[name = string("op_5684_split_sizes_0"), val = tensor([128, 128])]; int32 var_5684_axis_0 = const()[name = string("op_5684_axis_0"), val = int32(-1)]; tensor var_5684_0, tensor var_5684_1 = split(axis = var_5684_axis_0, split_sizes = var_5684_split_sizes_0, x = out_279_cast_fp16)[name = string("op_5684")]; fp16 const_328_promoted = const()[name = string("const_328_promoted"), val = fp16(-0x1p+0)]; tensor var_5686 = mul(x = var_5684_1, y = const_328_promoted)[name = string("op_5686")]; bool var_5688_interleave_0 = const()[name = string("op_5688_interleave_0"), val = bool(false)]; tensor var_5688 = concat(axis = var_23, interleave = var_5688_interleave_0, values = (var_5686, var_5684_0))[name = string("op_5688")]; tensor var_5689 = mul(x = var_5688, y = sin_quantized)[name = string("op_5689")]; tensor q = add(x = var_5683, y = var_5689)[name = string("q")]; tensor var_5691 = mul(x = out_281_cast_fp16, y = cos_quantized)[name = string("op_5691")]; tensor var_5692_split_sizes_0 = const()[name = string("op_5692_split_sizes_0"), val = tensor([128, 128])]; int32 var_5692_axis_0 = const()[name = string("op_5692_axis_0"), val = int32(-1)]; tensor var_5692_0, tensor var_5692_1 = split(axis = var_5692_axis_0, split_sizes = var_5692_split_sizes_0, x = out_281_cast_fp16)[name = string("op_5692")]; fp16 const_329_promoted = const()[name = string("const_329_promoted"), val = fp16(-0x1p+0)]; tensor var_5694 = mul(x = var_5692_1, y = const_329_promoted)[name = string("op_5694")]; bool var_5696_interleave_0 = const()[name = string("op_5696_interleave_0"), val = bool(false)]; tensor var_5696 = concat(axis = var_23, interleave = var_5696_interleave_0, values = (var_5694, var_5692_0))[name = string("op_5696")]; tensor var_5697 = mul(x = var_5696, y = sin_quantized)[name = string("op_5697")]; tensor hidden_states_277 = add(x = var_5691, y = var_5697)[name = string("hidden_states_277")]; tensor hidden_states_279_axes_0 = const()[name = string("hidden_states_279_axes_0"), val = tensor([2])]; tensor hidden_states_279 = expand_dims(axes = hidden_states_279_axes_0, x = hidden_states_277)[name = string("hidden_states_279")]; tensor var_5700 = const()[name = string("op_5700"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_281 = tile(reps = var_5700, x = hidden_states_279)[name = string("hidden_states_281")]; tensor var_5702 = const()[name = string("op_5702"), val = tensor([1, 3, 128, 256])]; tensor k = reshape(shape = var_5702, x = hidden_states_281)[name = string("k")]; tensor hidden_states_285_axes_0 = const()[name = string("hidden_states_285_axes_0"), val = tensor([2])]; tensor hidden_states_283 = transpose(perm = var_5649, x = var_5648)[name = string("transpose_5")]; tensor hidden_states_285 = expand_dims(axes = hidden_states_285_axes_0, x = hidden_states_283)[name = string("hidden_states_285")]; tensor var_5705 = const()[name = string("op_5705"), val = tensor([1, 1, 3, 1, 1])]; tensor hidden_states_287 = tile(reps = var_5705, x = hidden_states_285)[name = string("hidden_states_287")]; tensor var_5707 = const()[name = string("op_5707"), val = tensor([1, 3, 128, 256])]; tensor v = reshape(shape = var_5707, x = hidden_states_287)[name = string("v")]; bool var_5712_transpose_x_1 = const()[name = string("op_5712_transpose_x_1"), val = bool(false)]; bool var_5712_transpose_y_1 = const()[name = string("op_5712_transpose_y_1"), val = bool(true)]; tensor var_5712_cast_fp16 = matmul(transpose_x = var_5712_transpose_x_1, transpose_y = var_5712_transpose_y_1, x = q, y = k)[name = string("op_5712_cast_fp16")]; fp16 var_5713_to_fp16 = const()[name = string("op_5713_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_139_cast_fp16 = mul(x = var_5712_cast_fp16, y = var_5713_to_fp16)[name = string("attn_weights_139_cast_fp16")]; tensor attn_weights_141_cast_fp16 = add(x = attn_weights_139_cast_fp16, y = full_mask_cast_fp16)[name = string("attn_weights_141_cast_fp16")]; tensor var_5717_cast_fp16 = softmax(axis = var_23, x = attn_weights_141_cast_fp16)[name = string("op_5717_cast_fp16")]; bool var_5721_transpose_x_0 = const()[name = string("op_5721_transpose_x_0"), val = bool(false)]; bool var_5721_transpose_y_0 = const()[name = string("op_5721_transpose_y_0"), val = bool(false)]; tensor var_5721_cast_fp16 = matmul(transpose_x = var_5721_transpose_x_0, transpose_y = var_5721_transpose_y_0, x = var_5717_cast_fp16, y = v)[name = string("op_5721_cast_fp16")]; tensor var_5723 = const()[name = string("op_5723"), val = tensor([0, 2, 1, 3])]; tensor var_5726 = const()[name = string("op_5726"), val = tensor([1, 128, 768])]; tensor var_5724 = transpose(perm = var_5723, x = var_5721_cast_fp16)[name = string("transpose_4")]; tensor attn_out_141 = reshape(shape = var_5726, x = var_5724)[name = string("attn_out_141")]; tensor var_5728 = const()[name = string("op_5728"), val = tensor([0, 2, 1])]; tensor squeeze_23_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308018688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308608576))))[name = string("squeeze_23_quantized")]; string var_5737_pad_type_0 = const()[name = string("op_5737_pad_type_0"), val = string("valid")]; int32 var_5737_groups_0 = const()[name = string("op_5737_groups_0"), val = int32(1)]; tensor var_5737_strides_0 = const()[name = string("op_5737_strides_0"), val = tensor([1])]; tensor var_5737_pad_0 = const()[name = string("op_5737_pad_0"), val = tensor([0, 0])]; tensor var_5737_dilations_0 = const()[name = string("op_5737_dilations_0"), val = tensor([1])]; tensor var_5729 = transpose(perm = var_5728, x = attn_out_141)[name = string("transpose_3")]; tensor var_5737 = conv(dilations = var_5737_dilations_0, groups = var_5737_groups_0, pad = var_5737_pad_0, pad_type = var_5737_pad_type_0, strides = var_5737_strides_0, weight = squeeze_23_quantized, x = var_5729)[name = string("op_5737")]; tensor var_5738 = const()[name = string("op_5738"), val = tensor([0, 2, 1])]; fp16 const_330_promoted_to_fp16 = const()[name = string("const_330_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_377 = transpose(perm = var_5738, x = var_5737)[name = string("transpose_2")]; tensor var_5742_cast_fp16 = mul(x = x_377, y = const_330_promoted_to_fp16)[name = string("op_5742_cast_fp16")]; bool input_471_interleave_0 = const()[name = string("input_471_interleave_0"), val = bool(false)]; tensor input_471_cast_fp16 = concat(axis = var_23, interleave = input_471_interleave_0, values = (x_377, var_5742_cast_fp16))[name = string("input_471_cast_fp16")]; tensor normed_659_axes_0 = const()[name = string("normed_659_axes_0"), val = tensor([-1])]; tensor normed_659_cast_fp16 = layer_norm(axes = normed_659_axes_0, epsilon = var_8_to_fp16, x = input_471_cast_fp16)[name = string("normed_659_cast_fp16")]; tensor var_5747_split_sizes_0 = const()[name = string("op_5747_split_sizes_0"), val = tensor([768, 768])]; int32 var_5747_axis_0 = const()[name = string("op_5747_axis_0"), val = int32(-1)]; tensor var_5747_cast_fp16_0, tensor var_5747_cast_fp16_1 = split(axis = var_5747_axis_0, split_sizes = var_5747_split_sizes_0, x = normed_659_cast_fp16)[name = string("op_5747_cast_fp16")]; tensor var_5751_to_fp16 = const()[name = string("op_5751_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308610176)))]; tensor out_283_cast_fp16 = mul(x = var_5747_cast_fp16_0, y = var_5751_to_fp16)[name = string("out_283_cast_fp16")]; tensor x_379_cast_fp16 = add(x = x_369_cast_fp16, y = out_283_cast_fp16)[name = string("x_379_cast_fp16")]; fp16 const_332_promoted_to_fp16 = const()[name = string("const_332_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5758_cast_fp16 = mul(x = x_379_cast_fp16, y = const_332_promoted_to_fp16)[name = string("op_5758_cast_fp16")]; bool input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; tensor input_473_cast_fp16 = concat(axis = var_23, interleave = input_473_interleave_0, values = (x_379_cast_fp16, var_5758_cast_fp16))[name = string("input_473_cast_fp16")]; tensor normed_663_axes_0 = const()[name = string("normed_663_axes_0"), val = tensor([-1])]; tensor normed_663_cast_fp16 = layer_norm(axes = normed_663_axes_0, epsilon = var_8_to_fp16, x = input_473_cast_fp16)[name = string("normed_663_cast_fp16")]; tensor var_5763_split_sizes_0 = const()[name = string("op_5763_split_sizes_0"), val = tensor([768, 768])]; int32 var_5763_axis_0 = const()[name = string("op_5763_axis_0"), val = int32(-1)]; tensor var_5763_cast_fp16_0, tensor var_5763_cast_fp16_1 = split(axis = var_5763_axis_0, split_sizes = var_5763_split_sizes_0, x = normed_663_cast_fp16)[name = string("op_5763_cast_fp16")]; tensor var_5767_to_fp16 = const()[name = string("op_5767_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308611776)))]; tensor out_285_cast_fp16 = mul(x = var_5763_cast_fp16_0, y = var_5767_to_fp16)[name = string("out_285_cast_fp16")]; tensor var_5774 = const()[name = string("op_5774"), val = tensor([0, 2, 1])]; tensor input_475_axes_0 = const()[name = string("input_475_axes_0"), val = tensor([2])]; tensor var_5775 = transpose(perm = var_5774, x = out_285_cast_fp16)[name = string("transpose_1")]; tensor input_475 = expand_dims(axes = input_475_axes_0, x = var_5775)[name = string("input_475")]; string gate_93_pad_type_0 = const()[name = string("gate_93_pad_type_0"), val = string("valid")]; tensor gate_93_strides_0 = const()[name = string("gate_93_strides_0"), val = tensor([1, 1])]; tensor gate_93_pad_0 = const()[name = string("gate_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_93_dilations_0 = const()[name = string("gate_93_dilations_0"), val = tensor([1, 1])]; int32 gate_93_groups_0 = const()[name = string("gate_93_groups_0"), val = int32(1)]; tensor gate_93 = conv(dilations = gate_93_dilations_0, groups = gate_93_groups_0, pad = gate_93_pad_0, pad_type = gate_93_pad_type_0, strides = gate_93_strides_0, weight = encoder_layers_23_mlp_gate_proj_weight_quantized, x = input_475)[name = string("gate_93")]; string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = encoder_layers_23_mlp_up_proj_weight_quantized, x = input_475)[name = string("up")]; string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate = gelu(mode = gate_mode_0, x = gate_93)[name = string("gate")]; tensor input_477 = mul(x = gate, y = up)[name = string("input_477")]; string var_5796_pad_type_0 = const()[name = string("op_5796_pad_type_0"), val = string("valid")]; tensor var_5796_strides_0 = const()[name = string("op_5796_strides_0"), val = tensor([1, 1])]; tensor var_5796_pad_0 = const()[name = string("op_5796_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5796_dilations_0 = const()[name = string("op_5796_dilations_0"), val = tensor([1, 1])]; int32 var_5796_groups_0 = const()[name = string("op_5796_groups_0"), val = int32(1)]; tensor var_5796 = conv(dilations = var_5796_dilations_0, groups = var_5796_groups_0, pad = var_5796_pad_0, pad_type = var_5796_pad_type_0, strides = var_5796_strides_0, weight = encoder_layers_23_mlp_down_proj_weight_quantized, x = input_477)[name = string("op_5796")]; tensor var_5797_axes_0 = const()[name = string("op_5797_axes_0"), val = tensor([2])]; tensor var_5797 = squeeze(axes = var_5797_axes_0, x = var_5796)[name = string("op_5797")]; tensor var_5798 = const()[name = string("op_5798"), val = tensor([0, 2, 1])]; fp16 const_334_promoted_to_fp16 = const()[name = string("const_334_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_383 = transpose(perm = var_5798, x = var_5797)[name = string("transpose_0")]; tensor var_5802_cast_fp16 = mul(x = x_383, y = const_334_promoted_to_fp16)[name = string("op_5802_cast_fp16")]; bool input_479_interleave_0 = const()[name = string("input_479_interleave_0"), val = bool(false)]; tensor input_479_cast_fp16 = concat(axis = var_23, interleave = input_479_interleave_0, values = (x_383, var_5802_cast_fp16))[name = string("input_479_cast_fp16")]; tensor normed_669_axes_0 = const()[name = string("normed_669_axes_0"), val = tensor([-1])]; tensor normed_669_cast_fp16 = layer_norm(axes = normed_669_axes_0, epsilon = var_8_to_fp16, x = input_479_cast_fp16)[name = string("normed_669_cast_fp16")]; tensor var_5807_split_sizes_0 = const()[name = string("op_5807_split_sizes_0"), val = tensor([768, 768])]; int32 var_5807_axis_0 = const()[name = string("op_5807_axis_0"), val = int32(-1)]; tensor var_5807_cast_fp16_0, tensor var_5807_cast_fp16_1 = split(axis = var_5807_axis_0, split_sizes = var_5807_split_sizes_0, x = normed_669_cast_fp16)[name = string("op_5807_cast_fp16")]; tensor var_5811_to_fp16 = const()[name = string("op_5811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308613376)))]; tensor out_287_cast_fp16 = mul(x = var_5807_cast_fp16_0, y = var_5811_to_fp16)[name = string("out_287_cast_fp16")]; tensor x_385_cast_fp16 = add(x = x_379_cast_fp16, y = out_287_cast_fp16)[name = string("x_385_cast_fp16")]; fp16 const_336_promoted_to_fp16 = const()[name = string("const_336_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5818_cast_fp16 = mul(x = x_385_cast_fp16, y = const_336_promoted_to_fp16)[name = string("op_5818_cast_fp16")]; bool input_481_interleave_0 = const()[name = string("input_481_interleave_0"), val = bool(false)]; tensor input_481_cast_fp16 = concat(axis = var_23, interleave = input_481_interleave_0, values = (x_385_cast_fp16, var_5818_cast_fp16))[name = string("input_481_cast_fp16")]; tensor normed_673_axes_0 = const()[name = string("normed_673_axes_0"), val = tensor([-1])]; tensor normed_673_cast_fp16 = layer_norm(axes = normed_673_axes_0, epsilon = var_8_to_fp16, x = input_481_cast_fp16)[name = string("normed_673_cast_fp16")]; tensor var_5823_split_sizes_0 = const()[name = string("op_5823_split_sizes_0"), val = tensor([768, 768])]; int32 var_5823_axis_0 = const()[name = string("op_5823_axis_0"), val = int32(-1)]; tensor var_5823_cast_fp16_0, tensor var_5823_cast_fp16_1 = split(axis = var_5823_axis_0, split_sizes = var_5823_split_sizes_0, x = normed_673_cast_fp16)[name = string("op_5823_cast_fp16")]; tensor var_5827_to_fp16 = const()[name = string("op_5827_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308614976)))]; tensor out_cast_fp16 = mul(x = var_5823_cast_fp16_0, y = var_5827_to_fp16)[name = string("out_cast_fp16")]; tensor mask_axes_0 = const()[name = string("mask_axes_0"), val = tensor([-1])]; tensor mask_cast_fp16 = expand_dims(axes = mask_axes_0, x = attention_mask)[name = string("mask_cast_fp16")]; tensor masked_cast_fp16 = mul(x = out_cast_fp16, y = mask_cast_fp16)[name = string("masked_cast_fp16")]; tensor summed_axes_0 = const()[name = string("summed_axes_0"), val = tensor([1])]; bool summed_keep_dims_0 = const()[name = string("summed_keep_dims_0"), val = bool(false)]; tensor summed_cast_fp16 = reduce_sum(axes = summed_axes_0, keep_dims = summed_keep_dims_0, x = masked_cast_fp16)[name = string("summed_cast_fp16")]; tensor var_5847_axes_0 = const()[name = string("op_5847_axes_0"), val = tensor([1])]; bool var_5847_keep_dims_0 = const()[name = string("op_5847_keep_dims_0"), val = bool(false)]; tensor var_5847_cast_fp16 = reduce_sum(axes = var_5847_axes_0, keep_dims = var_5847_keep_dims_0, x = mask_cast_fp16)[name = string("op_5847_cast_fp16")]; fp16 var_5848_to_fp16 = const()[name = string("op_5848_to_fp16"), val = fp16(0x1p+0)]; tensor denom_cast_fp16 = maximum(x = var_5847_cast_fp16, y = var_5848_to_fp16)[name = string("denom_cast_fp16")]; tensor pooled_1_cast_fp16 = real_div(x = summed_cast_fp16, y = denom_cast_fp16)[name = string("pooled_1_cast_fp16")]; tensor var_5857_axes_0 = const()[name = string("op_5857_axes_0"), val = tensor([-1])]; tensor var_5857 = expand_dims(axes = var_5857_axes_0, x = pooled_1_cast_fp16)[name = string("op_5857")]; tensor input_483_axes_0 = const()[name = string("input_483_axes_0"), val = tensor([-1])]; tensor input_483 = expand_dims(axes = input_483_axes_0, x = var_5857)[name = string("input_483")]; string input_pad_type_0 = const()[name = string("input_pad_type_0"), val = string("valid")]; tensor input_strides_0 = const()[name = string("input_strides_0"), val = tensor([1, 1])]; tensor input_pad_0 = const()[name = string("input_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_dilations_0 = const()[name = string("input_dilations_0"), val = tensor([1, 1])]; int32 input_groups_0 = const()[name = string("input_groups_0"), val = int32(1)]; tensor input = conv(bias = dense1_bias, dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = dense1_weight_quantized, x = input_483)[name = string("input")]; string x_pad_type_0 = const()[name = string("x_pad_type_0"), val = string("valid")]; tensor x_strides_0 = const()[name = string("x_strides_0"), val = tensor([1, 1])]; tensor x_pad_0 = const()[name = string("x_pad_0"), val = tensor([0, 0, 0, 0])]; tensor x_dilations_0 = const()[name = string("x_dilations_0"), val = tensor([1, 1])]; int32 x_groups_0 = const()[name = string("x_groups_0"), val = int32(1)]; tensor x = conv(bias = dense2_bias, dilations = x_dilations_0, groups = x_groups_0, pad = x_pad_0, pad_type = x_pad_type_0, strides = x_strides_0, weight = dense2_weight_quantized, x = input)[name = string("x")]; tensor var_5884 = const()[name = string("op_5884"), val = tensor([1, 768])]; tensor pooled = reshape(shape = var_5884, x = x)[name = string("pooled")]; fp16 const_338 = const()[name = string("const_338"), val = fp16(0x1.1p-20)]; tensor var_5894 = abs(x = pooled)[name = string("op_5894")]; tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = var_5894)[name = string("reduce_max_0")]; tensor abs_max = maximum(x = reduce_max_0, y = const_338)[name = string("abs_max")]; tensor scaled_cast_fp16 = real_div(x = pooled, y = abs_max)[name = string("scaled_cast_fp16")]; tensor var_5901_cast_fp16 = mul(x = scaled_cast_fp16, y = scaled_cast_fp16)[name = string("op_5901_cast_fp16")]; tensor sumsq_axes_0 = const()[name = string("sumsq_axes_0"), val = tensor([-1])]; bool sumsq_keep_dims_0 = const()[name = string("sumsq_keep_dims_0"), val = bool(true)]; tensor sumsq_cast_fp16 = reduce_sum(axes = sumsq_axes_0, keep_dims = sumsq_keep_dims_0, x = var_5901_cast_fp16)[name = string("sumsq_cast_fp16")]; fp16 var_5908_to_fp16 = const()[name = string("op_5908_to_fp16"), val = fp16(0x1p-24)]; tensor var_5909_cast_fp16 = add(x = sumsq_cast_fp16, y = var_5908_to_fp16)[name = string("op_5909_cast_fp16")]; fp32 inv_norm_epsilon_0 = const()[name = string("inv_norm_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor inv_norm_cast_fp16 = rsqrt(epsilon = inv_norm_epsilon_0, x = var_5909_cast_fp16)[name = string("inv_norm_cast_fp16")]; tensor embedding = mul(x = scaled_cast_fp16, y = inv_norm_cast_fp16)[name = string("normalized_cast_fp16")]; } -> (embedding); }