diff --git "a/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mil" "b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mil" @@ -0,0 +1,1625 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})] +{ + func main(tensor melspectrogram_features) { + tensor var_76_pad_type_0 = const()[name = tensor("op_76_pad_type_0"), val = tensor("custom")]; + tensor var_76_pad_0 = const()[name = tensor("op_76_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_76_strides_0 = const()[name = tensor("op_76_strides_0"), val = tensor([1, 1])]; + tensor var_76_dilations_0 = const()[name = tensor("op_76_dilations_0"), val = tensor([1, 1])]; + tensor var_76_groups_0 = const()[name = tensor("op_76_groups_0"), val = tensor(1)]; + tensor var_45_to_fp16 = const()[name = tensor("op_45_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_57_to_fp16 = const()[name = tensor("op_57_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368768)))]; + tensor var_76_cast_fp16 = conv(bias = var_57_to_fp16, dilations = var_76_dilations_0, groups = var_76_groups_0, pad = var_76_pad_0, pad_type = var_76_pad_type_0, strides = var_76_strides_0, weight = var_45_to_fp16, x = melspectrogram_features)[name = tensor("op_76_cast_fp16")]; + tensor var_114_pad_type_0 = const()[name = tensor("op_114_pad_type_0"), val = tensor("custom")]; + tensor var_114_pad_0 = const()[name = tensor("op_114_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_114_strides_0 = const()[name = tensor("op_114_strides_0"), val = tensor([1, 1])]; + tensor var_114_dilations_0 = const()[name = tensor("op_114_dilations_0"), val = tensor([1, 1])]; + tensor var_114_groups_0 = const()[name = tensor("op_114_groups_0"), val = tensor(1)]; + tensor op_89_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(370368))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462592))), name = tensor("op_89_to_fp16_palettized"), shape = tensor([768, 80, 1, 3])]; + tensor var_95_to_fp16 = const()[name = tensor("op_95_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462720)))]; + tensor var_114_cast_fp16 = conv(bias = var_95_to_fp16, dilations = var_114_dilations_0, groups = var_114_groups_0, pad = var_114_pad_0, pad_type = var_114_pad_type_0, strides = var_114_strides_0, weight = op_89_to_fp16_palettized, x = melspectrogram_features)[name = tensor("op_114_cast_fp16")]; + tensor var_116_cast_fp16 = add(x = var_76_cast_fp16, y = var_114_cast_fp16)[name = tensor("op_116_cast_fp16")]; + tensor hidden_states_1_mode_0 = const()[name = tensor("hidden_states_1_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_116_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_162_pad_type_0 = const()[name = tensor("op_162_pad_type_0"), val = tensor("custom")]; + tensor var_162_pad_0 = const()[name = tensor("op_162_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_162_strides_0 = const()[name = tensor("op_162_strides_0"), val = tensor([2, 2])]; + tensor var_162_dilations_0 = const()[name = tensor("op_162_dilations_0"), val = tensor([1, 1])]; + tensor var_162_groups_0 = const()[name = tensor("op_162_groups_0"), val = tensor(1)]; + tensor var_131_to_fp16 = const()[name = tensor("op_131_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(464320)))]; + tensor var_162_cast_fp16 = conv(bias = var_57_to_fp16, dilations = var_162_dilations_0, groups = var_162_groups_0, pad = var_162_pad_0, pad_type = var_162_pad_type_0, strides = var_162_strides_0, weight = var_131_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor("op_162_cast_fp16")]; + tensor var_200_pad_type_0 = const()[name = tensor("op_200_pad_type_0"), val = tensor("custom")]; + tensor var_200_pad_0 = const()[name = tensor("op_200_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_200_strides_0 = const()[name = tensor("op_200_strides_0"), val = tensor([2, 2])]; + tensor var_200_dilations_0 = const()[name = tensor("op_200_dilations_0"), val = tensor([1, 1])]; + tensor var_200_groups_0 = const()[name = tensor("op_200_groups_0"), val = tensor(1)]; + tensor op_175_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4003328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4888128))), name = tensor("op_175_to_fp16_palettized"), shape = tensor([768, 768, 1, 3])]; + tensor var_181_to_fp16 = const()[name = tensor("op_181_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4888256)))]; + tensor var_200_cast_fp16 = conv(bias = var_181_to_fp16, dilations = var_200_dilations_0, groups = var_200_groups_0, pad = var_200_pad_0, pad_type = var_200_pad_type_0, strides = var_200_strides_0, weight = op_175_to_fp16_palettized, x = hidden_states_1_cast_fp16)[name = tensor("op_200_cast_fp16")]; + tensor var_202_cast_fp16 = add(x = var_162_cast_fp16, y = var_200_cast_fp16)[name = tensor("op_202_cast_fp16")]; + tensor hidden_states_3_mode_0 = const()[name = tensor("hidden_states_3_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_202_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor var_222_to_fp16 = const()[name = tensor("op_222_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4889856)))]; + tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_222_to_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; + tensor var_254_to_fp16 = const()[name = tensor("op_254_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_254_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7193920)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7195520)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7197120)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_276_pad_type_0 = const()[name = tensor("op_276_pad_type_0"), val = tensor("valid")]; + tensor var_276_strides_0 = const()[name = tensor("op_276_strides_0"), val = tensor([1, 1])]; + tensor var_276_pad_0 = const()[name = tensor("op_276_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_276_dilations_0 = const()[name = tensor("op_276_dilations_0"), val = tensor([1, 1])]; + tensor var_276_groups_0 = const()[name = tensor("op_276_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7198720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7493696))), name = tensor("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7493824)))]; + tensor var_276_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_276_dilations_0, groups = var_276_groups_0, pad = var_276_pad_0, pad_type = var_276_pad_type_0, strides = var_276_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_276_cast_fp16")]; + tensor var_282_pad_type_0 = const()[name = tensor("op_282_pad_type_0"), val = tensor("valid")]; + tensor var_282_strides_0 = const()[name = tensor("op_282_strides_0"), val = tensor([1, 1])]; + tensor var_282_pad_0 = const()[name = tensor("op_282_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_282_dilations_0 = const()[name = tensor("op_282_dilations_0"), val = tensor([1, 1])]; + tensor var_282_groups_0 = const()[name = tensor("op_282_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7520576))), name = tensor("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7495424))), shape = tensor([768, 768, 1, 1])]; + tensor var_282_cast_fp16 = conv(dilations = var_282_dilations_0, groups = var_282_groups_0, pad = var_282_pad_0, pad_type = var_282_pad_type_0, strides = var_282_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor query_1_cast_fp16 = add(x = var_276_cast_fp16, y = var_282_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_291_pad_type_0 = const()[name = tensor("op_291_pad_type_0"), val = tensor("valid")]; + tensor var_291_strides_0 = const()[name = tensor("op_291_strides_0"), val = tensor([1, 1])]; + tensor var_291_pad_0 = const()[name = tensor("op_291_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_291_dilations_0 = const()[name = tensor("op_291_dilations_0"), val = tensor([1, 1])]; + tensor var_291_groups_0 = const()[name = tensor("op_291_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7594368))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7889344))), name = tensor("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_291_cast_fp16 = conv(dilations = var_291_dilations_0, groups = var_291_groups_0, pad = var_291_pad_0, pad_type = var_291_pad_type_0, strides = var_291_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_291_cast_fp16")]; + tensor var_297_pad_type_0 = const()[name = tensor("op_297_pad_type_0"), val = tensor("valid")]; + tensor var_297_strides_0 = const()[name = tensor("op_297_strides_0"), val = tensor([1, 1])]; + tensor var_297_pad_0 = const()[name = tensor("op_297_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_297_dilations_0 = const()[name = tensor("op_297_dilations_0"), val = tensor([1, 1])]; + tensor var_297_groups_0 = const()[name = tensor("op_297_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7913600))), name = tensor("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7889472))), shape = tensor([768, 768, 1, 1])]; + tensor var_297_cast_fp16 = conv(dilations = var_297_dilations_0, groups = var_297_groups_0, pad = var_297_pad_0, pad_type = var_297_pad_type_0, strides = var_297_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_297_cast_fp16")]; + tensor key_1_cast_fp16 = add(x = var_291_cast_fp16, y = var_297_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_307_pad_type_0 = const()[name = tensor("op_307_pad_type_0"), val = tensor("valid")]; + tensor var_307_strides_0 = const()[name = tensor("op_307_strides_0"), val = tensor([1, 1])]; + tensor var_307_pad_0 = const()[name = tensor("op_307_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_307_dilations_0 = const()[name = tensor("op_307_dilations_0"), val = tensor([1, 1])]; + tensor var_307_groups_0 = const()[name = tensor("op_307_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7987392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8282368))), name = tensor("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8282496)))]; + tensor var_307_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_307_dilations_0, groups = var_307_groups_0, pad = var_307_pad_0, pad_type = var_307_pad_type_0, strides = var_307_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_307_cast_fp16")]; + tensor var_313_pad_type_0 = const()[name = tensor("op_313_pad_type_0"), val = tensor("valid")]; + tensor var_313_strides_0 = const()[name = tensor("op_313_strides_0"), val = tensor([1, 1])]; + tensor var_313_pad_0 = const()[name = tensor("op_313_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_313_dilations_0 = const()[name = tensor("op_313_dilations_0"), val = tensor([1, 1])]; + tensor var_313_groups_0 = const()[name = tensor("op_313_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8308864))), name = tensor("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8284096))), shape = tensor([768, 768, 1, 1])]; + tensor var_313_cast_fp16 = conv(dilations = var_313_dilations_0, groups = var_313_groups_0, pad = var_313_pad_0, pad_type = var_313_pad_type_0, strides = var_313_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_307_cast_fp16, y = var_313_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_317 = const()[name = tensor("op_317"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_317, x = query_1_cast_fp16)[name = tensor("mh_q_1_cast_fp16")]; + tensor var_319_to_fp16 = const()[name = tensor("op_319_to_fp16"), val = tensor(0x1p-3)]; + tensor var_320_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_319_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor var_323 = const()[name = tensor("op_323"), val = tensor([1, 12, 64, 1500])]; + tensor var_324_cast_fp16 = reshape(shape = var_323, x = key_1_cast_fp16)[name = tensor("op_324_cast_fp16")]; + tensor mh_w_1_transpose_x_0 = const()[name = tensor("mh_w_1_transpose_x_0"), val = tensor(true)]; + tensor mh_w_1_transpose_y_0 = const()[name = tensor("mh_w_1_transpose_y_0"), val = tensor(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_320_cast_fp16, y = var_324_cast_fp16)[name = tensor("mh_w_1_cast_fp16")]; + tensor var_327_cast_fp16 = softmax(axis = var_232, x = mh_w_1_cast_fp16)[name = tensor("op_327_cast_fp16")]; + tensor var_328 = const()[name = tensor("op_328"), val = tensor([1, 12, 64, 1500])]; + tensor var_329_cast_fp16 = reshape(shape = var_328, x = value_1_cast_fp16)[name = tensor("op_329_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_329_cast_fp16, y = var_327_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_332 = const()[name = tensor("op_332"), val = tensor([1, 768, 1, 1500])]; + tensor input_1_cast_fp16 = reshape(shape = var_332, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_342_pad_type_0 = const()[name = tensor("op_342_pad_type_0"), val = tensor("valid")]; + tensor var_342_strides_0 = const()[name = tensor("op_342_strides_0"), val = tensor([1, 1])]; + tensor var_342_pad_0 = const()[name = tensor("op_342_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_342_dilations_0 = const()[name = tensor("op_342_dilations_0"), val = tensor([1, 1])]; + tensor var_342_groups_0 = const()[name = tensor("op_342_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8382656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8677632))), name = tensor("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8677760)))]; + tensor var_342_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_342_dilations_0, groups = var_342_groups_0, pad = var_342_pad_0, pad_type = var_342_pad_type_0, strides = var_342_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = tensor("op_342_cast_fp16")]; + tensor var_348_pad_type_0 = const()[name = tensor("op_348_pad_type_0"), val = tensor("valid")]; + tensor var_348_strides_0 = const()[name = tensor("op_348_strides_0"), val = tensor([1, 1])]; + tensor var_348_pad_0 = const()[name = tensor("op_348_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_348_dilations_0 = const()[name = tensor("op_348_dilations_0"), val = tensor([1, 1])]; + tensor var_348_groups_0 = const()[name = tensor("op_348_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8699776))), name = tensor("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8679360))), shape = tensor([768, 768, 1, 1])]; + tensor var_348_cast_fp16 = conv(dilations = var_348_dilations_0, groups = var_348_groups_0, pad = var_348_pad_0, pad_type = var_348_pad_type_0, strides = var_348_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor obj_3_cast_fp16 = add(x = var_342_cast_fp16, y = var_348_cast_fp16)[name = tensor("obj_3_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; + tensor var_359_to_fp16 = const()[name = tensor("op_359_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_359_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor input_3_gamma_0_to_fp16 = const()[name = tensor("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8773568)))]; + tensor input_3_beta_0_to_fp16 = const()[name = tensor("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8775168)))]; + tensor input_3_epsilon_0_to_fp16 = const()[name = tensor("input_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_377_pad_type_0 = const()[name = tensor("op_377_pad_type_0"), val = tensor("valid")]; + tensor var_377_strides_0 = const()[name = tensor("op_377_strides_0"), val = tensor([1, 1])]; + tensor var_377_pad_0 = const()[name = tensor("op_377_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_377_dilations_0 = const()[name = tensor("op_377_dilations_0"), val = tensor([1, 1])]; + tensor var_377_groups_0 = const()[name = tensor("op_377_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8776768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9956480))), name = tensor("layers_0_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9956608)))]; + tensor var_377_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_377_dilations_0, groups = var_377_groups_0, pad = var_377_pad_0, pad_type = var_377_pad_type_0, strides = var_377_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = tensor("op_377_cast_fp16")]; + tensor var_383_pad_type_0 = const()[name = tensor("op_383_pad_type_0"), val = tensor("valid")]; + tensor var_383_strides_0 = const()[name = tensor("op_383_strides_0"), val = tensor([1, 1])]; + tensor var_383_pad_0 = const()[name = tensor("op_383_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_dilations_0 = const()[name = tensor("op_383_dilations_0"), val = tensor([1, 1])]; + tensor var_383_groups_0 = const()[name = tensor("op_383_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10057408))), name = tensor("layers_0_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9962816))), shape = tensor([3072, 768, 1, 1])]; + tensor var_383_cast_fp16 = conv(dilations = var_383_dilations_0, groups = var_383_groups_0, pad = var_383_pad_0, pad_type = var_383_pad_type_0, strides = var_383_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor input_5_cast_fp16 = add(x = var_377_cast_fp16, y = var_383_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; + tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_394_pad_type_0 = const()[name = tensor("op_394_pad_type_0"), val = tensor("valid")]; + tensor var_394_strides_0 = const()[name = tensor("op_394_strides_0"), val = tensor([1, 1])]; + tensor var_394_pad_0 = const()[name = tensor("op_394_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_394_dilations_0 = const()[name = tensor("op_394_dilations_0"), val = tensor([1, 1])]; + tensor var_394_groups_0 = const()[name = tensor("op_394_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10352384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11532096))), name = tensor("layers_0_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11532224)))]; + tensor var_394_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_394_dilations_0, groups = var_394_groups_0, pad = var_394_pad_0, pad_type = var_394_pad_type_0, strides = var_394_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = tensor("op_394_cast_fp16")]; + tensor var_400_pad_type_0 = const()[name = tensor("op_400_pad_type_0"), val = tensor("valid")]; + tensor var_400_strides_0 = const()[name = tensor("op_400_strides_0"), val = tensor([1, 1])]; + tensor var_400_pad_0 = const()[name = tensor("op_400_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_400_dilations_0 = const()[name = tensor("op_400_dilations_0"), val = tensor([1, 1])]; + tensor var_400_groups_0 = const()[name = tensor("op_400_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11613568))), name = tensor("layers_0_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11533824))), shape = tensor([768, 3072, 1, 1])]; + tensor var_400_cast_fp16 = conv(dilations = var_400_dilations_0, groups = var_400_groups_0, pad = var_400_pad_0, pad_type = var_400_pad_type_0, strides = var_400_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_7_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = var_394_cast_fp16, y = var_400_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor var_406 = const()[name = tensor("op_406"), val = tensor(3)]; + tensor out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; + tensor var_428_to_fp16 = const()[name = tensor("op_428_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_428_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor obj_5_gamma_0_to_fp16 = const()[name = tensor("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11908544)))]; + tensor obj_5_beta_0_to_fp16 = const()[name = tensor("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11910144)))]; + tensor obj_5_epsilon_0_to_fp16 = const()[name = tensor("obj_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("obj_5_cast_fp16")]; + tensor var_450_pad_type_0 = const()[name = tensor("op_450_pad_type_0"), val = tensor("valid")]; + tensor var_450_strides_0 = const()[name = tensor("op_450_strides_0"), val = tensor([1, 1])]; + tensor var_450_pad_0 = const()[name = tensor("op_450_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_450_dilations_0 = const()[name = tensor("op_450_dilations_0"), val = tensor([1, 1])]; + tensor var_450_groups_0 = const()[name = tensor("op_450_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11911744))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12206720))), name = tensor("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12206848)))]; + tensor var_450_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_450_dilations_0, groups = var_450_groups_0, pad = var_450_pad_0, pad_type = var_450_pad_type_0, strides = var_450_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_450_cast_fp16")]; + tensor var_456_pad_type_0 = const()[name = tensor("op_456_pad_type_0"), val = tensor("valid")]; + tensor var_456_strides_0 = const()[name = tensor("op_456_strides_0"), val = tensor([1, 1])]; + tensor var_456_pad_0 = const()[name = tensor("op_456_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_456_dilations_0 = const()[name = tensor("op_456_dilations_0"), val = tensor([1, 1])]; + tensor var_456_groups_0 = const()[name = tensor("op_456_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12226688))), name = tensor("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12208448))), shape = tensor([768, 768, 1, 1])]; + tensor var_456_cast_fp16 = conv(dilations = var_456_dilations_0, groups = var_456_groups_0, pad = var_456_pad_0, pad_type = var_456_pad_type_0, strides = var_456_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor query_3_cast_fp16 = add(x = var_450_cast_fp16, y = var_456_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_465_pad_type_0 = const()[name = tensor("op_465_pad_type_0"), val = tensor("valid")]; + tensor var_465_strides_0 = const()[name = tensor("op_465_strides_0"), val = tensor([1, 1])]; + tensor var_465_pad_0 = const()[name = tensor("op_465_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_465_dilations_0 = const()[name = tensor("op_465_dilations_0"), val = tensor([1, 1])]; + tensor var_465_groups_0 = const()[name = tensor("op_465_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12300480))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12595456))), name = tensor("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_465_cast_fp16 = conv(dilations = var_465_dilations_0, groups = var_465_groups_0, pad = var_465_pad_0, pad_type = var_465_pad_type_0, strides = var_465_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_465_cast_fp16")]; + tensor var_471_pad_type_0 = const()[name = tensor("op_471_pad_type_0"), val = tensor("valid")]; + tensor var_471_strides_0 = const()[name = tensor("op_471_strides_0"), val = tensor([1, 1])]; + tensor var_471_pad_0 = const()[name = tensor("op_471_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_471_dilations_0 = const()[name = tensor("op_471_dilations_0"), val = tensor([1, 1])]; + tensor var_471_groups_0 = const()[name = tensor("op_471_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12614464))), name = tensor("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12595584))), shape = tensor([768, 768, 1, 1])]; + tensor var_471_cast_fp16 = conv(dilations = var_471_dilations_0, groups = var_471_groups_0, pad = var_471_pad_0, pad_type = var_471_pad_type_0, strides = var_471_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_471_cast_fp16")]; + tensor key_3_cast_fp16 = add(x = var_465_cast_fp16, y = var_471_cast_fp16)[name = tensor("key_3_cast_fp16")]; + tensor var_481_pad_type_0 = const()[name = tensor("op_481_pad_type_0"), val = tensor("valid")]; + tensor var_481_strides_0 = const()[name = tensor("op_481_strides_0"), val = tensor([1, 1])]; + tensor var_481_pad_0 = const()[name = tensor("op_481_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_481_dilations_0 = const()[name = tensor("op_481_dilations_0"), val = tensor([1, 1])]; + tensor var_481_groups_0 = const()[name = tensor("op_481_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12688256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12983232))), name = tensor("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12983360)))]; + tensor var_481_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_481_dilations_0, groups = var_481_groups_0, pad = var_481_pad_0, pad_type = var_481_pad_type_0, strides = var_481_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_487_pad_type_0 = const()[name = tensor("op_487_pad_type_0"), val = tensor("valid")]; + tensor var_487_strides_0 = const()[name = tensor("op_487_strides_0"), val = tensor([1, 1])]; + tensor var_487_pad_0 = const()[name = tensor("op_487_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_487_dilations_0 = const()[name = tensor("op_487_dilations_0"), val = tensor([1, 1])]; + tensor var_487_groups_0 = const()[name = tensor("op_487_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13000768))), name = tensor("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12984960))), shape = tensor([768, 768, 1, 1])]; + tensor var_487_cast_fp16 = conv(dilations = var_487_dilations_0, groups = var_487_groups_0, pad = var_487_pad_0, pad_type = var_487_pad_type_0, strides = var_487_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_487_cast_fp16")]; + tensor value_3_cast_fp16 = add(x = var_481_cast_fp16, y = var_487_cast_fp16)[name = tensor("value_3_cast_fp16")]; + tensor var_491 = const()[name = tensor("op_491"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_3_cast_fp16 = reshape(shape = var_491, x = query_3_cast_fp16)[name = tensor("mh_q_3_cast_fp16")]; + tensor var_493_to_fp16 = const()[name = tensor("op_493_to_fp16"), val = tensor(0x1p-3)]; + tensor var_494_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_493_to_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_497 = const()[name = tensor("op_497"), val = tensor([1, 12, 64, 1500])]; + tensor var_498_cast_fp16 = reshape(shape = var_497, x = key_3_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor mh_w_3_transpose_x_0 = const()[name = tensor("mh_w_3_transpose_x_0"), val = tensor(true)]; + tensor mh_w_3_transpose_y_0 = const()[name = tensor("mh_w_3_transpose_y_0"), val = tensor(false)]; + tensor mh_w_3_cast_fp16 = matmul(transpose_x = mh_w_3_transpose_x_0, transpose_y = mh_w_3_transpose_y_0, x = var_494_cast_fp16, y = var_498_cast_fp16)[name = tensor("mh_w_3_cast_fp16")]; + tensor var_501_cast_fp16 = softmax(axis = var_406, x = mh_w_3_cast_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_502 = const()[name = tensor("op_502"), val = tensor([1, 12, 64, 1500])]; + tensor var_503_cast_fp16 = reshape(shape = var_502, x = value_3_cast_fp16)[name = tensor("op_503_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_503_cast_fp16, y = var_501_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 768, 1, 1500])]; + tensor input_9_cast_fp16 = reshape(shape = var_506, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_516_pad_type_0 = const()[name = tensor("op_516_pad_type_0"), val = tensor("valid")]; + tensor var_516_strides_0 = const()[name = tensor("op_516_strides_0"), val = tensor([1, 1])]; + tensor var_516_pad_0 = const()[name = tensor("op_516_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_516_dilations_0 = const()[name = tensor("op_516_dilations_0"), val = tensor([1, 1])]; + tensor var_516_groups_0 = const()[name = tensor("op_516_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13074560))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13369536))), name = tensor("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13369664)))]; + tensor var_516_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_516_dilations_0, groups = var_516_groups_0, pad = var_516_pad_0, pad_type = var_516_pad_type_0, strides = var_516_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = tensor("op_516_cast_fp16")]; + tensor var_522_pad_type_0 = const()[name = tensor("op_522_pad_type_0"), val = tensor("valid")]; + tensor var_522_strides_0 = const()[name = tensor("op_522_strides_0"), val = tensor([1, 1])]; + tensor var_522_pad_0 = const()[name = tensor("op_522_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_522_dilations_0 = const()[name = tensor("op_522_dilations_0"), val = tensor([1, 1])]; + tensor var_522_groups_0 = const()[name = tensor("op_522_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13384576))), name = tensor("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13371264))), shape = tensor([768, 768, 1, 1])]; + tensor var_522_cast_fp16 = conv(dilations = var_522_dilations_0, groups = var_522_groups_0, pad = var_522_pad_0, pad_type = var_522_pad_type_0, strides = var_522_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor obj_7_cast_fp16 = add(x = var_516_cast_fp16, y = var_522_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor out_7_axes_0 = const()[name = tensor("out_7_axes_0"), val = tensor([1])]; + tensor var_533_to_fp16 = const()[name = tensor("op_533_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_533_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor input_11_gamma_0_to_fp16 = const()[name = tensor("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13458368)))]; + tensor input_11_beta_0_to_fp16 = const()[name = tensor("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13459968)))]; + tensor input_11_epsilon_0_to_fp16 = const()[name = tensor("input_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_551_pad_type_0 = const()[name = tensor("op_551_pad_type_0"), val = tensor("valid")]; + tensor var_551_strides_0 = const()[name = tensor("op_551_strides_0"), val = tensor([1, 1])]; + tensor var_551_pad_0 = const()[name = tensor("op_551_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_551_dilations_0 = const()[name = tensor("op_551_dilations_0"), val = tensor([1, 1])]; + tensor var_551_groups_0 = const()[name = tensor("op_551_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13461568))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14641280))), name = tensor("layers_1_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14641408)))]; + tensor var_551_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_551_dilations_0, groups = var_551_groups_0, pad = var_551_pad_0, pad_type = var_551_pad_type_0, strides = var_551_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = tensor("op_551_cast_fp16")]; + tensor var_557_pad_type_0 = const()[name = tensor("op_557_pad_type_0"), val = tensor("valid")]; + tensor var_557_strides_0 = const()[name = tensor("op_557_strides_0"), val = tensor([1, 1])]; + tensor var_557_pad_0 = const()[name = tensor("op_557_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_557_dilations_0 = const()[name = tensor("op_557_dilations_0"), val = tensor([1, 1])]; + tensor var_557_groups_0 = const()[name = tensor("op_557_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14721024))), name = tensor("layers_1_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14647616))), shape = tensor([3072, 768, 1, 1])]; + tensor var_557_cast_fp16 = conv(dilations = var_557_dilations_0, groups = var_557_groups_0, pad = var_557_pad_0, pad_type = var_557_pad_type_0, strides = var_557_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = tensor("op_557_cast_fp16")]; + tensor input_13_cast_fp16 = add(x = var_551_cast_fp16, y = var_557_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; + tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_568_pad_type_0 = const()[name = tensor("op_568_pad_type_0"), val = tensor("valid")]; + tensor var_568_strides_0 = const()[name = tensor("op_568_strides_0"), val = tensor([1, 1])]; + tensor var_568_pad_0 = const()[name = tensor("op_568_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_568_dilations_0 = const()[name = tensor("op_568_dilations_0"), val = tensor([1, 1])]; + tensor var_568_groups_0 = const()[name = tensor("op_568_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15016000))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16195712))), name = tensor("layers_1_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16195840)))]; + tensor var_568_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_568_dilations_0, groups = var_568_groups_0, pad = var_568_pad_0, pad_type = var_568_pad_type_0, strides = var_568_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = tensor("op_568_cast_fp16")]; + tensor var_574_pad_type_0 = const()[name = tensor("op_574_pad_type_0"), val = tensor("valid")]; + tensor var_574_strides_0 = const()[name = tensor("op_574_strides_0"), val = tensor([1, 1])]; + tensor var_574_pad_0 = const()[name = tensor("op_574_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_574_dilations_0 = const()[name = tensor("op_574_dilations_0"), val = tensor([1, 1])]; + tensor var_574_groups_0 = const()[name = tensor("op_574_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16268160))), name = tensor("layers_1_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16197440))), shape = tensor([768, 3072, 1, 1])]; + tensor var_574_cast_fp16 = conv(dilations = var_574_dilations_0, groups = var_574_groups_0, pad = var_574_pad_0, pad_type = var_574_pad_type_0, strides = var_574_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = add(x = var_568_cast_fp16, y = var_574_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor var_580 = const()[name = tensor("op_580"), val = tensor(3)]; + tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; + tensor var_602_to_fp16 = const()[name = tensor("op_602_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_602_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16563136)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16564736)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor var_624_pad_type_0 = const()[name = tensor("op_624_pad_type_0"), val = tensor("valid")]; + tensor var_624_strides_0 = const()[name = tensor("op_624_strides_0"), val = tensor([1, 1])]; + tensor var_624_pad_0 = const()[name = tensor("op_624_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_624_dilations_0 = const()[name = tensor("op_624_dilations_0"), val = tensor([1, 1])]; + tensor var_624_groups_0 = const()[name = tensor("op_624_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16566336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16861312))), name = tensor("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16861440)))]; + tensor var_624_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_624_dilations_0, groups = var_624_groups_0, pad = var_624_pad_0, pad_type = var_624_pad_type_0, strides = var_624_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_630_pad_type_0 = const()[name = tensor("op_630_pad_type_0"), val = tensor("valid")]; + tensor var_630_strides_0 = const()[name = tensor("op_630_strides_0"), val = tensor([1, 1])]; + tensor var_630_pad_0 = const()[name = tensor("op_630_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_630_dilations_0 = const()[name = tensor("op_630_dilations_0"), val = tensor([1, 1])]; + tensor var_630_groups_0 = const()[name = tensor("op_630_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16878784))), name = tensor("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16863040))), shape = tensor([768, 768, 1, 1])]; + tensor var_630_cast_fp16 = conv(dilations = var_630_dilations_0, groups = var_630_groups_0, pad = var_630_pad_0, pad_type = var_630_pad_type_0, strides = var_630_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_630_cast_fp16")]; + tensor query_5_cast_fp16 = add(x = var_624_cast_fp16, y = var_630_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_639_pad_type_0 = const()[name = tensor("op_639_pad_type_0"), val = tensor("valid")]; + tensor var_639_strides_0 = const()[name = tensor("op_639_strides_0"), val = tensor([1, 1])]; + tensor var_639_pad_0 = const()[name = tensor("op_639_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_639_dilations_0 = const()[name = tensor("op_639_dilations_0"), val = tensor([1, 1])]; + tensor var_639_groups_0 = const()[name = tensor("op_639_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16952576))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17247552))), name = tensor("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_639_cast_fp16 = conv(dilations = var_639_dilations_0, groups = var_639_groups_0, pad = var_639_pad_0, pad_type = var_639_pad_type_0, strides = var_639_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_639_cast_fp16")]; + tensor var_645_pad_type_0 = const()[name = tensor("op_645_pad_type_0"), val = tensor("valid")]; + tensor var_645_strides_0 = const()[name = tensor("op_645_strides_0"), val = tensor([1, 1])]; + tensor var_645_pad_0 = const()[name = tensor("op_645_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_645_dilations_0 = const()[name = tensor("op_645_dilations_0"), val = tensor([1, 1])]; + tensor var_645_groups_0 = const()[name = tensor("op_645_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17262400))), name = tensor("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17247680))), shape = tensor([768, 768, 1, 1])]; + tensor var_645_cast_fp16 = conv(dilations = var_645_dilations_0, groups = var_645_groups_0, pad = var_645_pad_0, pad_type = var_645_pad_type_0, strides = var_645_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_645_cast_fp16")]; + tensor key_5_cast_fp16 = add(x = var_639_cast_fp16, y = var_645_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_655_pad_type_0 = const()[name = tensor("op_655_pad_type_0"), val = tensor("valid")]; + tensor var_655_strides_0 = const()[name = tensor("op_655_strides_0"), val = tensor([1, 1])]; + tensor var_655_pad_0 = const()[name = tensor("op_655_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_655_dilations_0 = const()[name = tensor("op_655_dilations_0"), val = tensor([1, 1])]; + tensor var_655_groups_0 = const()[name = tensor("op_655_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17336192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17631168))), name = tensor("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17631296)))]; + tensor var_655_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_655_dilations_0, groups = var_655_groups_0, pad = var_655_pad_0, pad_type = var_655_pad_type_0, strides = var_655_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_655_cast_fp16")]; + tensor var_661_pad_type_0 = const()[name = tensor("op_661_pad_type_0"), val = tensor("valid")]; + tensor var_661_strides_0 = const()[name = tensor("op_661_strides_0"), val = tensor([1, 1])]; + tensor var_661_pad_0 = const()[name = tensor("op_661_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_661_dilations_0 = const()[name = tensor("op_661_dilations_0"), val = tensor([1, 1])]; + tensor var_661_groups_0 = const()[name = tensor("op_661_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17644992))), name = tensor("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17632896))), shape = tensor([768, 768, 1, 1])]; + tensor var_661_cast_fp16 = conv(dilations = var_661_dilations_0, groups = var_661_groups_0, pad = var_661_pad_0, pad_type = var_661_pad_type_0, strides = var_661_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_661_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_655_cast_fp16, y = var_661_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_665 = const()[name = tensor("op_665"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_5_cast_fp16 = reshape(shape = var_665, x = query_5_cast_fp16)[name = tensor("mh_q_5_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1p-3)]; + tensor var_668_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_671 = const()[name = tensor("op_671"), val = tensor([1, 12, 64, 1500])]; + tensor var_672_cast_fp16 = reshape(shape = var_671, x = key_5_cast_fp16)[name = tensor("op_672_cast_fp16")]; + tensor mh_w_5_transpose_x_0 = const()[name = tensor("mh_w_5_transpose_x_0"), val = tensor(true)]; + tensor mh_w_5_transpose_y_0 = const()[name = tensor("mh_w_5_transpose_y_0"), val = tensor(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_668_cast_fp16, y = var_672_cast_fp16)[name = tensor("mh_w_5_cast_fp16")]; + tensor var_675_cast_fp16 = softmax(axis = var_580, x = mh_w_5_cast_fp16)[name = tensor("op_675_cast_fp16")]; + tensor var_676 = const()[name = tensor("op_676"), val = tensor([1, 12, 64, 1500])]; + tensor var_677_cast_fp16 = reshape(shape = var_676, x = value_5_cast_fp16)[name = tensor("op_677_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_677_cast_fp16, y = var_675_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_680 = const()[name = tensor("op_680"), val = tensor([1, 768, 1, 1500])]; + tensor input_17_cast_fp16 = reshape(shape = var_680, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_690_pad_type_0 = const()[name = tensor("op_690_pad_type_0"), val = tensor("valid")]; + tensor var_690_strides_0 = const()[name = tensor("op_690_strides_0"), val = tensor([1, 1])]; + tensor var_690_pad_0 = const()[name = tensor("op_690_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_690_dilations_0 = const()[name = tensor("op_690_dilations_0"), val = tensor([1, 1])]; + tensor var_690_groups_0 = const()[name = tensor("op_690_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17718784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18013760))), name = tensor("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18013888)))]; + tensor var_690_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_690_dilations_0, groups = var_690_groups_0, pad = var_690_pad_0, pad_type = var_690_pad_type_0, strides = var_690_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = tensor("op_690_cast_fp16")]; + tensor var_696_pad_type_0 = const()[name = tensor("op_696_pad_type_0"), val = tensor("valid")]; + tensor var_696_strides_0 = const()[name = tensor("op_696_strides_0"), val = tensor([1, 1])]; + tensor var_696_pad_0 = const()[name = tensor("op_696_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_696_dilations_0 = const()[name = tensor("op_696_dilations_0"), val = tensor([1, 1])]; + tensor var_696_groups_0 = const()[name = tensor("op_696_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18025408))), name = tensor("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18015488))), shape = tensor([768, 768, 1, 1])]; + tensor var_696_cast_fp16 = conv(dilations = var_696_dilations_0, groups = var_696_groups_0, pad = var_696_pad_0, pad_type = var_696_pad_type_0, strides = var_696_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_17_cast_fp16)[name = tensor("op_696_cast_fp16")]; + tensor obj_11_cast_fp16 = add(x = var_690_cast_fp16, y = var_696_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_707_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_19_gamma_0_to_fp16 = const()[name = tensor("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18099200)))]; + tensor input_19_beta_0_to_fp16 = const()[name = tensor("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18100800)))]; + tensor input_19_epsilon_0_to_fp16 = const()[name = tensor("input_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_725_pad_type_0 = const()[name = tensor("op_725_pad_type_0"), val = tensor("valid")]; + tensor var_725_strides_0 = const()[name = tensor("op_725_strides_0"), val = tensor([1, 1])]; + tensor var_725_pad_0 = const()[name = tensor("op_725_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_725_dilations_0 = const()[name = tensor("op_725_dilations_0"), val = tensor([1, 1])]; + tensor var_725_groups_0 = const()[name = tensor("op_725_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18102400))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19282112))), name = tensor("layers_2_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19282240)))]; + tensor var_725_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_725_dilations_0, groups = var_725_groups_0, pad = var_725_pad_0, pad_type = var_725_pad_type_0, strides = var_725_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = tensor("op_725_cast_fp16")]; + tensor var_731_pad_type_0 = const()[name = tensor("op_731_pad_type_0"), val = tensor("valid")]; + tensor var_731_strides_0 = const()[name = tensor("op_731_strides_0"), val = tensor([1, 1])]; + tensor var_731_pad_0 = const()[name = tensor("op_731_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_731_dilations_0 = const()[name = tensor("op_731_dilations_0"), val = tensor([1, 1])]; + tensor var_731_groups_0 = const()[name = tensor("op_731_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19352448))), name = tensor("layers_2_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19288448))), shape = tensor([3072, 768, 1, 1])]; + tensor var_731_cast_fp16 = conv(dilations = var_731_dilations_0, groups = var_731_groups_0, pad = var_731_pad_0, pad_type = var_731_pad_type_0, strides = var_731_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = tensor("op_731_cast_fp16")]; + tensor input_21_cast_fp16 = add(x = var_725_cast_fp16, y = var_731_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; + tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_742_pad_type_0 = const()[name = tensor("op_742_pad_type_0"), val = tensor("valid")]; + tensor var_742_strides_0 = const()[name = tensor("op_742_strides_0"), val = tensor([1, 1])]; + tensor var_742_pad_0 = const()[name = tensor("op_742_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_742_dilations_0 = const()[name = tensor("op_742_dilations_0"), val = tensor([1, 1])]; + tensor var_742_groups_0 = const()[name = tensor("op_742_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19647424))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20827136))), name = tensor("layers_2_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20827264)))]; + tensor var_742_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_742_dilations_0, groups = var_742_groups_0, pad = var_742_pad_0, pad_type = var_742_pad_type_0, strides = var_742_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = tensor("op_742_cast_fp16")]; + tensor var_748_pad_type_0 = const()[name = tensor("op_748_pad_type_0"), val = tensor("valid")]; + tensor var_748_strides_0 = const()[name = tensor("op_748_strides_0"), val = tensor([1, 1])]; + tensor var_748_pad_0 = const()[name = tensor("op_748_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_748_dilations_0 = const()[name = tensor("op_748_dilations_0"), val = tensor([1, 1])]; + tensor var_748_groups_0 = const()[name = tensor("op_748_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20892032))), name = tensor("layers_2_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20828864))), shape = tensor([768, 3072, 1, 1])]; + tensor var_748_cast_fp16 = conv(dilations = var_748_dilations_0, groups = var_748_groups_0, pad = var_748_pad_0, pad_type = var_748_pad_type_0, strides = var_748_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = tensor("op_748_cast_fp16")]; + tensor hidden_states_9_cast_fp16 = add(x = var_742_cast_fp16, y = var_748_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_754 = const()[name = tensor("op_754"), val = tensor(3)]; + tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; + tensor var_776_to_fp16 = const()[name = tensor("op_776_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_776_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_13_gamma_0_to_fp16 = const()[name = tensor("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21187008)))]; + tensor obj_13_beta_0_to_fp16 = const()[name = tensor("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21188608)))]; + tensor obj_13_epsilon_0_to_fp16 = const()[name = tensor("obj_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_798_pad_type_0 = const()[name = tensor("op_798_pad_type_0"), val = tensor("valid")]; + tensor var_798_strides_0 = const()[name = tensor("op_798_strides_0"), val = tensor([1, 1])]; + tensor var_798_pad_0 = const()[name = tensor("op_798_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_798_dilations_0 = const()[name = tensor("op_798_dilations_0"), val = tensor([1, 1])]; + tensor var_798_groups_0 = const()[name = tensor("op_798_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21190208))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21485184))), name = tensor("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21485312)))]; + tensor var_798_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_798_dilations_0, groups = var_798_groups_0, pad = var_798_pad_0, pad_type = var_798_pad_type_0, strides = var_798_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_798_cast_fp16")]; + tensor var_804_pad_type_0 = const()[name = tensor("op_804_pad_type_0"), val = tensor("valid")]; + tensor var_804_strides_0 = const()[name = tensor("op_804_strides_0"), val = tensor([1, 1])]; + tensor var_804_pad_0 = const()[name = tensor("op_804_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_804_dilations_0 = const()[name = tensor("op_804_dilations_0"), val = tensor([1, 1])]; + tensor var_804_groups_0 = const()[name = tensor("op_804_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21500352))), name = tensor("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21486912))), shape = tensor([768, 768, 1, 1])]; + tensor var_804_cast_fp16 = conv(dilations = var_804_dilations_0, groups = var_804_groups_0, pad = var_804_pad_0, pad_type = var_804_pad_type_0, strides = var_804_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_804_cast_fp16")]; + tensor query_7_cast_fp16 = add(x = var_798_cast_fp16, y = var_804_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_813_pad_type_0 = const()[name = tensor("op_813_pad_type_0"), val = tensor("valid")]; + tensor var_813_strides_0 = const()[name = tensor("op_813_strides_0"), val = tensor([1, 1])]; + tensor var_813_pad_0 = const()[name = tensor("op_813_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_813_dilations_0 = const()[name = tensor("op_813_dilations_0"), val = tensor([1, 1])]; + tensor var_813_groups_0 = const()[name = tensor("op_813_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21574144))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21869120))), name = tensor("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_813_cast_fp16 = conv(dilations = var_813_dilations_0, groups = var_813_groups_0, pad = var_813_pad_0, pad_type = var_813_pad_type_0, strides = var_813_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_813_cast_fp16")]; + tensor var_819_pad_type_0 = const()[name = tensor("op_819_pad_type_0"), val = tensor("valid")]; + tensor var_819_strides_0 = const()[name = tensor("op_819_strides_0"), val = tensor([1, 1])]; + tensor var_819_pad_0 = const()[name = tensor("op_819_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_819_dilations_0 = const()[name = tensor("op_819_dilations_0"), val = tensor([1, 1])]; + tensor var_819_groups_0 = const()[name = tensor("op_819_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21881920))), name = tensor("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21869248))), shape = tensor([768, 768, 1, 1])]; + tensor var_819_cast_fp16 = conv(dilations = var_819_dilations_0, groups = var_819_groups_0, pad = var_819_pad_0, pad_type = var_819_pad_type_0, strides = var_819_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_819_cast_fp16")]; + tensor key_7_cast_fp16 = add(x = var_813_cast_fp16, y = var_819_cast_fp16)[name = tensor("key_7_cast_fp16")]; + tensor var_829_pad_type_0 = const()[name = tensor("op_829_pad_type_0"), val = tensor("valid")]; + tensor var_829_strides_0 = const()[name = tensor("op_829_strides_0"), val = tensor([1, 1])]; + tensor var_829_pad_0 = const()[name = tensor("op_829_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_829_dilations_0 = const()[name = tensor("op_829_dilations_0"), val = tensor([1, 1])]; + tensor var_829_groups_0 = const()[name = tensor("op_829_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21955712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22250688))), name = tensor("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22250816)))]; + tensor var_829_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_829_dilations_0, groups = var_829_groups_0, pad = var_829_pad_0, pad_type = var_829_pad_type_0, strides = var_829_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_829_cast_fp16")]; + tensor var_835_pad_type_0 = const()[name = tensor("op_835_pad_type_0"), val = tensor("valid")]; + tensor var_835_strides_0 = const()[name = tensor("op_835_strides_0"), val = tensor([1, 1])]; + tensor var_835_pad_0 = const()[name = tensor("op_835_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_835_dilations_0 = const()[name = tensor("op_835_dilations_0"), val = tensor([1, 1])]; + tensor var_835_groups_0 = const()[name = tensor("op_835_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22262080))), name = tensor("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22252416))), shape = tensor([768, 768, 1, 1])]; + tensor var_835_cast_fp16 = conv(dilations = var_835_dilations_0, groups = var_835_groups_0, pad = var_835_pad_0, pad_type = var_835_pad_type_0, strides = var_835_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_835_cast_fp16")]; + tensor value_7_cast_fp16 = add(x = var_829_cast_fp16, y = var_835_cast_fp16)[name = tensor("value_7_cast_fp16")]; + tensor var_839 = const()[name = tensor("op_839"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_839, x = query_7_cast_fp16)[name = tensor("mh_q_7_cast_fp16")]; + tensor var_841_to_fp16 = const()[name = tensor("op_841_to_fp16"), val = tensor(0x1p-3)]; + tensor var_842_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_841_to_fp16)[name = tensor("op_842_cast_fp16")]; + tensor var_845 = const()[name = tensor("op_845"), val = tensor([1, 12, 64, 1500])]; + tensor var_846_cast_fp16 = reshape(shape = var_845, x = key_7_cast_fp16)[name = tensor("op_846_cast_fp16")]; + tensor mh_w_7_transpose_x_0 = const()[name = tensor("mh_w_7_transpose_x_0"), val = tensor(true)]; + tensor mh_w_7_transpose_y_0 = const()[name = tensor("mh_w_7_transpose_y_0"), val = tensor(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_842_cast_fp16, y = var_846_cast_fp16)[name = tensor("mh_w_7_cast_fp16")]; + tensor var_849_cast_fp16 = softmax(axis = var_754, x = mh_w_7_cast_fp16)[name = tensor("op_849_cast_fp16")]; + tensor var_850 = const()[name = tensor("op_850"), val = tensor([1, 12, 64, 1500])]; + tensor var_851_cast_fp16 = reshape(shape = var_850, x = value_7_cast_fp16)[name = tensor("op_851_cast_fp16")]; + tensor attn_7_transpose_x_0 = const()[name = tensor("attn_7_transpose_x_0"), val = tensor(false)]; + tensor attn_7_transpose_y_0 = const()[name = tensor("attn_7_transpose_y_0"), val = tensor(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_851_cast_fp16, y = var_849_cast_fp16)[name = tensor("attn_7_cast_fp16")]; + tensor var_854 = const()[name = tensor("op_854"), val = tensor([1, 768, 1, 1500])]; + tensor input_25_cast_fp16 = reshape(shape = var_854, x = attn_7_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_864_pad_type_0 = const()[name = tensor("op_864_pad_type_0"), val = tensor("valid")]; + tensor var_864_strides_0 = const()[name = tensor("op_864_strides_0"), val = tensor([1, 1])]; + tensor var_864_pad_0 = const()[name = tensor("op_864_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_864_dilations_0 = const()[name = tensor("op_864_dilations_0"), val = tensor([1, 1])]; + tensor var_864_groups_0 = const()[name = tensor("op_864_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22335872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22630848))), name = tensor("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22630976)))]; + tensor var_864_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_864_dilations_0, groups = var_864_groups_0, pad = var_864_pad_0, pad_type = var_864_pad_type_0, strides = var_864_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = tensor("op_864_cast_fp16")]; + tensor var_870_pad_type_0 = const()[name = tensor("op_870_pad_type_0"), val = tensor("valid")]; + tensor var_870_strides_0 = const()[name = tensor("op_870_strides_0"), val = tensor([1, 1])]; + tensor var_870_pad_0 = const()[name = tensor("op_870_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_870_dilations_0 = const()[name = tensor("op_870_dilations_0"), val = tensor([1, 1])]; + tensor var_870_groups_0 = const()[name = tensor("op_870_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22640640))), name = tensor("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22632576))), shape = tensor([768, 768, 1, 1])]; + tensor var_870_cast_fp16 = conv(dilations = var_870_dilations_0, groups = var_870_groups_0, pad = var_870_pad_0, pad_type = var_870_pad_type_0, strides = var_870_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = tensor("op_870_cast_fp16")]; + tensor obj_15_cast_fp16 = add(x = var_864_cast_fp16, y = var_870_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; + tensor var_881_to_fp16 = const()[name = tensor("op_881_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_881_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor input_27_gamma_0_to_fp16 = const()[name = tensor("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22714432)))]; + tensor input_27_beta_0_to_fp16 = const()[name = tensor("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22716032)))]; + tensor input_27_epsilon_0_to_fp16 = const()[name = tensor("input_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_899_pad_type_0 = const()[name = tensor("op_899_pad_type_0"), val = tensor("valid")]; + tensor var_899_strides_0 = const()[name = tensor("op_899_strides_0"), val = tensor([1, 1])]; + tensor var_899_pad_0 = const()[name = tensor("op_899_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_899_dilations_0 = const()[name = tensor("op_899_dilations_0"), val = tensor([1, 1])]; + tensor var_899_groups_0 = const()[name = tensor("op_899_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22717632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23897344))), name = tensor("layers_3_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23897472)))]; + tensor var_899_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_899_dilations_0, groups = var_899_groups_0, pad = var_899_pad_0, pad_type = var_899_pad_type_0, strides = var_899_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = tensor("op_899_cast_fp16")]; + tensor var_905_pad_type_0 = const()[name = tensor("op_905_pad_type_0"), val = tensor("valid")]; + tensor var_905_strides_0 = const()[name = tensor("op_905_strides_0"), val = tensor([1, 1])]; + tensor var_905_pad_0 = const()[name = tensor("op_905_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_905_dilations_0 = const()[name = tensor("op_905_dilations_0"), val = tensor([1, 1])]; + tensor var_905_groups_0 = const()[name = tensor("op_905_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23960640))), name = tensor("layers_3_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23903680))), shape = tensor([3072, 768, 1, 1])]; + tensor var_905_cast_fp16 = conv(dilations = var_905_dilations_0, groups = var_905_groups_0, pad = var_905_pad_0, pad_type = var_905_pad_type_0, strides = var_905_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_27_cast_fp16)[name = tensor("op_905_cast_fp16")]; + tensor input_29_cast_fp16 = add(x = var_899_cast_fp16, y = var_905_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; + tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_916_pad_type_0 = const()[name = tensor("op_916_pad_type_0"), val = tensor("valid")]; + tensor var_916_strides_0 = const()[name = tensor("op_916_strides_0"), val = tensor([1, 1])]; + tensor var_916_pad_0 = const()[name = tensor("op_916_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_916_dilations_0 = const()[name = tensor("op_916_dilations_0"), val = tensor([1, 1])]; + tensor var_916_groups_0 = const()[name = tensor("op_916_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24255616))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25435328))), name = tensor("layers_3_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25435456)))]; + tensor var_916_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_916_dilations_0, groups = var_916_groups_0, pad = var_916_pad_0, pad_type = var_916_pad_type_0, strides = var_916_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = tensor("op_916_cast_fp16")]; + tensor var_922_pad_type_0 = const()[name = tensor("op_922_pad_type_0"), val = tensor("valid")]; + tensor var_922_strides_0 = const()[name = tensor("op_922_strides_0"), val = tensor([1, 1])]; + tensor var_922_pad_0 = const()[name = tensor("op_922_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_922_dilations_0 = const()[name = tensor("op_922_dilations_0"), val = tensor([1, 1])]; + tensor var_922_groups_0 = const()[name = tensor("op_922_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25492672))), name = tensor("layers_3_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25437056))), shape = tensor([768, 3072, 1, 1])]; + tensor var_922_cast_fp16 = conv(dilations = var_922_dilations_0, groups = var_922_groups_0, pad = var_922_pad_0, pad_type = var_922_pad_type_0, strides = var_922_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = tensor("op_922_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = var_916_cast_fp16, y = var_922_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor var_928 = const()[name = tensor("op_928"), val = tensor(3)]; + tensor out_17_axes_0 = const()[name = tensor("out_17_axes_0"), val = tensor([1])]; + tensor var_950_to_fp16 = const()[name = tensor("op_950_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_950_to_fp16, x = inputs_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor obj_17_gamma_0_to_fp16 = const()[name = tensor("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25787648)))]; + tensor obj_17_beta_0_to_fp16 = const()[name = tensor("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25789248)))]; + tensor obj_17_epsilon_0_to_fp16 = const()[name = tensor("obj_17_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("obj_17_cast_fp16")]; + tensor var_972_pad_type_0 = const()[name = tensor("op_972_pad_type_0"), val = tensor("valid")]; + tensor var_972_strides_0 = const()[name = tensor("op_972_strides_0"), val = tensor([1, 1])]; + tensor var_972_pad_0 = const()[name = tensor("op_972_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_972_dilations_0 = const()[name = tensor("op_972_dilations_0"), val = tensor([1, 1])]; + tensor var_972_groups_0 = const()[name = tensor("op_972_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25790848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26085824))), name = tensor("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26085952)))]; + tensor var_972_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_972_dilations_0, groups = var_972_groups_0, pad = var_972_pad_0, pad_type = var_972_pad_type_0, strides = var_972_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_978_pad_type_0 = const()[name = tensor("op_978_pad_type_0"), val = tensor("valid")]; + tensor var_978_strides_0 = const()[name = tensor("op_978_strides_0"), val = tensor([1, 1])]; + tensor var_978_pad_0 = const()[name = tensor("op_978_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_978_dilations_0 = const()[name = tensor("op_978_dilations_0"), val = tensor([1, 1])]; + tensor var_978_groups_0 = const()[name = tensor("op_978_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26100672))), name = tensor("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26087552))), shape = tensor([768, 768, 1, 1])]; + tensor var_978_cast_fp16 = conv(dilations = var_978_dilations_0, groups = var_978_groups_0, pad = var_978_pad_0, pad_type = var_978_pad_type_0, strides = var_978_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_978_cast_fp16")]; + tensor query_9_cast_fp16 = add(x = var_972_cast_fp16, y = var_978_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_987_pad_type_0 = const()[name = tensor("op_987_pad_type_0"), val = tensor("valid")]; + tensor var_987_strides_0 = const()[name = tensor("op_987_strides_0"), val = tensor([1, 1])]; + tensor var_987_pad_0 = const()[name = tensor("op_987_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_987_dilations_0 = const()[name = tensor("op_987_dilations_0"), val = tensor([1, 1])]; + tensor var_987_groups_0 = const()[name = tensor("op_987_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26174464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26469440))), name = tensor("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_987_cast_fp16 = conv(dilations = var_987_dilations_0, groups = var_987_groups_0, pad = var_987_pad_0, pad_type = var_987_pad_type_0, strides = var_987_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_987_cast_fp16")]; + tensor var_993_pad_type_0 = const()[name = tensor("op_993_pad_type_0"), val = tensor("valid")]; + tensor var_993_strides_0 = const()[name = tensor("op_993_strides_0"), val = tensor([1, 1])]; + tensor var_993_pad_0 = const()[name = tensor("op_993_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_993_dilations_0 = const()[name = tensor("op_993_dilations_0"), val = tensor([1, 1])]; + tensor var_993_groups_0 = const()[name = tensor("op_993_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26482240))), name = tensor("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26469568))), shape = tensor([768, 768, 1, 1])]; + tensor var_993_cast_fp16 = conv(dilations = var_993_dilations_0, groups = var_993_groups_0, pad = var_993_pad_0, pad_type = var_993_pad_type_0, strides = var_993_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_993_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_987_cast_fp16, y = var_993_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_1003_pad_type_0 = const()[name = tensor("op_1003_pad_type_0"), val = tensor("valid")]; + tensor var_1003_strides_0 = const()[name = tensor("op_1003_strides_0"), val = tensor([1, 1])]; + tensor var_1003_pad_0 = const()[name = tensor("op_1003_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1003_dilations_0 = const()[name = tensor("op_1003_dilations_0"), val = tensor([1, 1])]; + tensor var_1003_groups_0 = const()[name = tensor("op_1003_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26556032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26851008))), name = tensor("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26851136)))]; + tensor var_1003_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1003_dilations_0, groups = var_1003_groups_0, pad = var_1003_pad_0, pad_type = var_1003_pad_type_0, strides = var_1003_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_1003_cast_fp16")]; + tensor var_1009_pad_type_0 = const()[name = tensor("op_1009_pad_type_0"), val = tensor("valid")]; + tensor var_1009_strides_0 = const()[name = tensor("op_1009_strides_0"), val = tensor([1, 1])]; + tensor var_1009_pad_0 = const()[name = tensor("op_1009_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1009_dilations_0 = const()[name = tensor("op_1009_dilations_0"), val = tensor([1, 1])]; + tensor var_1009_groups_0 = const()[name = tensor("op_1009_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26862144))), name = tensor("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26852736))), shape = tensor([768, 768, 1, 1])]; + tensor var_1009_cast_fp16 = conv(dilations = var_1009_dilations_0, groups = var_1009_groups_0, pad = var_1009_pad_0, pad_type = var_1009_pad_type_0, strides = var_1009_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_1009_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_1003_cast_fp16, y = var_1009_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_1013 = const()[name = tensor("op_1013"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_9_cast_fp16 = reshape(shape = var_1013, x = query_9_cast_fp16)[name = tensor("mh_q_9_cast_fp16")]; + tensor var_1015_to_fp16 = const()[name = tensor("op_1015_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1016_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_1015_to_fp16)[name = tensor("op_1016_cast_fp16")]; + tensor var_1019 = const()[name = tensor("op_1019"), val = tensor([1, 12, 64, 1500])]; + tensor var_1020_cast_fp16 = reshape(shape = var_1019, x = key_9_cast_fp16)[name = tensor("op_1020_cast_fp16")]; + tensor mh_w_9_transpose_x_0 = const()[name = tensor("mh_w_9_transpose_x_0"), val = tensor(true)]; + tensor mh_w_9_transpose_y_0 = const()[name = tensor("mh_w_9_transpose_y_0"), val = tensor(false)]; + tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_1016_cast_fp16, y = var_1020_cast_fp16)[name = tensor("mh_w_9_cast_fp16")]; + tensor var_1023_cast_fp16 = softmax(axis = var_928, x = mh_w_9_cast_fp16)[name = tensor("op_1023_cast_fp16")]; + tensor var_1024 = const()[name = tensor("op_1024"), val = tensor([1, 12, 64, 1500])]; + tensor var_1025_cast_fp16 = reshape(shape = var_1024, x = value_9_cast_fp16)[name = tensor("op_1025_cast_fp16")]; + tensor attn_9_transpose_x_0 = const()[name = tensor("attn_9_transpose_x_0"), val = tensor(false)]; + tensor attn_9_transpose_y_0 = const()[name = tensor("attn_9_transpose_y_0"), val = tensor(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_1025_cast_fp16, y = var_1023_cast_fp16)[name = tensor("attn_9_cast_fp16")]; + tensor var_1028 = const()[name = tensor("op_1028"), val = tensor([1, 768, 1, 1500])]; + tensor input_33_cast_fp16 = reshape(shape = var_1028, x = attn_9_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor var_1038_pad_type_0 = const()[name = tensor("op_1038_pad_type_0"), val = tensor("valid")]; + tensor var_1038_strides_0 = const()[name = tensor("op_1038_strides_0"), val = tensor([1, 1])]; + tensor var_1038_pad_0 = const()[name = tensor("op_1038_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1038_dilations_0 = const()[name = tensor("op_1038_dilations_0"), val = tensor([1, 1])]; + tensor var_1038_groups_0 = const()[name = tensor("op_1038_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26935936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27230912))), name = tensor("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27231040)))]; + tensor var_1038_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1038_dilations_0, groups = var_1038_groups_0, pad = var_1038_pad_0, pad_type = var_1038_pad_type_0, strides = var_1038_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = tensor("op_1038_cast_fp16")]; + tensor var_1044_pad_type_0 = const()[name = tensor("op_1044_pad_type_0"), val = tensor("valid")]; + tensor var_1044_strides_0 = const()[name = tensor("op_1044_strides_0"), val = tensor([1, 1])]; + tensor var_1044_pad_0 = const()[name = tensor("op_1044_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1044_dilations_0 = const()[name = tensor("op_1044_dilations_0"), val = tensor([1, 1])]; + tensor var_1044_groups_0 = const()[name = tensor("op_1044_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27241344))), name = tensor("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27232640))), shape = tensor([768, 768, 1, 1])]; + tensor var_1044_cast_fp16 = conv(dilations = var_1044_dilations_0, groups = var_1044_groups_0, pad = var_1044_pad_0, pad_type = var_1044_pad_type_0, strides = var_1044_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = tensor("op_1044_cast_fp16")]; + tensor obj_19_cast_fp16 = add(x = var_1038_cast_fp16, y = var_1044_cast_fp16)[name = tensor("obj_19_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor out_19_axes_0 = const()[name = tensor("out_19_axes_0"), val = tensor([1])]; + tensor var_1055_to_fp16 = const()[name = tensor("op_1055_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_1055_to_fp16, x = inputs_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27315136)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27316736)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_1073_pad_type_0 = const()[name = tensor("op_1073_pad_type_0"), val = tensor("valid")]; + tensor var_1073_strides_0 = const()[name = tensor("op_1073_strides_0"), val = tensor([1, 1])]; + tensor var_1073_pad_0 = const()[name = tensor("op_1073_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1073_dilations_0 = const()[name = tensor("op_1073_dilations_0"), val = tensor([1, 1])]; + tensor var_1073_groups_0 = const()[name = tensor("op_1073_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27318336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28498048))), name = tensor("layers_4_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28498176)))]; + tensor var_1073_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1073_dilations_0, groups = var_1073_groups_0, pad = var_1073_pad_0, pad_type = var_1073_pad_type_0, strides = var_1073_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor var_1079_pad_type_0 = const()[name = tensor("op_1079_pad_type_0"), val = tensor("valid")]; + tensor var_1079_strides_0 = const()[name = tensor("op_1079_strides_0"), val = tensor([1, 1])]; + tensor var_1079_pad_0 = const()[name = tensor("op_1079_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1079_dilations_0 = const()[name = tensor("op_1079_dilations_0"), val = tensor([1, 1])]; + tensor var_1079_groups_0 = const()[name = tensor("op_1079_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28545344))), name = tensor("layers_4_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28504384))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1079_cast_fp16 = conv(dilations = var_1079_dilations_0, groups = var_1079_groups_0, pad = var_1079_pad_0, pad_type = var_1079_pad_type_0, strides = var_1079_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = tensor("op_1079_cast_fp16")]; + tensor input_37_cast_fp16 = add(x = var_1073_cast_fp16, y = var_1079_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_1090_pad_type_0 = const()[name = tensor("op_1090_pad_type_0"), val = tensor("valid")]; + tensor var_1090_strides_0 = const()[name = tensor("op_1090_strides_0"), val = tensor([1, 1])]; + tensor var_1090_pad_0 = const()[name = tensor("op_1090_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1090_dilations_0 = const()[name = tensor("op_1090_dilations_0"), val = tensor([1, 1])]; + tensor var_1090_groups_0 = const()[name = tensor("op_1090_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28840320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30020032))), name = tensor("layers_4_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30020160)))]; + tensor var_1090_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1090_dilations_0, groups = var_1090_groups_0, pad = var_1090_pad_0, pad_type = var_1090_pad_type_0, strides = var_1090_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1096_pad_type_0 = const()[name = tensor("op_1096_pad_type_0"), val = tensor("valid")]; + tensor var_1096_strides_0 = const()[name = tensor("op_1096_strides_0"), val = tensor([1, 1])]; + tensor var_1096_pad_0 = const()[name = tensor("op_1096_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1096_dilations_0 = const()[name = tensor("op_1096_dilations_0"), val = tensor([1, 1])]; + tensor var_1096_groups_0 = const()[name = tensor("op_1096_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30065024))), name = tensor("layers_4_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30021760))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1096_cast_fp16 = conv(dilations = var_1096_dilations_0, groups = var_1096_groups_0, pad = var_1096_pad_0, pad_type = var_1096_pad_type_0, strides = var_1096_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = tensor("op_1096_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = var_1090_cast_fp16, y = var_1096_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor var_1102 = const()[name = tensor("op_1102"), val = tensor(3)]; + tensor out_21_axes_0 = const()[name = tensor("out_21_axes_0"), val = tensor([1])]; + tensor var_1124_to_fp16 = const()[name = tensor("op_1124_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1124_to_fp16, x = inputs_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_21_gamma_0_to_fp16 = const()[name = tensor("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30360000)))]; + tensor obj_21_beta_0_to_fp16 = const()[name = tensor("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30361600)))]; + tensor obj_21_epsilon_0_to_fp16 = const()[name = tensor("obj_21_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor var_1146_pad_type_0 = const()[name = tensor("op_1146_pad_type_0"), val = tensor("valid")]; + tensor var_1146_strides_0 = const()[name = tensor("op_1146_strides_0"), val = tensor([1, 1])]; + tensor var_1146_pad_0 = const()[name = tensor("op_1146_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1146_dilations_0 = const()[name = tensor("op_1146_dilations_0"), val = tensor([1, 1])]; + tensor var_1146_groups_0 = const()[name = tensor("op_1146_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30363200))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30658176))), name = tensor("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30658304)))]; + tensor var_1146_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1146_dilations_0, groups = var_1146_groups_0, pad = var_1146_pad_0, pad_type = var_1146_pad_type_0, strides = var_1146_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1152_pad_type_0 = const()[name = tensor("op_1152_pad_type_0"), val = tensor("valid")]; + tensor var_1152_strides_0 = const()[name = tensor("op_1152_strides_0"), val = tensor([1, 1])]; + tensor var_1152_pad_0 = const()[name = tensor("op_1152_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1152_dilations_0 = const()[name = tensor("op_1152_dilations_0"), val = tensor([1, 1])]; + tensor var_1152_groups_0 = const()[name = tensor("op_1152_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30670336))), name = tensor("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30659904))), shape = tensor([768, 768, 1, 1])]; + tensor var_1152_cast_fp16 = conv(dilations = var_1152_dilations_0, groups = var_1152_groups_0, pad = var_1152_pad_0, pad_type = var_1152_pad_type_0, strides = var_1152_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_1152_cast_fp16")]; + tensor query_11_cast_fp16 = add(x = var_1146_cast_fp16, y = var_1152_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor var_1161_pad_type_0 = const()[name = tensor("op_1161_pad_type_0"), val = tensor("valid")]; + tensor var_1161_strides_0 = const()[name = tensor("op_1161_strides_0"), val = tensor([1, 1])]; + tensor var_1161_pad_0 = const()[name = tensor("op_1161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1161_dilations_0 = const()[name = tensor("op_1161_dilations_0"), val = tensor([1, 1])]; + tensor var_1161_groups_0 = const()[name = tensor("op_1161_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30744128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31039104))), name = tensor("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1161_cast_fp16 = conv(dilations = var_1161_dilations_0, groups = var_1161_groups_0, pad = var_1161_pad_0, pad_type = var_1161_pad_type_0, strides = var_1161_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_1161_cast_fp16")]; + tensor var_1167_pad_type_0 = const()[name = tensor("op_1167_pad_type_0"), val = tensor("valid")]; + tensor var_1167_strides_0 = const()[name = tensor("op_1167_strides_0"), val = tensor([1, 1])]; + tensor var_1167_pad_0 = const()[name = tensor("op_1167_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1167_dilations_0 = const()[name = tensor("op_1167_dilations_0"), val = tensor([1, 1])]; + tensor var_1167_groups_0 = const()[name = tensor("op_1167_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31048960))), name = tensor("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31039232))), shape = tensor([768, 768, 1, 1])]; + tensor var_1167_cast_fp16 = conv(dilations = var_1167_dilations_0, groups = var_1167_groups_0, pad = var_1167_pad_0, pad_type = var_1167_pad_type_0, strides = var_1167_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_1167_cast_fp16")]; + tensor key_11_cast_fp16 = add(x = var_1161_cast_fp16, y = var_1167_cast_fp16)[name = tensor("key_11_cast_fp16")]; + tensor var_1177_pad_type_0 = const()[name = tensor("op_1177_pad_type_0"), val = tensor("valid")]; + tensor var_1177_strides_0 = const()[name = tensor("op_1177_strides_0"), val = tensor([1, 1])]; + tensor var_1177_pad_0 = const()[name = tensor("op_1177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1177_dilations_0 = const()[name = tensor("op_1177_dilations_0"), val = tensor([1, 1])]; + tensor var_1177_groups_0 = const()[name = tensor("op_1177_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31122752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31417728))), name = tensor("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31417856)))]; + tensor var_1177_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1177_dilations_0, groups = var_1177_groups_0, pad = var_1177_pad_0, pad_type = var_1177_pad_type_0, strides = var_1177_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_1177_cast_fp16")]; + tensor var_1183_pad_type_0 = const()[name = tensor("op_1183_pad_type_0"), val = tensor("valid")]; + tensor var_1183_strides_0 = const()[name = tensor("op_1183_strides_0"), val = tensor([1, 1])]; + tensor var_1183_pad_0 = const()[name = tensor("op_1183_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1183_dilations_0 = const()[name = tensor("op_1183_dilations_0"), val = tensor([1, 1])]; + tensor var_1183_groups_0 = const()[name = tensor("op_1183_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31426368))), name = tensor("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31419456))), shape = tensor([768, 768, 1, 1])]; + tensor var_1183_cast_fp16 = conv(dilations = var_1183_dilations_0, groups = var_1183_groups_0, pad = var_1183_pad_0, pad_type = var_1183_pad_type_0, strides = var_1183_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_1183_cast_fp16")]; + tensor value_11_cast_fp16 = add(x = var_1177_cast_fp16, y = var_1183_cast_fp16)[name = tensor("value_11_cast_fp16")]; + tensor var_1187 = const()[name = tensor("op_1187"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_11_cast_fp16 = reshape(shape = var_1187, x = query_11_cast_fp16)[name = tensor("mh_q_11_cast_fp16")]; + tensor var_1189_to_fp16 = const()[name = tensor("op_1189_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1190_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_1189_to_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1193 = const()[name = tensor("op_1193"), val = tensor([1, 12, 64, 1500])]; + tensor var_1194_cast_fp16 = reshape(shape = var_1193, x = key_11_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor mh_w_11_transpose_x_0 = const()[name = tensor("mh_w_11_transpose_x_0"), val = tensor(true)]; + tensor mh_w_11_transpose_y_0 = const()[name = tensor("mh_w_11_transpose_y_0"), val = tensor(false)]; + tensor mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_1190_cast_fp16, y = var_1194_cast_fp16)[name = tensor("mh_w_11_cast_fp16")]; + tensor var_1197_cast_fp16 = softmax(axis = var_1102, x = mh_w_11_cast_fp16)[name = tensor("op_1197_cast_fp16")]; + tensor var_1198 = const()[name = tensor("op_1198"), val = tensor([1, 12, 64, 1500])]; + tensor var_1199_cast_fp16 = reshape(shape = var_1198, x = value_11_cast_fp16)[name = tensor("op_1199_cast_fp16")]; + tensor attn_11_transpose_x_0 = const()[name = tensor("attn_11_transpose_x_0"), val = tensor(false)]; + tensor attn_11_transpose_y_0 = const()[name = tensor("attn_11_transpose_y_0"), val = tensor(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_1199_cast_fp16, y = var_1197_cast_fp16)[name = tensor("attn_11_cast_fp16")]; + tensor var_1202 = const()[name = tensor("op_1202"), val = tensor([1, 768, 1, 1500])]; + tensor input_41_cast_fp16 = reshape(shape = var_1202, x = attn_11_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor var_1212_pad_type_0 = const()[name = tensor("op_1212_pad_type_0"), val = tensor("valid")]; + tensor var_1212_strides_0 = const()[name = tensor("op_1212_strides_0"), val = tensor([1, 1])]; + tensor var_1212_pad_0 = const()[name = tensor("op_1212_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1212_dilations_0 = const()[name = tensor("op_1212_dilations_0"), val = tensor([1, 1])]; + tensor var_1212_groups_0 = const()[name = tensor("op_1212_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31500160))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31795136))), name = tensor("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31795264)))]; + tensor var_1212_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1212_dilations_0, groups = var_1212_groups_0, pad = var_1212_pad_0, pad_type = var_1212_pad_type_0, strides = var_1212_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = tensor("op_1212_cast_fp16")]; + tensor var_1218_pad_type_0 = const()[name = tensor("op_1218_pad_type_0"), val = tensor("valid")]; + tensor var_1218_strides_0 = const()[name = tensor("op_1218_strides_0"), val = tensor([1, 1])]; + tensor var_1218_pad_0 = const()[name = tensor("op_1218_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1218_dilations_0 = const()[name = tensor("op_1218_dilations_0"), val = tensor([1, 1])]; + tensor var_1218_groups_0 = const()[name = tensor("op_1218_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31804736))), name = tensor("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31796864))), shape = tensor([768, 768, 1, 1])]; + tensor var_1218_cast_fp16 = conv(dilations = var_1218_dilations_0, groups = var_1218_groups_0, pad = var_1218_pad_0, pad_type = var_1218_pad_type_0, strides = var_1218_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor obj_23_cast_fp16 = add(x = var_1212_cast_fp16, y = var_1218_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor out_23_axes_0 = const()[name = tensor("out_23_axes_0"), val = tensor([1])]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1229_to_fp16, x = inputs_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_43_gamma_0_to_fp16 = const()[name = tensor("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31878528)))]; + tensor input_43_beta_0_to_fp16 = const()[name = tensor("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31880128)))]; + tensor input_43_epsilon_0_to_fp16 = const()[name = tensor("input_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_1247_pad_type_0 = const()[name = tensor("op_1247_pad_type_0"), val = tensor("valid")]; + tensor var_1247_strides_0 = const()[name = tensor("op_1247_strides_0"), val = tensor([1, 1])]; + tensor var_1247_pad_0 = const()[name = tensor("op_1247_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1247_dilations_0 = const()[name = tensor("op_1247_dilations_0"), val = tensor([1, 1])]; + tensor var_1247_groups_0 = const()[name = tensor("op_1247_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31881728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33061440))), name = tensor("layers_5_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33061568)))]; + tensor var_1247_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1247_dilations_0, groups = var_1247_groups_0, pad = var_1247_pad_0, pad_type = var_1247_pad_type_0, strides = var_1247_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = tensor("op_1247_cast_fp16")]; + tensor var_1253_pad_type_0 = const()[name = tensor("op_1253_pad_type_0"), val = tensor("valid")]; + tensor var_1253_strides_0 = const()[name = tensor("op_1253_strides_0"), val = tensor([1, 1])]; + tensor var_1253_pad_0 = const()[name = tensor("op_1253_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1253_dilations_0 = const()[name = tensor("op_1253_dilations_0"), val = tensor([1, 1])]; + tensor var_1253_groups_0 = const()[name = tensor("op_1253_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33101248))), name = tensor("layers_5_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33067776))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1253_cast_fp16 = conv(dilations = var_1253_dilations_0, groups = var_1253_groups_0, pad = var_1253_pad_0, pad_type = var_1253_pad_type_0, strides = var_1253_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = tensor("op_1253_cast_fp16")]; + tensor input_45_cast_fp16 = add(x = var_1247_cast_fp16, y = var_1253_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor input_47_mode_0 = const()[name = tensor("input_47_mode_0"), val = tensor("EXACT")]; + tensor input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor var_1264_pad_type_0 = const()[name = tensor("op_1264_pad_type_0"), val = tensor("valid")]; + tensor var_1264_strides_0 = const()[name = tensor("op_1264_strides_0"), val = tensor([1, 1])]; + tensor var_1264_pad_0 = const()[name = tensor("op_1264_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1264_dilations_0 = const()[name = tensor("op_1264_dilations_0"), val = tensor([1, 1])]; + tensor var_1264_groups_0 = const()[name = tensor("op_1264_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33396224))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34575936))), name = tensor("layers_5_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34576064)))]; + tensor var_1264_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1264_dilations_0, groups = var_1264_groups_0, pad = var_1264_pad_0, pad_type = var_1264_pad_type_0, strides = var_1264_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_47_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1270_pad_type_0 = const()[name = tensor("op_1270_pad_type_0"), val = tensor("valid")]; + tensor var_1270_strides_0 = const()[name = tensor("op_1270_strides_0"), val = tensor([1, 1])]; + tensor var_1270_pad_0 = const()[name = tensor("op_1270_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1270_dilations_0 = const()[name = tensor("op_1270_dilations_0"), val = tensor([1, 1])]; + tensor var_1270_groups_0 = const()[name = tensor("op_1270_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34614208))), name = tensor("layers_5_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34577664))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1270_cast_fp16 = conv(dilations = var_1270_dilations_0, groups = var_1270_groups_0, pad = var_1270_pad_0, pad_type = var_1270_pad_type_0, strides = var_1270_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_47_cast_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = var_1264_cast_fp16, y = var_1270_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_1276 = const()[name = tensor("op_1276"), val = tensor(3)]; + tensor out_25_axes_0 = const()[name = tensor("out_25_axes_0"), val = tensor([1])]; + tensor var_1298_to_fp16 = const()[name = tensor("op_1298_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1298_to_fp16, x = inputs_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; + tensor obj_25_gamma_0_to_fp16 = const()[name = tensor("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34909184)))]; + tensor obj_25_beta_0_to_fp16 = const()[name = tensor("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34910784)))]; + tensor obj_25_epsilon_0_to_fp16 = const()[name = tensor("obj_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor var_1320_pad_type_0 = const()[name = tensor("op_1320_pad_type_0"), val = tensor("valid")]; + tensor var_1320_strides_0 = const()[name = tensor("op_1320_strides_0"), val = tensor([1, 1])]; + tensor var_1320_pad_0 = const()[name = tensor("op_1320_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1320_dilations_0 = const()[name = tensor("op_1320_dilations_0"), val = tensor([1, 1])]; + tensor var_1320_groups_0 = const()[name = tensor("op_1320_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34912384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35207360))), name = tensor("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35207488)))]; + tensor var_1320_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1320_dilations_0, groups = var_1320_groups_0, pad = var_1320_pad_0, pad_type = var_1320_pad_type_0, strides = var_1320_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1326_pad_type_0 = const()[name = tensor("op_1326_pad_type_0"), val = tensor("valid")]; + tensor var_1326_strides_0 = const()[name = tensor("op_1326_strides_0"), val = tensor([1, 1])]; + tensor var_1326_pad_0 = const()[name = tensor("op_1326_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1326_dilations_0 = const()[name = tensor("op_1326_dilations_0"), val = tensor([1, 1])]; + tensor var_1326_groups_0 = const()[name = tensor("op_1326_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35217280))), name = tensor("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35209088))), shape = tensor([768, 768, 1, 1])]; + tensor var_1326_cast_fp16 = conv(dilations = var_1326_dilations_0, groups = var_1326_groups_0, pad = var_1326_pad_0, pad_type = var_1326_pad_type_0, strides = var_1326_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_1326_cast_fp16")]; + tensor query_13_cast_fp16 = add(x = var_1320_cast_fp16, y = var_1326_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor var_1335_pad_type_0 = const()[name = tensor("op_1335_pad_type_0"), val = tensor("valid")]; + tensor var_1335_strides_0 = const()[name = tensor("op_1335_strides_0"), val = tensor([1, 1])]; + tensor var_1335_pad_0 = const()[name = tensor("op_1335_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1335_dilations_0 = const()[name = tensor("op_1335_dilations_0"), val = tensor([1, 1])]; + tensor var_1335_groups_0 = const()[name = tensor("op_1335_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35291072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35586048))), name = tensor("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1335_cast_fp16 = conv(dilations = var_1335_dilations_0, groups = var_1335_groups_0, pad = var_1335_pad_0, pad_type = var_1335_pad_type_0, strides = var_1335_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1341_pad_type_0 = const()[name = tensor("op_1341_pad_type_0"), val = tensor("valid")]; + tensor var_1341_strides_0 = const()[name = tensor("op_1341_strides_0"), val = tensor([1, 1])]; + tensor var_1341_pad_0 = const()[name = tensor("op_1341_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1341_dilations_0 = const()[name = tensor("op_1341_dilations_0"), val = tensor([1, 1])]; + tensor var_1341_groups_0 = const()[name = tensor("op_1341_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35593984))), name = tensor("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35586176))), shape = tensor([768, 768, 1, 1])]; + tensor var_1341_cast_fp16 = conv(dilations = var_1341_dilations_0, groups = var_1341_groups_0, pad = var_1341_pad_0, pad_type = var_1341_pad_type_0, strides = var_1341_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor key_13_cast_fp16 = add(x = var_1335_cast_fp16, y = var_1341_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_1351_pad_type_0 = const()[name = tensor("op_1351_pad_type_0"), val = tensor("valid")]; + tensor var_1351_strides_0 = const()[name = tensor("op_1351_strides_0"), val = tensor([1, 1])]; + tensor var_1351_pad_0 = const()[name = tensor("op_1351_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1351_dilations_0 = const()[name = tensor("op_1351_dilations_0"), val = tensor([1, 1])]; + tensor var_1351_groups_0 = const()[name = tensor("op_1351_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35667776))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35962752))), name = tensor("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35962880)))]; + tensor var_1351_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1351_dilations_0, groups = var_1351_groups_0, pad = var_1351_pad_0, pad_type = var_1351_pad_type_0, strides = var_1351_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_1351_cast_fp16")]; + tensor var_1357_pad_type_0 = const()[name = tensor("op_1357_pad_type_0"), val = tensor("valid")]; + tensor var_1357_strides_0 = const()[name = tensor("op_1357_strides_0"), val = tensor([1, 1])]; + tensor var_1357_pad_0 = const()[name = tensor("op_1357_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1357_dilations_0 = const()[name = tensor("op_1357_dilations_0"), val = tensor([1, 1])]; + tensor var_1357_groups_0 = const()[name = tensor("op_1357_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35970496))), name = tensor("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35964480))), shape = tensor([768, 768, 1, 1])]; + tensor var_1357_cast_fp16 = conv(dilations = var_1357_dilations_0, groups = var_1357_groups_0, pad = var_1357_pad_0, pad_type = var_1357_pad_type_0, strides = var_1357_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_1357_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_1351_cast_fp16, y = var_1357_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_1361 = const()[name = tensor("op_1361"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_1361, x = query_13_cast_fp16)[name = tensor("mh_q_13_cast_fp16")]; + tensor var_1363_to_fp16 = const()[name = tensor("op_1363_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1364_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1363_to_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1367 = const()[name = tensor("op_1367"), val = tensor([1, 12, 64, 1500])]; + tensor var_1368_cast_fp16 = reshape(shape = var_1367, x = key_13_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor mh_w_13_transpose_x_0 = const()[name = tensor("mh_w_13_transpose_x_0"), val = tensor(true)]; + tensor mh_w_13_transpose_y_0 = const()[name = tensor("mh_w_13_transpose_y_0"), val = tensor(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_1364_cast_fp16, y = var_1368_cast_fp16)[name = tensor("mh_w_13_cast_fp16")]; + tensor var_1371_cast_fp16 = softmax(axis = var_1276, x = mh_w_13_cast_fp16)[name = tensor("op_1371_cast_fp16")]; + tensor var_1372 = const()[name = tensor("op_1372"), val = tensor([1, 12, 64, 1500])]; + tensor var_1373_cast_fp16 = reshape(shape = var_1372, x = value_13_cast_fp16)[name = tensor("op_1373_cast_fp16")]; + tensor attn_13_transpose_x_0 = const()[name = tensor("attn_13_transpose_x_0"), val = tensor(false)]; + tensor attn_13_transpose_y_0 = const()[name = tensor("attn_13_transpose_y_0"), val = tensor(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1373_cast_fp16, y = var_1371_cast_fp16)[name = tensor("attn_13_cast_fp16")]; + tensor var_1376 = const()[name = tensor("op_1376"), val = tensor([1, 768, 1, 1500])]; + tensor input_49_cast_fp16 = reshape(shape = var_1376, x = attn_13_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor var_1386_pad_type_0 = const()[name = tensor("op_1386_pad_type_0"), val = tensor("valid")]; + tensor var_1386_strides_0 = const()[name = tensor("op_1386_strides_0"), val = tensor([1, 1])]; + tensor var_1386_pad_0 = const()[name = tensor("op_1386_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1386_dilations_0 = const()[name = tensor("op_1386_dilations_0"), val = tensor([1, 1])]; + tensor var_1386_groups_0 = const()[name = tensor("op_1386_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36044288))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36339264))), name = tensor("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36339392)))]; + tensor var_1386_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1386_dilations_0, groups = var_1386_groups_0, pad = var_1386_pad_0, pad_type = var_1386_pad_type_0, strides = var_1386_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1392_pad_type_0 = const()[name = tensor("op_1392_pad_type_0"), val = tensor("valid")]; + tensor var_1392_strides_0 = const()[name = tensor("op_1392_strides_0"), val = tensor([1, 1])]; + tensor var_1392_pad_0 = const()[name = tensor("op_1392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1392_dilations_0 = const()[name = tensor("op_1392_dilations_0"), val = tensor([1, 1])]; + tensor var_1392_groups_0 = const()[name = tensor("op_1392_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36346496))), name = tensor("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36340992))), shape = tensor([768, 768, 1, 1])]; + tensor var_1392_cast_fp16 = conv(dilations = var_1392_dilations_0, groups = var_1392_groups_0, pad = var_1392_pad_0, pad_type = var_1392_pad_type_0, strides = var_1392_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = tensor("op_1392_cast_fp16")]; + tensor obj_27_cast_fp16 = add(x = var_1386_cast_fp16, y = var_1392_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor out_27_axes_0 = const()[name = tensor("out_27_axes_0"), val = tensor([1])]; + tensor var_1403_to_fp16 = const()[name = tensor("op_1403_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1403_to_fp16, x = inputs_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; + tensor input_51_gamma_0_to_fp16 = const()[name = tensor("input_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36420288)))]; + tensor input_51_beta_0_to_fp16 = const()[name = tensor("input_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36421888)))]; + tensor input_51_epsilon_0_to_fp16 = const()[name = tensor("input_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor var_1421_pad_type_0 = const()[name = tensor("op_1421_pad_type_0"), val = tensor("valid")]; + tensor var_1421_strides_0 = const()[name = tensor("op_1421_strides_0"), val = tensor([1, 1])]; + tensor var_1421_pad_0 = const()[name = tensor("op_1421_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1421_dilations_0 = const()[name = tensor("op_1421_dilations_0"), val = tensor([1, 1])]; + tensor var_1421_groups_0 = const()[name = tensor("op_1421_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36423488))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37603200))), name = tensor("layers_6_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37603328)))]; + tensor var_1421_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_1421_dilations_0, groups = var_1421_groups_0, pad = var_1421_pad_0, pad_type = var_1421_pad_type_0, strides = var_1421_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = tensor("op_1421_cast_fp16")]; + tensor var_1427_pad_type_0 = const()[name = tensor("op_1427_pad_type_0"), val = tensor("valid")]; + tensor var_1427_strides_0 = const()[name = tensor("op_1427_strides_0"), val = tensor([1, 1])]; + tensor var_1427_pad_0 = const()[name = tensor("op_1427_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1427_dilations_0 = const()[name = tensor("op_1427_dilations_0"), val = tensor([1, 1])]; + tensor var_1427_groups_0 = const()[name = tensor("op_1427_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37638976))), name = tensor("layers_6_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37609536))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1427_cast_fp16 = conv(dilations = var_1427_dilations_0, groups = var_1427_groups_0, pad = var_1427_pad_0, pad_type = var_1427_pad_type_0, strides = var_1427_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = tensor("op_1427_cast_fp16")]; + tensor input_53_cast_fp16 = add(x = var_1421_cast_fp16, y = var_1427_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor input_55_mode_0 = const()[name = tensor("input_55_mode_0"), val = tensor("EXACT")]; + tensor input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_1438_pad_type_0 = const()[name = tensor("op_1438_pad_type_0"), val = tensor("valid")]; + tensor var_1438_strides_0 = const()[name = tensor("op_1438_strides_0"), val = tensor([1, 1])]; + tensor var_1438_pad_0 = const()[name = tensor("op_1438_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1438_dilations_0 = const()[name = tensor("op_1438_dilations_0"), val = tensor([1, 1])]; + tensor var_1438_groups_0 = const()[name = tensor("op_1438_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37933952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39113664))), name = tensor("layers_6_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39113792)))]; + tensor var_1438_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_1438_dilations_0, groups = var_1438_groups_0, pad = var_1438_pad_0, pad_type = var_1438_pad_type_0, strides = var_1438_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1444_pad_type_0 = const()[name = tensor("op_1444_pad_type_0"), val = tensor("valid")]; + tensor var_1444_strides_0 = const()[name = tensor("op_1444_strides_0"), val = tensor([1, 1])]; + tensor var_1444_pad_0 = const()[name = tensor("op_1444_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1444_dilations_0 = const()[name = tensor("op_1444_dilations_0"), val = tensor([1, 1])]; + tensor var_1444_groups_0 = const()[name = tensor("op_1444_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39142720))), name = tensor("layers_6_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39115392))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1444_cast_fp16 = conv(dilations = var_1444_dilations_0, groups = var_1444_groups_0, pad = var_1444_pad_0, pad_type = var_1444_pad_type_0, strides = var_1444_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = tensor("op_1444_cast_fp16")]; + tensor hidden_states_17_cast_fp16 = add(x = var_1438_cast_fp16, y = var_1444_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor var_1450 = const()[name = tensor("op_1450"), val = tensor(3)]; + tensor out_29_axes_0 = const()[name = tensor("out_29_axes_0"), val = tensor([1])]; + tensor var_1472_to_fp16 = const()[name = tensor("op_1472_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1472_to_fp16, x = inputs_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39437696)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39439296)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor var_1494_pad_type_0 = const()[name = tensor("op_1494_pad_type_0"), val = tensor("valid")]; + tensor var_1494_strides_0 = const()[name = tensor("op_1494_strides_0"), val = tensor([1, 1])]; + tensor var_1494_pad_0 = const()[name = tensor("op_1494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1494_dilations_0 = const()[name = tensor("op_1494_dilations_0"), val = tensor([1, 1])]; + tensor var_1494_groups_0 = const()[name = tensor("op_1494_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39440896))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39735872))), name = tensor("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39736000)))]; + tensor var_1494_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1494_dilations_0, groups = var_1494_groups_0, pad = var_1494_pad_0, pad_type = var_1494_pad_type_0, strides = var_1494_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1500_pad_type_0 = const()[name = tensor("op_1500_pad_type_0"), val = tensor("valid")]; + tensor var_1500_strides_0 = const()[name = tensor("op_1500_strides_0"), val = tensor([1, 1])]; + tensor var_1500_pad_0 = const()[name = tensor("op_1500_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1500_dilations_0 = const()[name = tensor("op_1500_dilations_0"), val = tensor([1, 1])]; + tensor var_1500_groups_0 = const()[name = tensor("op_1500_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39745536))), name = tensor("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39737600))), shape = tensor([768, 768, 1, 1])]; + tensor var_1500_cast_fp16 = conv(dilations = var_1500_dilations_0, groups = var_1500_groups_0, pad = var_1500_pad_0, pad_type = var_1500_pad_type_0, strides = var_1500_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_1500_cast_fp16")]; + tensor query_15_cast_fp16 = add(x = var_1494_cast_fp16, y = var_1500_cast_fp16)[name = tensor("query_15_cast_fp16")]; + tensor var_1509_pad_type_0 = const()[name = tensor("op_1509_pad_type_0"), val = tensor("valid")]; + tensor var_1509_strides_0 = const()[name = tensor("op_1509_strides_0"), val = tensor([1, 1])]; + tensor var_1509_pad_0 = const()[name = tensor("op_1509_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1509_dilations_0 = const()[name = tensor("op_1509_dilations_0"), val = tensor([1, 1])]; + tensor var_1509_groups_0 = const()[name = tensor("op_1509_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39819328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40114304))), name = tensor("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1509_cast_fp16 = conv(dilations = var_1509_dilations_0, groups = var_1509_groups_0, pad = var_1509_pad_0, pad_type = var_1509_pad_type_0, strides = var_1509_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_1509_cast_fp16")]; + tensor var_1515_pad_type_0 = const()[name = tensor("op_1515_pad_type_0"), val = tensor("valid")]; + tensor var_1515_strides_0 = const()[name = tensor("op_1515_strides_0"), val = tensor([1, 1])]; + tensor var_1515_pad_0 = const()[name = tensor("op_1515_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1515_dilations_0 = const()[name = tensor("op_1515_dilations_0"), val = tensor([1, 1])]; + tensor var_1515_groups_0 = const()[name = tensor("op_1515_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40123840))), name = tensor("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40114432))), shape = tensor([768, 768, 1, 1])]; + tensor var_1515_cast_fp16 = conv(dilations = var_1515_dilations_0, groups = var_1515_groups_0, pad = var_1515_pad_0, pad_type = var_1515_pad_type_0, strides = var_1515_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_1515_cast_fp16")]; + tensor key_15_cast_fp16 = add(x = var_1509_cast_fp16, y = var_1515_cast_fp16)[name = tensor("key_15_cast_fp16")]; + tensor var_1525_pad_type_0 = const()[name = tensor("op_1525_pad_type_0"), val = tensor("valid")]; + tensor var_1525_strides_0 = const()[name = tensor("op_1525_strides_0"), val = tensor([1, 1])]; + tensor var_1525_pad_0 = const()[name = tensor("op_1525_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1525_dilations_0 = const()[name = tensor("op_1525_dilations_0"), val = tensor([1, 1])]; + tensor var_1525_groups_0 = const()[name = tensor("op_1525_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40197632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40492608))), name = tensor("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40492736)))]; + tensor var_1525_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1525_dilations_0, groups = var_1525_groups_0, pad = var_1525_pad_0, pad_type = var_1525_pad_type_0, strides = var_1525_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_1525_cast_fp16")]; + tensor var_1531_pad_type_0 = const()[name = tensor("op_1531_pad_type_0"), val = tensor("valid")]; + tensor var_1531_strides_0 = const()[name = tensor("op_1531_strides_0"), val = tensor([1, 1])]; + tensor var_1531_pad_0 = const()[name = tensor("op_1531_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1531_dilations_0 = const()[name = tensor("op_1531_dilations_0"), val = tensor([1, 1])]; + tensor var_1531_groups_0 = const()[name = tensor("op_1531_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40503360))), name = tensor("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40494336))), shape = tensor([768, 768, 1, 1])]; + tensor var_1531_cast_fp16 = conv(dilations = var_1531_dilations_0, groups = var_1531_groups_0, pad = var_1531_pad_0, pad_type = var_1531_pad_type_0, strides = var_1531_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_1531_cast_fp16")]; + tensor value_15_cast_fp16 = add(x = var_1525_cast_fp16, y = var_1531_cast_fp16)[name = tensor("value_15_cast_fp16")]; + tensor var_1535 = const()[name = tensor("op_1535"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_15_cast_fp16 = reshape(shape = var_1535, x = query_15_cast_fp16)[name = tensor("mh_q_15_cast_fp16")]; + tensor var_1537_to_fp16 = const()[name = tensor("op_1537_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1538_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1537_to_fp16)[name = tensor("op_1538_cast_fp16")]; + tensor var_1541 = const()[name = tensor("op_1541"), val = tensor([1, 12, 64, 1500])]; + tensor var_1542_cast_fp16 = reshape(shape = var_1541, x = key_15_cast_fp16)[name = tensor("op_1542_cast_fp16")]; + tensor mh_w_15_transpose_x_0 = const()[name = tensor("mh_w_15_transpose_x_0"), val = tensor(true)]; + tensor mh_w_15_transpose_y_0 = const()[name = tensor("mh_w_15_transpose_y_0"), val = tensor(false)]; + tensor mh_w_15_cast_fp16 = matmul(transpose_x = mh_w_15_transpose_x_0, transpose_y = mh_w_15_transpose_y_0, x = var_1538_cast_fp16, y = var_1542_cast_fp16)[name = tensor("mh_w_15_cast_fp16")]; + tensor var_1545_cast_fp16 = softmax(axis = var_1450, x = mh_w_15_cast_fp16)[name = tensor("op_1545_cast_fp16")]; + tensor var_1546 = const()[name = tensor("op_1546"), val = tensor([1, 12, 64, 1500])]; + tensor var_1547_cast_fp16 = reshape(shape = var_1546, x = value_15_cast_fp16)[name = tensor("op_1547_cast_fp16")]; + tensor attn_15_transpose_x_0 = const()[name = tensor("attn_15_transpose_x_0"), val = tensor(false)]; + tensor attn_15_transpose_y_0 = const()[name = tensor("attn_15_transpose_y_0"), val = tensor(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1547_cast_fp16, y = var_1545_cast_fp16)[name = tensor("attn_15_cast_fp16")]; + tensor var_1550 = const()[name = tensor("op_1550"), val = tensor([1, 768, 1, 1500])]; + tensor input_57_cast_fp16 = reshape(shape = var_1550, x = attn_15_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor var_1560_pad_type_0 = const()[name = tensor("op_1560_pad_type_0"), val = tensor("valid")]; + tensor var_1560_strides_0 = const()[name = tensor("op_1560_strides_0"), val = tensor([1, 1])]; + tensor var_1560_pad_0 = const()[name = tensor("op_1560_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1560_dilations_0 = const()[name = tensor("op_1560_dilations_0"), val = tensor([1, 1])]; + tensor var_1560_groups_0 = const()[name = tensor("op_1560_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40577152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40872128))), name = tensor("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40872256)))]; + tensor var_1560_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1560_dilations_0, groups = var_1560_groups_0, pad = var_1560_pad_0, pad_type = var_1560_pad_type_0, strides = var_1560_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_57_cast_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1566_pad_type_0 = const()[name = tensor("op_1566_pad_type_0"), val = tensor("valid")]; + tensor var_1566_strides_0 = const()[name = tensor("op_1566_strides_0"), val = tensor([1, 1])]; + tensor var_1566_pad_0 = const()[name = tensor("op_1566_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1566_dilations_0 = const()[name = tensor("op_1566_dilations_0"), val = tensor([1, 1])]; + tensor var_1566_groups_0 = const()[name = tensor("op_1566_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40885568))), name = tensor("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40873856))), shape = tensor([768, 768, 1, 1])]; + tensor var_1566_cast_fp16 = conv(dilations = var_1566_dilations_0, groups = var_1566_groups_0, pad = var_1566_pad_0, pad_type = var_1566_pad_type_0, strides = var_1566_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_57_cast_fp16)[name = tensor("op_1566_cast_fp16")]; + tensor obj_31_cast_fp16 = add(x = var_1560_cast_fp16, y = var_1566_cast_fp16)[name = tensor("obj_31_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor out_31_axes_0 = const()[name = tensor("out_31_axes_0"), val = tensor([1])]; + tensor var_1577_to_fp16 = const()[name = tensor("op_1577_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1577_to_fp16, x = inputs_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; + tensor input_59_gamma_0_to_fp16 = const()[name = tensor("input_59_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40959360)))]; + tensor input_59_beta_0_to_fp16 = const()[name = tensor("input_59_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40960960)))]; + tensor input_59_epsilon_0_to_fp16 = const()[name = tensor("input_59_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor var_1595_pad_type_0 = const()[name = tensor("op_1595_pad_type_0"), val = tensor("valid")]; + tensor var_1595_strides_0 = const()[name = tensor("op_1595_strides_0"), val = tensor([1, 1])]; + tensor var_1595_pad_0 = const()[name = tensor("op_1595_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1595_dilations_0 = const()[name = tensor("op_1595_dilations_0"), val = tensor([1, 1])]; + tensor var_1595_groups_0 = const()[name = tensor("op_1595_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40962560))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42142272))), name = tensor("layers_7_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42142400)))]; + tensor var_1595_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_1595_dilations_0, groups = var_1595_groups_0, pad = var_1595_pad_0, pad_type = var_1595_pad_type_0, strides = var_1595_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = tensor("op_1595_cast_fp16")]; + tensor var_1601_pad_type_0 = const()[name = tensor("op_1601_pad_type_0"), val = tensor("valid")]; + tensor var_1601_strides_0 = const()[name = tensor("op_1601_strides_0"), val = tensor([1, 1])]; + tensor var_1601_pad_0 = const()[name = tensor("op_1601_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1601_dilations_0 = const()[name = tensor("op_1601_dilations_0"), val = tensor([1, 1])]; + tensor var_1601_groups_0 = const()[name = tensor("op_1601_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42172224))), name = tensor("layers_7_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42148608))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1601_cast_fp16 = conv(dilations = var_1601_dilations_0, groups = var_1601_groups_0, pad = var_1601_pad_0, pad_type = var_1601_pad_type_0, strides = var_1601_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = tensor("op_1601_cast_fp16")]; + tensor input_61_cast_fp16 = add(x = var_1595_cast_fp16, y = var_1601_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor input_63_mode_0 = const()[name = tensor("input_63_mode_0"), val = tensor("EXACT")]; + tensor input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor var_1612_pad_type_0 = const()[name = tensor("op_1612_pad_type_0"), val = tensor("valid")]; + tensor var_1612_strides_0 = const()[name = tensor("op_1612_strides_0"), val = tensor([1, 1])]; + tensor var_1612_pad_0 = const()[name = tensor("op_1612_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1612_dilations_0 = const()[name = tensor("op_1612_dilations_0"), val = tensor([1, 1])]; + tensor var_1612_groups_0 = const()[name = tensor("op_1612_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42467200))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43646912))), name = tensor("layers_7_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43647040)))]; + tensor var_1612_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_1612_dilations_0, groups = var_1612_groups_0, pad = var_1612_pad_0, pad_type = var_1612_pad_type_0, strides = var_1612_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1618_pad_type_0 = const()[name = tensor("op_1618_pad_type_0"), val = tensor("valid")]; + tensor var_1618_strides_0 = const()[name = tensor("op_1618_strides_0"), val = tensor([1, 1])]; + tensor var_1618_pad_0 = const()[name = tensor("op_1618_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1618_dilations_0 = const()[name = tensor("op_1618_dilations_0"), val = tensor([1, 1])]; + tensor var_1618_groups_0 = const()[name = tensor("op_1618_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43671552))), name = tensor("layers_7_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43648640))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1618_cast_fp16 = conv(dilations = var_1618_dilations_0, groups = var_1618_groups_0, pad = var_1618_pad_0, pad_type = var_1618_pad_type_0, strides = var_1618_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = tensor("op_1618_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = add(x = var_1612_cast_fp16, y = var_1618_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor var_1624 = const()[name = tensor("op_1624"), val = tensor(3)]; + tensor out_33_axes_0 = const()[name = tensor("out_33_axes_0"), val = tensor([1])]; + tensor var_1646_to_fp16 = const()[name = tensor("op_1646_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1646_to_fp16, x = inputs_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; + tensor obj_33_gamma_0_to_fp16 = const()[name = tensor("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43966528)))]; + tensor obj_33_beta_0_to_fp16 = const()[name = tensor("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43968128)))]; + tensor obj_33_epsilon_0_to_fp16 = const()[name = tensor("obj_33_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_33_cast_fp16")]; + tensor var_1668_pad_type_0 = const()[name = tensor("op_1668_pad_type_0"), val = tensor("valid")]; + tensor var_1668_strides_0 = const()[name = tensor("op_1668_strides_0"), val = tensor([1, 1])]; + tensor var_1668_pad_0 = const()[name = tensor("op_1668_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1668_dilations_0 = const()[name = tensor("op_1668_dilations_0"), val = tensor([1, 1])]; + tensor var_1668_groups_0 = const()[name = tensor("op_1668_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43969728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44264704))), name = tensor("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44264832)))]; + tensor var_1668_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1668_dilations_0, groups = var_1668_groups_0, pad = var_1668_pad_0, pad_type = var_1668_pad_type_0, strides = var_1668_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1674_pad_type_0 = const()[name = tensor("op_1674_pad_type_0"), val = tensor("valid")]; + tensor var_1674_strides_0 = const()[name = tensor("op_1674_strides_0"), val = tensor([1, 1])]; + tensor var_1674_pad_0 = const()[name = tensor("op_1674_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1674_dilations_0 = const()[name = tensor("op_1674_dilations_0"), val = tensor([1, 1])]; + tensor var_1674_groups_0 = const()[name = tensor("op_1674_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44272704))), name = tensor("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44266432))), shape = tensor([768, 768, 1, 1])]; + tensor var_1674_cast_fp16 = conv(dilations = var_1674_dilations_0, groups = var_1674_groups_0, pad = var_1674_pad_0, pad_type = var_1674_pad_type_0, strides = var_1674_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor query_17_cast_fp16 = add(x = var_1668_cast_fp16, y = var_1674_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor var_1683_pad_type_0 = const()[name = tensor("op_1683_pad_type_0"), val = tensor("valid")]; + tensor var_1683_strides_0 = const()[name = tensor("op_1683_strides_0"), val = tensor([1, 1])]; + tensor var_1683_pad_0 = const()[name = tensor("op_1683_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1683_dilations_0 = const()[name = tensor("op_1683_dilations_0"), val = tensor([1, 1])]; + tensor var_1683_groups_0 = const()[name = tensor("op_1683_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44346496))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44641472))), name = tensor("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1683_cast_fp16 = conv(dilations = var_1683_dilations_0, groups = var_1683_groups_0, pad = var_1683_pad_0, pad_type = var_1683_pad_type_0, strides = var_1683_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1689_pad_type_0 = const()[name = tensor("op_1689_pad_type_0"), val = tensor("valid")]; + tensor var_1689_strides_0 = const()[name = tensor("op_1689_strides_0"), val = tensor([1, 1])]; + tensor var_1689_pad_0 = const()[name = tensor("op_1689_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1689_dilations_0 = const()[name = tensor("op_1689_dilations_0"), val = tensor([1, 1])]; + tensor var_1689_groups_0 = const()[name = tensor("op_1689_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44648384))), name = tensor("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44641600))), shape = tensor([768, 768, 1, 1])]; + tensor var_1689_cast_fp16 = conv(dilations = var_1689_dilations_0, groups = var_1689_groups_0, pad = var_1689_pad_0, pad_type = var_1689_pad_type_0, strides = var_1689_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor key_17_cast_fp16 = add(x = var_1683_cast_fp16, y = var_1689_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor var_1699_pad_type_0 = const()[name = tensor("op_1699_pad_type_0"), val = tensor("valid")]; + tensor var_1699_strides_0 = const()[name = tensor("op_1699_strides_0"), val = tensor([1, 1])]; + tensor var_1699_pad_0 = const()[name = tensor("op_1699_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1699_dilations_0 = const()[name = tensor("op_1699_dilations_0"), val = tensor([1, 1])]; + tensor var_1699_groups_0 = const()[name = tensor("op_1699_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44722176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45017152))), name = tensor("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45017280)))]; + tensor var_1699_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1699_dilations_0, groups = var_1699_groups_0, pad = var_1699_pad_0, pad_type = var_1699_pad_type_0, strides = var_1699_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_1699_cast_fp16")]; + tensor var_1705_pad_type_0 = const()[name = tensor("op_1705_pad_type_0"), val = tensor("valid")]; + tensor var_1705_strides_0 = const()[name = tensor("op_1705_strides_0"), val = tensor([1, 1])]; + tensor var_1705_pad_0 = const()[name = tensor("op_1705_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1705_dilations_0 = const()[name = tensor("op_1705_dilations_0"), val = tensor([1, 1])]; + tensor var_1705_groups_0 = const()[name = tensor("op_1705_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45025152))), name = tensor("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45018880))), shape = tensor([768, 768, 1, 1])]; + tensor var_1705_cast_fp16 = conv(dilations = var_1705_dilations_0, groups = var_1705_groups_0, pad = var_1705_pad_0, pad_type = var_1705_pad_type_0, strides = var_1705_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_1705_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1699_cast_fp16, y = var_1705_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_1709 = const()[name = tensor("op_1709"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_17_cast_fp16 = reshape(shape = var_1709, x = query_17_cast_fp16)[name = tensor("mh_q_17_cast_fp16")]; + tensor var_1711_to_fp16 = const()[name = tensor("op_1711_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1712_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1711_to_fp16)[name = tensor("op_1712_cast_fp16")]; + tensor var_1715 = const()[name = tensor("op_1715"), val = tensor([1, 12, 64, 1500])]; + tensor var_1716_cast_fp16 = reshape(shape = var_1715, x = key_17_cast_fp16)[name = tensor("op_1716_cast_fp16")]; + tensor mh_w_17_transpose_x_0 = const()[name = tensor("mh_w_17_transpose_x_0"), val = tensor(true)]; + tensor mh_w_17_transpose_y_0 = const()[name = tensor("mh_w_17_transpose_y_0"), val = tensor(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_1712_cast_fp16, y = var_1716_cast_fp16)[name = tensor("mh_w_17_cast_fp16")]; + tensor var_1719_cast_fp16 = softmax(axis = var_1624, x = mh_w_17_cast_fp16)[name = tensor("op_1719_cast_fp16")]; + tensor var_1720 = const()[name = tensor("op_1720"), val = tensor([1, 12, 64, 1500])]; + tensor var_1721_cast_fp16 = reshape(shape = var_1720, x = value_17_cast_fp16)[name = tensor("op_1721_cast_fp16")]; + tensor attn_17_transpose_x_0 = const()[name = tensor("attn_17_transpose_x_0"), val = tensor(false)]; + tensor attn_17_transpose_y_0 = const()[name = tensor("attn_17_transpose_y_0"), val = tensor(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1721_cast_fp16, y = var_1719_cast_fp16)[name = tensor("attn_17_cast_fp16")]; + tensor var_1724 = const()[name = tensor("op_1724"), val = tensor([1, 768, 1, 1500])]; + tensor input_65_cast_fp16 = reshape(shape = var_1724, x = attn_17_cast_fp16)[name = tensor("input_65_cast_fp16")]; + tensor var_1734_pad_type_0 = const()[name = tensor("op_1734_pad_type_0"), val = tensor("valid")]; + tensor var_1734_strides_0 = const()[name = tensor("op_1734_strides_0"), val = tensor([1, 1])]; + tensor var_1734_pad_0 = const()[name = tensor("op_1734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1734_dilations_0 = const()[name = tensor("op_1734_dilations_0"), val = tensor([1, 1])]; + tensor var_1734_groups_0 = const()[name = tensor("op_1734_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45098944))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45393920))), name = tensor("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45394048)))]; + tensor var_1734_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1734_dilations_0, groups = var_1734_groups_0, pad = var_1734_pad_0, pad_type = var_1734_pad_type_0, strides = var_1734_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = tensor("op_1734_cast_fp16")]; + tensor var_1740_pad_type_0 = const()[name = tensor("op_1740_pad_type_0"), val = tensor("valid")]; + tensor var_1740_strides_0 = const()[name = tensor("op_1740_strides_0"), val = tensor([1, 1])]; + tensor var_1740_pad_0 = const()[name = tensor("op_1740_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1740_dilations_0 = const()[name = tensor("op_1740_dilations_0"), val = tensor([1, 1])]; + tensor var_1740_groups_0 = const()[name = tensor("op_1740_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45402880))), name = tensor("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45395648))), shape = tensor([768, 768, 1, 1])]; + tensor var_1740_cast_fp16 = conv(dilations = var_1740_dilations_0, groups = var_1740_groups_0, pad = var_1740_pad_0, pad_type = var_1740_pad_type_0, strides = var_1740_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = tensor("op_1740_cast_fp16")]; + tensor obj_35_cast_fp16 = add(x = var_1734_cast_fp16, y = var_1740_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; + tensor out_35_axes_0 = const()[name = tensor("out_35_axes_0"), val = tensor([1])]; + tensor var_1751_to_fp16 = const()[name = tensor("op_1751_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1751_to_fp16, x = inputs_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; + tensor input_67_gamma_0_to_fp16 = const()[name = tensor("input_67_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45476672)))]; + tensor input_67_beta_0_to_fp16 = const()[name = tensor("input_67_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45478272)))]; + tensor input_67_epsilon_0_to_fp16 = const()[name = tensor("input_67_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor var_1769_pad_type_0 = const()[name = tensor("op_1769_pad_type_0"), val = tensor("valid")]; + tensor var_1769_strides_0 = const()[name = tensor("op_1769_strides_0"), val = tensor([1, 1])]; + tensor var_1769_pad_0 = const()[name = tensor("op_1769_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1769_dilations_0 = const()[name = tensor("op_1769_dilations_0"), val = tensor([1, 1])]; + tensor var_1769_groups_0 = const()[name = tensor("op_1769_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45479872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46659584))), name = tensor("layers_8_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46659712)))]; + tensor var_1769_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_1769_dilations_0, groups = var_1769_groups_0, pad = var_1769_pad_0, pad_type = var_1769_pad_type_0, strides = var_1769_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = tensor("op_1769_cast_fp16")]; + tensor var_1775_pad_type_0 = const()[name = tensor("op_1775_pad_type_0"), val = tensor("valid")]; + tensor var_1775_strides_0 = const()[name = tensor("op_1775_strides_0"), val = tensor([1, 1])]; + tensor var_1775_pad_0 = const()[name = tensor("op_1775_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1775_dilations_0 = const()[name = tensor("op_1775_dilations_0"), val = tensor([1, 1])]; + tensor var_1775_groups_0 = const()[name = tensor("op_1775_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46692096))), name = tensor("layers_8_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46665920))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1775_cast_fp16 = conv(dilations = var_1775_dilations_0, groups = var_1775_groups_0, pad = var_1775_pad_0, pad_type = var_1775_pad_type_0, strides = var_1775_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_67_cast_fp16)[name = tensor("op_1775_cast_fp16")]; + tensor input_69_cast_fp16 = add(x = var_1769_cast_fp16, y = var_1775_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor input_71_mode_0 = const()[name = tensor("input_71_mode_0"), val = tensor("EXACT")]; + tensor input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor var_1786_pad_type_0 = const()[name = tensor("op_1786_pad_type_0"), val = tensor("valid")]; + tensor var_1786_strides_0 = const()[name = tensor("op_1786_strides_0"), val = tensor([1, 1])]; + tensor var_1786_pad_0 = const()[name = tensor("op_1786_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1786_dilations_0 = const()[name = tensor("op_1786_dilations_0"), val = tensor([1, 1])]; + tensor var_1786_groups_0 = const()[name = tensor("op_1786_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46987072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48166784))), name = tensor("layers_8_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48166912)))]; + tensor var_1786_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_1786_dilations_0, groups = var_1786_groups_0, pad = var_1786_pad_0, pad_type = var_1786_pad_type_0, strides = var_1786_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = tensor("op_1786_cast_fp16")]; + tensor var_1792_pad_type_0 = const()[name = tensor("op_1792_pad_type_0"), val = tensor("valid")]; + tensor var_1792_strides_0 = const()[name = tensor("op_1792_strides_0"), val = tensor([1, 1])]; + tensor var_1792_pad_0 = const()[name = tensor("op_1792_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1792_dilations_0 = const()[name = tensor("op_1792_dilations_0"), val = tensor([1, 1])]; + tensor var_1792_groups_0 = const()[name = tensor("op_1792_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48189952))), name = tensor("layers_8_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48168512))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1792_cast_fp16 = conv(dilations = var_1792_dilations_0, groups = var_1792_groups_0, pad = var_1792_pad_0, pad_type = var_1792_pad_type_0, strides = var_1792_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = tensor("op_1792_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = var_1786_cast_fp16, y = var_1792_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; + tensor var_1798 = const()[name = tensor("op_1798"), val = tensor(3)]; + tensor out_37_axes_0 = const()[name = tensor("out_37_axes_0"), val = tensor([1])]; + tensor var_1820_to_fp16 = const()[name = tensor("op_1820_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1820_to_fp16, x = inputs_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48484928)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48486528)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor var_1842_pad_type_0 = const()[name = tensor("op_1842_pad_type_0"), val = tensor("valid")]; + tensor var_1842_strides_0 = const()[name = tensor("op_1842_strides_0"), val = tensor([1, 1])]; + tensor var_1842_pad_0 = const()[name = tensor("op_1842_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1842_dilations_0 = const()[name = tensor("op_1842_dilations_0"), val = tensor([1, 1])]; + tensor var_1842_groups_0 = const()[name = tensor("op_1842_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48488128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48783104))), name = tensor("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48783232)))]; + tensor var_1842_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1842_dilations_0, groups = var_1842_groups_0, pad = var_1842_pad_0, pad_type = var_1842_pad_type_0, strides = var_1842_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_1842_cast_fp16")]; + tensor var_1848_pad_type_0 = const()[name = tensor("op_1848_pad_type_0"), val = tensor("valid")]; + tensor var_1848_strides_0 = const()[name = tensor("op_1848_strides_0"), val = tensor([1, 1])]; + tensor var_1848_pad_0 = const()[name = tensor("op_1848_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1848_dilations_0 = const()[name = tensor("op_1848_dilations_0"), val = tensor([1, 1])]; + tensor var_1848_groups_0 = const()[name = tensor("op_1848_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48790784))), name = tensor("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48784832))), shape = tensor([768, 768, 1, 1])]; + tensor var_1848_cast_fp16 = conv(dilations = var_1848_dilations_0, groups = var_1848_groups_0, pad = var_1848_pad_0, pad_type = var_1848_pad_type_0, strides = var_1848_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_1848_cast_fp16")]; + tensor query_19_cast_fp16 = add(x = var_1842_cast_fp16, y = var_1848_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor var_1857_pad_type_0 = const()[name = tensor("op_1857_pad_type_0"), val = tensor("valid")]; + tensor var_1857_strides_0 = const()[name = tensor("op_1857_strides_0"), val = tensor([1, 1])]; + tensor var_1857_pad_0 = const()[name = tensor("op_1857_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1857_dilations_0 = const()[name = tensor("op_1857_dilations_0"), val = tensor([1, 1])]; + tensor var_1857_groups_0 = const()[name = tensor("op_1857_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48864576))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49159552))), name = tensor("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1857_cast_fp16 = conv(dilations = var_1857_dilations_0, groups = var_1857_groups_0, pad = var_1857_pad_0, pad_type = var_1857_pad_type_0, strides = var_1857_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_1857_cast_fp16")]; + tensor var_1863_pad_type_0 = const()[name = tensor("op_1863_pad_type_0"), val = tensor("valid")]; + tensor var_1863_strides_0 = const()[name = tensor("op_1863_strides_0"), val = tensor([1, 1])]; + tensor var_1863_pad_0 = const()[name = tensor("op_1863_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1863_dilations_0 = const()[name = tensor("op_1863_dilations_0"), val = tensor([1, 1])]; + tensor var_1863_groups_0 = const()[name = tensor("op_1863_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49166528))), name = tensor("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49159680))), shape = tensor([768, 768, 1, 1])]; + tensor var_1863_cast_fp16 = conv(dilations = var_1863_dilations_0, groups = var_1863_groups_0, pad = var_1863_pad_0, pad_type = var_1863_pad_type_0, strides = var_1863_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_1863_cast_fp16")]; + tensor key_19_cast_fp16 = add(x = var_1857_cast_fp16, y = var_1863_cast_fp16)[name = tensor("key_19_cast_fp16")]; + tensor var_1873_pad_type_0 = const()[name = tensor("op_1873_pad_type_0"), val = tensor("valid")]; + tensor var_1873_strides_0 = const()[name = tensor("op_1873_strides_0"), val = tensor([1, 1])]; + tensor var_1873_pad_0 = const()[name = tensor("op_1873_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1873_dilations_0 = const()[name = tensor("op_1873_dilations_0"), val = tensor([1, 1])]; + tensor var_1873_groups_0 = const()[name = tensor("op_1873_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49240320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49535296))), name = tensor("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49535424)))]; + tensor var_1873_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1873_dilations_0, groups = var_1873_groups_0, pad = var_1873_pad_0, pad_type = var_1873_pad_type_0, strides = var_1873_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_1873_cast_fp16")]; + tensor var_1879_pad_type_0 = const()[name = tensor("op_1879_pad_type_0"), val = tensor("valid")]; + tensor var_1879_strides_0 = const()[name = tensor("op_1879_strides_0"), val = tensor([1, 1])]; + tensor var_1879_pad_0 = const()[name = tensor("op_1879_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1879_dilations_0 = const()[name = tensor("op_1879_dilations_0"), val = tensor([1, 1])]; + tensor var_1879_groups_0 = const()[name = tensor("op_1879_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49541568))), name = tensor("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49537024))), shape = tensor([768, 768, 1, 1])]; + tensor var_1879_cast_fp16 = conv(dilations = var_1879_dilations_0, groups = var_1879_groups_0, pad = var_1879_pad_0, pad_type = var_1879_pad_type_0, strides = var_1879_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_1879_cast_fp16")]; + tensor value_19_cast_fp16 = add(x = var_1873_cast_fp16, y = var_1879_cast_fp16)[name = tensor("value_19_cast_fp16")]; + tensor var_1883 = const()[name = tensor("op_1883"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1883, x = query_19_cast_fp16)[name = tensor("mh_q_19_cast_fp16")]; + tensor var_1885_to_fp16 = const()[name = tensor("op_1885_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1886_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1885_to_fp16)[name = tensor("op_1886_cast_fp16")]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor([1, 12, 64, 1500])]; + tensor var_1890_cast_fp16 = reshape(shape = var_1889, x = key_19_cast_fp16)[name = tensor("op_1890_cast_fp16")]; + tensor mh_w_19_transpose_x_0 = const()[name = tensor("mh_w_19_transpose_x_0"), val = tensor(true)]; + tensor mh_w_19_transpose_y_0 = const()[name = tensor("mh_w_19_transpose_y_0"), val = tensor(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1886_cast_fp16, y = var_1890_cast_fp16)[name = tensor("mh_w_19_cast_fp16")]; + tensor var_1893_cast_fp16 = softmax(axis = var_1798, x = mh_w_19_cast_fp16)[name = tensor("op_1893_cast_fp16")]; + tensor var_1894 = const()[name = tensor("op_1894"), val = tensor([1, 12, 64, 1500])]; + tensor var_1895_cast_fp16 = reshape(shape = var_1894, x = value_19_cast_fp16)[name = tensor("op_1895_cast_fp16")]; + tensor attn_19_transpose_x_0 = const()[name = tensor("attn_19_transpose_x_0"), val = tensor(false)]; + tensor attn_19_transpose_y_0 = const()[name = tensor("attn_19_transpose_y_0"), val = tensor(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1895_cast_fp16, y = var_1893_cast_fp16)[name = tensor("attn_19_cast_fp16")]; + tensor var_1898 = const()[name = tensor("op_1898"), val = tensor([1, 768, 1, 1500])]; + tensor input_73_cast_fp16 = reshape(shape = var_1898, x = attn_19_cast_fp16)[name = tensor("input_73_cast_fp16")]; + tensor var_1908_pad_type_0 = const()[name = tensor("op_1908_pad_type_0"), val = tensor("valid")]; + tensor var_1908_strides_0 = const()[name = tensor("op_1908_strides_0"), val = tensor([1, 1])]; + tensor var_1908_pad_0 = const()[name = tensor("op_1908_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1908_dilations_0 = const()[name = tensor("op_1908_dilations_0"), val = tensor([1, 1])]; + tensor var_1908_groups_0 = const()[name = tensor("op_1908_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49615360))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49910336))), name = tensor("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49910464)))]; + tensor var_1908_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1908_dilations_0, groups = var_1908_groups_0, pad = var_1908_pad_0, pad_type = var_1908_pad_type_0, strides = var_1908_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = tensor("op_1908_cast_fp16")]; + tensor var_1914_pad_type_0 = const()[name = tensor("op_1914_pad_type_0"), val = tensor("valid")]; + tensor var_1914_strides_0 = const()[name = tensor("op_1914_strides_0"), val = tensor([1, 1])]; + tensor var_1914_pad_0 = const()[name = tensor("op_1914_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1914_dilations_0 = const()[name = tensor("op_1914_dilations_0"), val = tensor([1, 1])]; + tensor var_1914_groups_0 = const()[name = tensor("op_1914_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49916928))), name = tensor("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49912064))), shape = tensor([768, 768, 1, 1])]; + tensor var_1914_cast_fp16 = conv(dilations = var_1914_dilations_0, groups = var_1914_groups_0, pad = var_1914_pad_0, pad_type = var_1914_pad_type_0, strides = var_1914_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = tensor("op_1914_cast_fp16")]; + tensor obj_39_cast_fp16 = add(x = var_1908_cast_fp16, y = var_1914_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; + tensor out_39_axes_0 = const()[name = tensor("out_39_axes_0"), val = tensor([1])]; + tensor var_1925_to_fp16 = const()[name = tensor("op_1925_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1925_to_fp16, x = inputs_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49990720)))]; + tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49992320)))]; + tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor var_1943_pad_type_0 = const()[name = tensor("op_1943_pad_type_0"), val = tensor("valid")]; + tensor var_1943_strides_0 = const()[name = tensor("op_1943_strides_0"), val = tensor([1, 1])]; + tensor var_1943_pad_0 = const()[name = tensor("op_1943_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1943_dilations_0 = const()[name = tensor("op_1943_dilations_0"), val = tensor([1, 1])]; + tensor var_1943_groups_0 = const()[name = tensor("op_1943_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49993920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51173632))), name = tensor("layers_9_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51173760)))]; + tensor var_1943_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_1943_dilations_0, groups = var_1943_groups_0, pad = var_1943_pad_0, pad_type = var_1943_pad_type_0, strides = var_1943_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = tensor("op_1943_cast_fp16")]; + tensor var_1949_pad_type_0 = const()[name = tensor("op_1949_pad_type_0"), val = tensor("valid")]; + tensor var_1949_strides_0 = const()[name = tensor("op_1949_strides_0"), val = tensor([1, 1])]; + tensor var_1949_pad_0 = const()[name = tensor("op_1949_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1949_dilations_0 = const()[name = tensor("op_1949_dilations_0"), val = tensor([1, 1])]; + tensor var_1949_groups_0 = const()[name = tensor("op_1949_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51201984))), name = tensor("layers_9_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51179968))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1949_cast_fp16 = conv(dilations = var_1949_dilations_0, groups = var_1949_groups_0, pad = var_1949_pad_0, pad_type = var_1949_pad_type_0, strides = var_1949_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = tensor("op_1949_cast_fp16")]; + tensor input_77_cast_fp16 = add(x = var_1943_cast_fp16, y = var_1949_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor var_1960_pad_type_0 = const()[name = tensor("op_1960_pad_type_0"), val = tensor("valid")]; + tensor var_1960_strides_0 = const()[name = tensor("op_1960_strides_0"), val = tensor([1, 1])]; + tensor var_1960_pad_0 = const()[name = tensor("op_1960_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1960_dilations_0 = const()[name = tensor("op_1960_dilations_0"), val = tensor([1, 1])]; + tensor var_1960_groups_0 = const()[name = tensor("op_1960_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51496960))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52676672))), name = tensor("layers_9_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52676800)))]; + tensor var_1960_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_1960_dilations_0, groups = var_1960_groups_0, pad = var_1960_pad_0, pad_type = var_1960_pad_type_0, strides = var_1960_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = tensor("op_1960_cast_fp16")]; + tensor var_1966_pad_type_0 = const()[name = tensor("op_1966_pad_type_0"), val = tensor("valid")]; + tensor var_1966_strides_0 = const()[name = tensor("op_1966_strides_0"), val = tensor([1, 1])]; + tensor var_1966_pad_0 = const()[name = tensor("op_1966_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1966_dilations_0 = const()[name = tensor("op_1966_dilations_0"), val = tensor([1, 1])]; + tensor var_1966_groups_0 = const()[name = tensor("op_1966_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52698816))), name = tensor("layers_9_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52678400))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1966_cast_fp16 = conv(dilations = var_1966_dilations_0, groups = var_1966_groups_0, pad = var_1966_pad_0, pad_type = var_1966_pad_type_0, strides = var_1966_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = tensor("op_1966_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = add(x = var_1960_cast_fp16, y = var_1966_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; + tensor var_1972 = const()[name = tensor("op_1972"), val = tensor(3)]; + tensor out_41_axes_0 = const()[name = tensor("out_41_axes_0"), val = tensor([1])]; + tensor var_1994_to_fp16 = const()[name = tensor("op_1994_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_1994_to_fp16, x = inputs_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; + tensor obj_41_gamma_0_to_fp16 = const()[name = tensor("obj_41_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52993792)))]; + tensor obj_41_beta_0_to_fp16 = const()[name = tensor("obj_41_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52995392)))]; + tensor obj_41_epsilon_0_to_fp16 = const()[name = tensor("obj_41_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor var_2016_pad_type_0 = const()[name = tensor("op_2016_pad_type_0"), val = tensor("valid")]; + tensor var_2016_strides_0 = const()[name = tensor("op_2016_strides_0"), val = tensor([1, 1])]; + tensor var_2016_pad_0 = const()[name = tensor("op_2016_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2016_dilations_0 = const()[name = tensor("op_2016_dilations_0"), val = tensor([1, 1])]; + tensor var_2016_groups_0 = const()[name = tensor("op_2016_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52996992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53291968))), name = tensor("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53292096)))]; + tensor var_2016_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2016_dilations_0, groups = var_2016_groups_0, pad = var_2016_pad_0, pad_type = var_2016_pad_type_0, strides = var_2016_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_2016_cast_fp16")]; + tensor var_2022_pad_type_0 = const()[name = tensor("op_2022_pad_type_0"), val = tensor("valid")]; + tensor var_2022_strides_0 = const()[name = tensor("op_2022_strides_0"), val = tensor([1, 1])]; + tensor var_2022_pad_0 = const()[name = tensor("op_2022_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2022_dilations_0 = const()[name = tensor("op_2022_dilations_0"), val = tensor([1, 1])]; + tensor var_2022_groups_0 = const()[name = tensor("op_2022_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53299776))), name = tensor("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53293696))), shape = tensor([768, 768, 1, 1])]; + tensor var_2022_cast_fp16 = conv(dilations = var_2022_dilations_0, groups = var_2022_groups_0, pad = var_2022_pad_0, pad_type = var_2022_pad_type_0, strides = var_2022_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_2022_cast_fp16")]; + tensor query_21_cast_fp16 = add(x = var_2016_cast_fp16, y = var_2022_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor var_2031_pad_type_0 = const()[name = tensor("op_2031_pad_type_0"), val = tensor("valid")]; + tensor var_2031_strides_0 = const()[name = tensor("op_2031_strides_0"), val = tensor([1, 1])]; + tensor var_2031_pad_0 = const()[name = tensor("op_2031_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2031_dilations_0 = const()[name = tensor("op_2031_dilations_0"), val = tensor([1, 1])]; + tensor var_2031_groups_0 = const()[name = tensor("op_2031_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53373568))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53668544))), name = tensor("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2031_cast_fp16 = conv(dilations = var_2031_dilations_0, groups = var_2031_groups_0, pad = var_2031_pad_0, pad_type = var_2031_pad_type_0, strides = var_2031_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_2031_cast_fp16")]; + tensor var_2037_pad_type_0 = const()[name = tensor("op_2037_pad_type_0"), val = tensor("valid")]; + tensor var_2037_strides_0 = const()[name = tensor("op_2037_strides_0"), val = tensor([1, 1])]; + tensor var_2037_pad_0 = const()[name = tensor("op_2037_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2037_dilations_0 = const()[name = tensor("op_2037_dilations_0"), val = tensor([1, 1])]; + tensor var_2037_groups_0 = const()[name = tensor("op_2037_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53675072))), name = tensor("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53668672))), shape = tensor([768, 768, 1, 1])]; + tensor var_2037_cast_fp16 = conv(dilations = var_2037_dilations_0, groups = var_2037_groups_0, pad = var_2037_pad_0, pad_type = var_2037_pad_type_0, strides = var_2037_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_2037_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_2031_cast_fp16, y = var_2037_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor var_2047_pad_type_0 = const()[name = tensor("op_2047_pad_type_0"), val = tensor("valid")]; + tensor var_2047_strides_0 = const()[name = tensor("op_2047_strides_0"), val = tensor([1, 1])]; + tensor var_2047_pad_0 = const()[name = tensor("op_2047_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2047_dilations_0 = const()[name = tensor("op_2047_dilations_0"), val = tensor([1, 1])]; + tensor var_2047_groups_0 = const()[name = tensor("op_2047_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53748864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54043840))), name = tensor("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54043968)))]; + tensor var_2047_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2047_dilations_0, groups = var_2047_groups_0, pad = var_2047_pad_0, pad_type = var_2047_pad_type_0, strides = var_2047_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_2047_cast_fp16")]; + tensor var_2053_pad_type_0 = const()[name = tensor("op_2053_pad_type_0"), val = tensor("valid")]; + tensor var_2053_strides_0 = const()[name = tensor("op_2053_strides_0"), val = tensor([1, 1])]; + tensor var_2053_pad_0 = const()[name = tensor("op_2053_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2053_dilations_0 = const()[name = tensor("op_2053_dilations_0"), val = tensor([1, 1])]; + tensor var_2053_groups_0 = const()[name = tensor("op_2053_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54050496))), name = tensor("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54045568))), shape = tensor([768, 768, 1, 1])]; + tensor var_2053_cast_fp16 = conv(dilations = var_2053_dilations_0, groups = var_2053_groups_0, pad = var_2053_pad_0, pad_type = var_2053_pad_type_0, strides = var_2053_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_2053_cast_fp16")]; + tensor value_21_cast_fp16 = add(x = var_2047_cast_fp16, y = var_2053_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_2057 = const()[name = tensor("op_2057"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_21_cast_fp16 = reshape(shape = var_2057, x = query_21_cast_fp16)[name = tensor("mh_q_21_cast_fp16")]; + tensor var_2059_to_fp16 = const()[name = tensor("op_2059_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2060_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_2059_to_fp16)[name = tensor("op_2060_cast_fp16")]; + tensor var_2063 = const()[name = tensor("op_2063"), val = tensor([1, 12, 64, 1500])]; + tensor var_2064_cast_fp16 = reshape(shape = var_2063, x = key_21_cast_fp16)[name = tensor("op_2064_cast_fp16")]; + tensor mh_w_21_transpose_x_0 = const()[name = tensor("mh_w_21_transpose_x_0"), val = tensor(true)]; + tensor mh_w_21_transpose_y_0 = const()[name = tensor("mh_w_21_transpose_y_0"), val = tensor(false)]; + tensor mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_2060_cast_fp16, y = var_2064_cast_fp16)[name = tensor("mh_w_21_cast_fp16")]; + tensor var_2067_cast_fp16 = softmax(axis = var_1972, x = mh_w_21_cast_fp16)[name = tensor("op_2067_cast_fp16")]; + tensor var_2068 = const()[name = tensor("op_2068"), val = tensor([1, 12, 64, 1500])]; + tensor var_2069_cast_fp16 = reshape(shape = var_2068, x = value_21_cast_fp16)[name = tensor("op_2069_cast_fp16")]; + tensor attn_21_transpose_x_0 = const()[name = tensor("attn_21_transpose_x_0"), val = tensor(false)]; + tensor attn_21_transpose_y_0 = const()[name = tensor("attn_21_transpose_y_0"), val = tensor(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_2069_cast_fp16, y = var_2067_cast_fp16)[name = tensor("attn_21_cast_fp16")]; + tensor var_2072 = const()[name = tensor("op_2072"), val = tensor([1, 768, 1, 1500])]; + tensor input_81_cast_fp16 = reshape(shape = var_2072, x = attn_21_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor var_2082_pad_type_0 = const()[name = tensor("op_2082_pad_type_0"), val = tensor("valid")]; + tensor var_2082_strides_0 = const()[name = tensor("op_2082_strides_0"), val = tensor([1, 1])]; + tensor var_2082_pad_0 = const()[name = tensor("op_2082_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2082_dilations_0 = const()[name = tensor("op_2082_dilations_0"), val = tensor([1, 1])]; + tensor var_2082_groups_0 = const()[name = tensor("op_2082_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54124288))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54419264))), name = tensor("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54419392)))]; + tensor var_2082_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2082_dilations_0, groups = var_2082_groups_0, pad = var_2082_pad_0, pad_type = var_2082_pad_type_0, strides = var_2082_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2088_pad_type_0 = const()[name = tensor("op_2088_pad_type_0"), val = tensor("valid")]; + tensor var_2088_strides_0 = const()[name = tensor("op_2088_strides_0"), val = tensor([1, 1])]; + tensor var_2088_pad_0 = const()[name = tensor("op_2088_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2088_dilations_0 = const()[name = tensor("op_2088_dilations_0"), val = tensor([1, 1])]; + tensor var_2088_groups_0 = const()[name = tensor("op_2088_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54426624))), name = tensor("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54420992))), shape = tensor([768, 768, 1, 1])]; + tensor var_2088_cast_fp16 = conv(dilations = var_2088_dilations_0, groups = var_2088_groups_0, pad = var_2088_pad_0, pad_type = var_2088_pad_type_0, strides = var_2088_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = tensor("op_2088_cast_fp16")]; + tensor obj_43_cast_fp16 = add(x = var_2082_cast_fp16, y = var_2088_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; + tensor out_43_axes_0 = const()[name = tensor("out_43_axes_0"), val = tensor([1])]; + tensor var_2099_to_fp16 = const()[name = tensor("op_2099_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2099_to_fp16, x = inputs_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; + tensor input_83_gamma_0_to_fp16 = const()[name = tensor("input_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54500416)))]; + tensor input_83_beta_0_to_fp16 = const()[name = tensor("input_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54502016)))]; + tensor input_83_epsilon_0_to_fp16 = const()[name = tensor("input_83_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_2117_pad_type_0 = const()[name = tensor("op_2117_pad_type_0"), val = tensor("valid")]; + tensor var_2117_strides_0 = const()[name = tensor("op_2117_strides_0"), val = tensor([1, 1])]; + tensor var_2117_pad_0 = const()[name = tensor("op_2117_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2117_dilations_0 = const()[name = tensor("op_2117_dilations_0"), val = tensor([1, 1])]; + tensor var_2117_groups_0 = const()[name = tensor("op_2117_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54503616))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55683328))), name = tensor("layers_10_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55683456)))]; + tensor var_2117_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_2117_dilations_0, groups = var_2117_groups_0, pad = var_2117_pad_0, pad_type = var_2117_pad_type_0, strides = var_2117_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = tensor("op_2117_cast_fp16")]; + tensor var_2123_pad_type_0 = const()[name = tensor("op_2123_pad_type_0"), val = tensor("valid")]; + tensor var_2123_strides_0 = const()[name = tensor("op_2123_strides_0"), val = tensor([1, 1])]; + tensor var_2123_pad_0 = const()[name = tensor("op_2123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2123_dilations_0 = const()[name = tensor("op_2123_dilations_0"), val = tensor([1, 1])]; + tensor var_2123_groups_0 = const()[name = tensor("op_2123_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55711488))), name = tensor("layers_10_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55689664))), shape = tensor([3072, 768, 1, 1])]; + tensor var_2123_cast_fp16 = conv(dilations = var_2123_dilations_0, groups = var_2123_groups_0, pad = var_2123_pad_0, pad_type = var_2123_pad_type_0, strides = var_2123_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = tensor("op_2123_cast_fp16")]; + tensor input_85_cast_fp16 = add(x = var_2117_cast_fp16, y = var_2123_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor input_87_mode_0 = const()[name = tensor("input_87_mode_0"), val = tensor("EXACT")]; + tensor input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor var_2134_pad_type_0 = const()[name = tensor("op_2134_pad_type_0"), val = tensor("valid")]; + tensor var_2134_strides_0 = const()[name = tensor("op_2134_strides_0"), val = tensor([1, 1])]; + tensor var_2134_pad_0 = const()[name = tensor("op_2134_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2134_dilations_0 = const()[name = tensor("op_2134_dilations_0"), val = tensor([1, 1])]; + tensor var_2134_groups_0 = const()[name = tensor("op_2134_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56006464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57186176))), name = tensor("layers_10_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57186304)))]; + tensor var_2134_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_2134_dilations_0, groups = var_2134_groups_0, pad = var_2134_pad_0, pad_type = var_2134_pad_type_0, strides = var_2134_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_87_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2140_pad_type_0 = const()[name = tensor("op_2140_pad_type_0"), val = tensor("valid")]; + tensor var_2140_strides_0 = const()[name = tensor("op_2140_strides_0"), val = tensor([1, 1])]; + tensor var_2140_pad_0 = const()[name = tensor("op_2140_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2140_dilations_0 = const()[name = tensor("op_2140_dilations_0"), val = tensor([1, 1])]; + tensor var_2140_groups_0 = const()[name = tensor("op_2140_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57213952))), name = tensor("layers_10_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57187904))), shape = tensor([768, 3072, 1, 1])]; + tensor var_2140_cast_fp16 = conv(dilations = var_2140_dilations_0, groups = var_2140_groups_0, pad = var_2140_pad_0, pad_type = var_2140_pad_type_0, strides = var_2140_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_87_cast_fp16)[name = tensor("op_2140_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = add(x = var_2134_cast_fp16, y = var_2140_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; + tensor var_2146 = const()[name = tensor("op_2146"), val = tensor(3)]; + tensor out_45_axes_0 = const()[name = tensor("out_45_axes_0"), val = tensor([1])]; + tensor var_2168_to_fp16 = const()[name = tensor("op_2168_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2168_to_fp16, x = inputs_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; + tensor obj_45_gamma_0_to_fp16 = const()[name = tensor("obj_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57508928)))]; + tensor obj_45_beta_0_to_fp16 = const()[name = tensor("obj_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57510528)))]; + tensor obj_45_epsilon_0_to_fp16 = const()[name = tensor("obj_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_45_cast_fp16")]; + tensor var_2190_pad_type_0 = const()[name = tensor("op_2190_pad_type_0"), val = tensor("valid")]; + tensor var_2190_strides_0 = const()[name = tensor("op_2190_strides_0"), val = tensor([1, 1])]; + tensor var_2190_pad_0 = const()[name = tensor("op_2190_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2190_dilations_0 = const()[name = tensor("op_2190_dilations_0"), val = tensor([1, 1])]; + tensor var_2190_groups_0 = const()[name = tensor("op_2190_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57512128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57807104))), name = tensor("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57807232)))]; + tensor var_2190_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2190_dilations_0, groups = var_2190_groups_0, pad = var_2190_pad_0, pad_type = var_2190_pad_type_0, strides = var_2190_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_2190_cast_fp16")]; + tensor var_2196_pad_type_0 = const()[name = tensor("op_2196_pad_type_0"), val = tensor("valid")]; + tensor var_2196_strides_0 = const()[name = tensor("op_2196_strides_0"), val = tensor([1, 1])]; + tensor var_2196_pad_0 = const()[name = tensor("op_2196_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2196_dilations_0 = const()[name = tensor("op_2196_dilations_0"), val = tensor([1, 1])]; + tensor var_2196_groups_0 = const()[name = tensor("op_2196_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57815296))), name = tensor("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57808832))), shape = tensor([768, 768, 1, 1])]; + tensor var_2196_cast_fp16 = conv(dilations = var_2196_dilations_0, groups = var_2196_groups_0, pad = var_2196_pad_0, pad_type = var_2196_pad_type_0, strides = var_2196_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor query_cast_fp16 = add(x = var_2190_cast_fp16, y = var_2196_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_2205_pad_type_0 = const()[name = tensor("op_2205_pad_type_0"), val = tensor("valid")]; + tensor var_2205_strides_0 = const()[name = tensor("op_2205_strides_0"), val = tensor([1, 1])]; + tensor var_2205_pad_0 = const()[name = tensor("op_2205_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2205_dilations_0 = const()[name = tensor("op_2205_dilations_0"), val = tensor([1, 1])]; + tensor var_2205_groups_0 = const()[name = tensor("op_2205_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57889088))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58184064))), name = tensor("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2205_cast_fp16 = conv(dilations = var_2205_dilations_0, groups = var_2205_groups_0, pad = var_2205_pad_0, pad_type = var_2205_pad_type_0, strides = var_2205_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_2205_cast_fp16")]; + tensor var_2211_pad_type_0 = const()[name = tensor("op_2211_pad_type_0"), val = tensor("valid")]; + tensor var_2211_strides_0 = const()[name = tensor("op_2211_strides_0"), val = tensor([1, 1])]; + tensor var_2211_pad_0 = const()[name = tensor("op_2211_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2211_dilations_0 = const()[name = tensor("op_2211_dilations_0"), val = tensor([1, 1])]; + tensor var_2211_groups_0 = const()[name = tensor("op_2211_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58191872))), name = tensor("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58184192))), shape = tensor([768, 768, 1, 1])]; + tensor var_2211_cast_fp16 = conv(dilations = var_2211_dilations_0, groups = var_2211_groups_0, pad = var_2211_pad_0, pad_type = var_2211_pad_type_0, strides = var_2211_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_2211_cast_fp16")]; + tensor key_cast_fp16 = add(x = var_2205_cast_fp16, y = var_2211_cast_fp16)[name = tensor("key_cast_fp16")]; + tensor var_2221_pad_type_0 = const()[name = tensor("op_2221_pad_type_0"), val = tensor("valid")]; + tensor var_2221_strides_0 = const()[name = tensor("op_2221_strides_0"), val = tensor([1, 1])]; + tensor var_2221_pad_0 = const()[name = tensor("op_2221_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2221_dilations_0 = const()[name = tensor("op_2221_dilations_0"), val = tensor([1, 1])]; + tensor var_2221_groups_0 = const()[name = tensor("op_2221_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58265664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58560640))), name = tensor("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58560768)))]; + tensor var_2221_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2221_dilations_0, groups = var_2221_groups_0, pad = var_2221_pad_0, pad_type = var_2221_pad_type_0, strides = var_2221_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_2221_cast_fp16")]; + tensor var_2227_pad_type_0 = const()[name = tensor("op_2227_pad_type_0"), val = tensor("valid")]; + tensor var_2227_strides_0 = const()[name = tensor("op_2227_strides_0"), val = tensor([1, 1])]; + tensor var_2227_pad_0 = const()[name = tensor("op_2227_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2227_dilations_0 = const()[name = tensor("op_2227_dilations_0"), val = tensor([1, 1])]; + tensor var_2227_groups_0 = const()[name = tensor("op_2227_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58567232))), name = tensor("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58562368))), shape = tensor([768, 768, 1, 1])]; + tensor var_2227_cast_fp16 = conv(dilations = var_2227_dilations_0, groups = var_2227_groups_0, pad = var_2227_pad_0, pad_type = var_2227_pad_type_0, strides = var_2227_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_2227_cast_fp16")]; + tensor value_cast_fp16 = add(x = var_2221_cast_fp16, y = var_2227_cast_fp16)[name = tensor("value_cast_fp16")]; + tensor var_2231 = const()[name = tensor("op_2231"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_cast_fp16 = reshape(shape = var_2231, x = query_cast_fp16)[name = tensor("mh_q_cast_fp16")]; + tensor var_2233_to_fp16 = const()[name = tensor("op_2233_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2234_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_2233_to_fp16)[name = tensor("op_2234_cast_fp16")]; + tensor var_2237 = const()[name = tensor("op_2237"), val = tensor([1, 12, 64, 1500])]; + tensor var_2238_cast_fp16 = reshape(shape = var_2237, x = key_cast_fp16)[name = tensor("op_2238_cast_fp16")]; + tensor mh_w_transpose_x_0 = const()[name = tensor("mh_w_transpose_x_0"), val = tensor(true)]; + tensor mh_w_transpose_y_0 = const()[name = tensor("mh_w_transpose_y_0"), val = tensor(false)]; + tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_2234_cast_fp16, y = var_2238_cast_fp16)[name = tensor("mh_w_cast_fp16")]; + tensor var_2241_cast_fp16 = softmax(axis = var_2146, x = mh_w_cast_fp16)[name = tensor("op_2241_cast_fp16")]; + tensor var_2242 = const()[name = tensor("op_2242"), val = tensor([1, 12, 64, 1500])]; + tensor var_2243_cast_fp16 = reshape(shape = var_2242, x = value_cast_fp16)[name = tensor("op_2243_cast_fp16")]; + tensor attn_transpose_x_0 = const()[name = tensor("attn_transpose_x_0"), val = tensor(false)]; + tensor attn_transpose_y_0 = const()[name = tensor("attn_transpose_y_0"), val = tensor(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_2243_cast_fp16, y = var_2241_cast_fp16)[name = tensor("attn_cast_fp16")]; + tensor var_2246 = const()[name = tensor("op_2246"), val = tensor([1, 768, 1, 1500])]; + tensor input_89_cast_fp16 = reshape(shape = var_2246, x = attn_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor var_2256_pad_type_0 = const()[name = tensor("op_2256_pad_type_0"), val = tensor("valid")]; + tensor var_2256_strides_0 = const()[name = tensor("op_2256_strides_0"), val = tensor([1, 1])]; + tensor var_2256_pad_0 = const()[name = tensor("op_2256_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2256_dilations_0 = const()[name = tensor("op_2256_dilations_0"), val = tensor([1, 1])]; + tensor var_2256_groups_0 = const()[name = tensor("op_2256_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58641024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58936000))), name = tensor("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58936128)))]; + tensor var_2256_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2256_dilations_0, groups = var_2256_groups_0, pad = var_2256_pad_0, pad_type = var_2256_pad_type_0, strides = var_2256_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2262_pad_type_0 = const()[name = tensor("op_2262_pad_type_0"), val = tensor("valid")]; + tensor var_2262_strides_0 = const()[name = tensor("op_2262_strides_0"), val = tensor([1, 1])]; + tensor var_2262_pad_0 = const()[name = tensor("op_2262_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2262_dilations_0 = const()[name = tensor("op_2262_dilations_0"), val = tensor([1, 1])]; + tensor var_2262_groups_0 = const()[name = tensor("op_2262_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58943552))), name = tensor("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58937728))), shape = tensor([768, 768, 1, 1])]; + tensor var_2262_cast_fp16 = conv(dilations = var_2262_dilations_0, groups = var_2262_groups_0, pad = var_2262_pad_0, pad_type = var_2262_pad_type_0, strides = var_2262_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = tensor("op_2262_cast_fp16")]; + tensor obj_cast_fp16 = add(x = var_2256_cast_fp16, y = var_2262_cast_fp16)[name = tensor("obj_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; + tensor out_47_axes_0 = const()[name = tensor("out_47_axes_0"), val = tensor([1])]; + tensor var_2273_to_fp16 = const()[name = tensor("op_2273_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2273_to_fp16, x = inputs_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; + tensor input_91_gamma_0_to_fp16 = const()[name = tensor("input_91_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59017344)))]; + tensor input_91_beta_0_to_fp16 = const()[name = tensor("input_91_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59018944)))]; + tensor input_91_epsilon_0_to_fp16 = const()[name = tensor("input_91_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor var_2291_pad_type_0 = const()[name = tensor("op_2291_pad_type_0"), val = tensor("valid")]; + tensor var_2291_strides_0 = const()[name = tensor("op_2291_strides_0"), val = tensor([1, 1])]; + tensor var_2291_pad_0 = const()[name = tensor("op_2291_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2291_dilations_0 = const()[name = tensor("op_2291_dilations_0"), val = tensor([1, 1])]; + tensor var_2291_groups_0 = const()[name = tensor("op_2291_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59020544))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60200256))), name = tensor("layers_11_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60200384)))]; + tensor var_2291_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_2291_dilations_0, groups = var_2291_groups_0, pad = var_2291_pad_0, pad_type = var_2291_pad_type_0, strides = var_2291_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = tensor("op_2291_cast_fp16")]; + tensor var_2297_pad_type_0 = const()[name = tensor("op_2297_pad_type_0"), val = tensor("valid")]; + tensor var_2297_strides_0 = const()[name = tensor("op_2297_strides_0"), val = tensor([1, 1])]; + tensor var_2297_pad_0 = const()[name = tensor("op_2297_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2297_dilations_0 = const()[name = tensor("op_2297_dilations_0"), val = tensor([1, 1])]; + tensor var_2297_groups_0 = const()[name = tensor("op_2297_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60238976))), name = tensor("layers_11_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60206592))), shape = tensor([3072, 768, 1, 1])]; + tensor var_2297_cast_fp16 = conv(dilations = var_2297_dilations_0, groups = var_2297_groups_0, pad = var_2297_pad_0, pad_type = var_2297_pad_type_0, strides = var_2297_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = tensor("op_2297_cast_fp16")]; + tensor input_93_cast_fp16 = add(x = var_2291_cast_fp16, y = var_2297_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_93_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2308_pad_type_0 = const()[name = tensor("op_2308_pad_type_0"), val = tensor("valid")]; + tensor var_2308_strides_0 = const()[name = tensor("op_2308_strides_0"), val = tensor([1, 1])]; + tensor var_2308_pad_0 = const()[name = tensor("op_2308_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2308_dilations_0 = const()[name = tensor("op_2308_dilations_0"), val = tensor([1, 1])]; + tensor var_2308_groups_0 = const()[name = tensor("op_2308_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60533952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61713664))), name = tensor("layers_11_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61713792)))]; + tensor var_2308_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_2308_dilations_0, groups = var_2308_groups_0, pad = var_2308_pad_0, pad_type = var_2308_pad_type_0, strides = var_2308_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = tensor("op_2308_cast_fp16")]; + tensor var_2314_pad_type_0 = const()[name = tensor("op_2314_pad_type_0"), val = tensor("valid")]; + tensor var_2314_strides_0 = const()[name = tensor("op_2314_strides_0"), val = tensor([1, 1])]; + tensor var_2314_pad_0 = const()[name = tensor("op_2314_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2314_dilations_0 = const()[name = tensor("op_2314_dilations_0"), val = tensor([1, 1])]; + tensor var_2314_groups_0 = const()[name = tensor("op_2314_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61759168))), name = tensor("layers_11_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61715392))), shape = tensor([768, 3072, 1, 1])]; + tensor var_2314_cast_fp16 = conv(dilations = var_2314_dilations_0, groups = var_2314_groups_0, pad = var_2314_pad_0, pad_type = var_2314_pad_type_0, strides = var_2314_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = var_2308_cast_fp16, y = var_2314_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; + tensor var_2329_to_fp16 = const()[name = tensor("op_2329_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_2329_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62054144)))]; + tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62055744)))]; + tensor encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("encoder_output_embeds_type_fp32_cast_fp16")]; + } -> (encoder_output_embeds); +} \ No newline at end of file