diff --git "a/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/model.mil" "b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/model.mil" @@ -0,0 +1,2950 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})] +{ + func main(tensor cache_length, tensor decoder_key_padding_mask, tensor encoder_output_embeds, tensor input_ids, tensor key_cache, tensor kv_cache_update_mask, tensor value_cache) { + tensor var_43_axis_0 = const()[name = tensor("op_43_axis_0"), val = tensor(0)]; + tensor var_43_batch_dims_0 = const()[name = tensor("op_43_batch_dims_0"), val = tensor(0)]; + tensor embed_positions_inlier_module_weight_to_fp16 = const()[name = tensor("embed_positions_inlier_module_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_43_cast_fp16 = gather(axis = var_43_axis_0, batch_dims = var_43_batch_dims_0, indices = cache_length, x = embed_positions_inlier_module_weight_to_fp16)[name = tensor("op_43_cast_fp16")]; + tensor var_45_axis_0 = const()[name = tensor("op_45_axis_0"), val = tensor(0)]; + tensor var_45_batch_dims_0 = const()[name = tensor("op_45_batch_dims_0"), val = tensor(0)]; + tensor embed_positions_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(693632))), name = tensor("embed_positions_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(688256))), shape = tensor([448, 768])]; + tensor var_45_cast_fp16 = gather(axis = var_45_axis_0, batch_dims = var_45_batch_dims_0, indices = cache_length, x = embed_positions_outlier_module_weight_to_fp16_sparsified)[name = tensor("op_45_cast_fp16")]; + tensor embed_positions_1_cast_fp16 = add(x = var_43_cast_fp16, y = var_45_cast_fp16)[name = tensor("embed_positions_1_cast_fp16")]; + tensor var_50_axis_0 = const()[name = tensor("op_50_axis_0"), val = tensor(0)]; + tensor var_50_batch_dims_0 = const()[name = tensor("op_50_batch_dims_0"), val = tensor(0)]; + tensor embed_tokens_weight_to_fp16 = const()[name = tensor("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(736704)))]; + tensor var_50_cast_fp16 = gather(axis = var_50_axis_0, batch_dims = var_50_batch_dims_0, indices = input_ids, x = embed_tokens_weight_to_fp16)[name = tensor("op_50_cast_fp16")]; + tensor hidden_states_1_cast_fp16 = add(x = var_50_cast_fp16, y = embed_positions_1_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_64_axes_0 = const()[name = tensor("op_64_axes_0"), val = tensor([2])]; + tensor var_64_cast_fp16 = expand_dims(axes = var_64_axes_0, x = hidden_states_1_cast_fp16)[name = tensor("op_64_cast_fp16")]; + tensor inputs_1_axes_0 = const()[name = tensor("inputs_1_axes_0"), val = tensor([3])]; + tensor inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_64_cast_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor tile_0 = const()[name = tensor("tile_0"), val = tensor([768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768])]; + tensor var_69_axis_0 = const()[name = tensor("op_69_axis_0"), val = tensor(1)]; + tensor var_69_cast_fp16_0, tensor var_69_cast_fp16_1, tensor var_69_cast_fp16_2, tensor var_69_cast_fp16_3, tensor var_69_cast_fp16_4, tensor var_69_cast_fp16_5, tensor var_69_cast_fp16_6, tensor var_69_cast_fp16_7, tensor var_69_cast_fp16_8, tensor var_69_cast_fp16_9, tensor var_69_cast_fp16_10, tensor var_69_cast_fp16_11 = split(axis = var_69_axis_0, split_sizes = tile_0, x = key_cache)[name = tensor("op_69_cast_fp16")]; + tensor tile_1 = const()[name = tensor("tile_1"), val = tensor([768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768])]; + tensor var_84_axis_0 = const()[name = tensor("op_84_axis_0"), val = tensor(1)]; + tensor var_84_cast_fp16_0, tensor var_84_cast_fp16_1, tensor var_84_cast_fp16_2, tensor var_84_cast_fp16_3, tensor var_84_cast_fp16_4, tensor var_84_cast_fp16_5, tensor var_84_cast_fp16_6, tensor var_84_cast_fp16_7, tensor var_84_cast_fp16_8, tensor var_84_cast_fp16_9, tensor var_84_cast_fp16_10, tensor var_84_cast_fp16_11 = split(axis = var_84_axis_0, split_sizes = tile_1, x = value_cache)[name = tensor("op_84_cast_fp16")]; + tensor var_102 = const()[name = tensor("op_102"), val = tensor(3)]; + tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; + tensor var_127_to_fp16 = const()[name = tensor("op_127_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_127_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80399872)))]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80401472)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80403072)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80404672)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_149_pad_type_0 = const()[name = tensor("op_149_pad_type_0"), val = tensor("valid")]; + tensor var_149_strides_0 = const()[name = tensor("op_149_strides_0"), val = tensor([1, 1])]; + tensor var_149_pad_0 = const()[name = tensor("op_149_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_149_dilations_0 = const()[name = tensor("op_149_dilations_0"), val = tensor([1, 1])]; + tensor var_149_groups_0 = const()[name = tensor("op_149_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80406272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80701248))), name = tensor("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80701376)))]; + tensor var_149_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_149_dilations_0, groups = var_149_groups_0, pad = var_149_pad_0, pad_type = var_149_pad_type_0, strides = var_149_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_149_cast_fp16")]; + tensor var_155_pad_type_0 = const()[name = tensor("op_155_pad_type_0"), val = tensor("valid")]; + tensor var_155_strides_0 = const()[name = tensor("op_155_strides_0"), val = tensor([1, 1])]; + tensor var_155_pad_0 = const()[name = tensor("op_155_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_dilations_0 = const()[name = tensor("op_155_dilations_0"), val = tensor([1, 1])]; + tensor var_155_groups_0 = const()[name = tensor("op_155_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80717184))), name = tensor("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80702976))), shape = tensor([768, 768, 1, 1])]; + tensor var_155_cast_fp16 = conv(dilations = var_155_dilations_0, groups = var_155_groups_0, pad = var_155_pad_0, pad_type = var_155_pad_type_0, strides = var_155_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor query_1_cast_fp16 = add(x = var_149_cast_fp16, y = var_155_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_164_pad_type_0 = const()[name = tensor("op_164_pad_type_0"), val = tensor("valid")]; + tensor var_164_strides_0 = const()[name = tensor("op_164_strides_0"), val = tensor([1, 1])]; + tensor var_164_pad_0 = const()[name = tensor("op_164_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_164_dilations_0 = const()[name = tensor("op_164_dilations_0"), val = tensor([1, 1])]; + tensor var_164_groups_0 = const()[name = tensor("op_164_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80790976))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81085952))), name = tensor("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_164_cast_fp16 = conv(dilations = var_164_dilations_0, groups = var_164_groups_0, pad = var_164_pad_0, pad_type = var_164_pad_type_0, strides = var_164_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor var_170_pad_type_0 = const()[name = tensor("op_170_pad_type_0"), val = tensor("valid")]; + tensor var_170_strides_0 = const()[name = tensor("op_170_strides_0"), val = tensor([1, 1])]; + tensor var_170_pad_0 = const()[name = tensor("op_170_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_170_dilations_0 = const()[name = tensor("op_170_dilations_0"), val = tensor([1, 1])]; + tensor var_170_groups_0 = const()[name = tensor("op_170_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81100992))), name = tensor("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81086080))), shape = tensor([768, 768, 1, 1])]; + tensor var_170_cast_fp16 = conv(dilations = var_170_dilations_0, groups = var_170_groups_0, pad = var_170_pad_0, pad_type = var_170_pad_type_0, strides = var_170_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_170_cast_fp16")]; + tensor current_key_1_cast_fp16 = add(x = var_164_cast_fp16, y = var_170_cast_fp16)[name = tensor("current_key_1_cast_fp16")]; + tensor var_180_pad_type_0 = const()[name = tensor("op_180_pad_type_0"), val = tensor("valid")]; + tensor var_180_strides_0 = const()[name = tensor("op_180_strides_0"), val = tensor([1, 1])]; + tensor var_180_pad_0 = const()[name = tensor("op_180_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_dilations_0 = const()[name = tensor("op_180_dilations_0"), val = tensor([1, 1])]; + tensor var_180_groups_0 = const()[name = tensor("op_180_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81174784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81469760))), name = tensor("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81469888)))]; + tensor var_180_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_180_dilations_0, groups = var_180_groups_0, pad = var_180_pad_0, pad_type = var_180_pad_type_0, strides = var_180_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_pad_type_0 = const()[name = tensor("op_186_pad_type_0"), val = tensor("valid")]; + tensor var_186_strides_0 = const()[name = tensor("op_186_strides_0"), val = tensor([1, 1])]; + tensor var_186_pad_0 = const()[name = tensor("op_186_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_186_dilations_0 = const()[name = tensor("op_186_dilations_0"), val = tensor([1, 1])]; + tensor var_186_groups_0 = const()[name = tensor("op_186_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81480000))), name = tensor("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81471488))), shape = tensor([768, 768, 1, 1])]; + tensor var_186_cast_fp16 = conv(dilations = var_186_dilations_0, groups = var_186_groups_0, pad = var_186_pad_0, pad_type = var_186_pad_type_0, strides = var_186_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor current_value_1_cast_fp16 = add(x = var_180_cast_fp16, y = var_186_cast_fp16)[name = tensor("current_value_1_cast_fp16")]; + tensor var_189_axes_0 = const()[name = tensor("op_189_axes_0"), val = tensor([1])]; + tensor var_189_cast_fp16 = expand_dims(axes = var_189_axes_0, x = kv_cache_update_mask)[name = tensor("op_189_cast_fp16")]; + tensor var_190_axes_0 = const()[name = tensor("op_190_axes_0"), val = tensor([2])]; + tensor var_190_cast_fp16 = expand_dims(axes = var_190_axes_0, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor var_103_to_fp16 = const()[name = tensor("op_103_to_fp16"), val = tensor(0x1p+0)]; + tensor var_192_cast_fp16 = sub(x = var_103_to_fp16, y = var_190_cast_fp16)[name = tensor("op_192_cast_fp16")]; + tensor var_193_cast_fp16 = mul(x = var_69_cast_fp16_0, y = var_192_cast_fp16)[name = tensor("op_193_cast_fp16")]; + tensor var_194_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_194_cast_fp16")]; + tensor key_1_cast_fp16 = add(x = var_193_cast_fp16, y = var_194_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_197_cast_fp16 = mul(x = var_84_cast_fp16_0, y = var_192_cast_fp16)[name = tensor("op_197_cast_fp16")]; + tensor var_198_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_198_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_197_cast_fp16, y = var_198_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_202, x = query_1_cast_fp16)[name = tensor("mh_q_1_cast_fp16")]; + tensor var_204_to_fp16 = const()[name = tensor("op_204_to_fp16"), val = tensor(0x1p-3)]; + tensor var_205_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_204_to_fp16)[name = tensor("op_205_cast_fp16")]; + tensor var_208 = const()[name = tensor("op_208"), val = tensor([1, 12, 64, 448])]; + tensor var_209_cast_fp16 = reshape(shape = var_208, x = key_1_cast_fp16)[name = tensor("op_209_cast_fp16")]; + tensor mh_w_1_transpose_x_0 = const()[name = tensor("mh_w_1_transpose_x_0"), val = tensor(true)]; + tensor mh_w_1_transpose_y_0 = const()[name = tensor("mh_w_1_transpose_y_0"), val = tensor(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_205_cast_fp16, y = var_209_cast_fp16)[name = tensor("mh_w_1_cast_fp16")]; + tensor var_213_axes_0 = const()[name = tensor("op_213_axes_0"), val = tensor([1])]; + tensor var_213_cast_fp16 = expand_dims(axes = var_213_axes_0, x = decoder_key_padding_mask)[name = tensor("op_213_cast_fp16")]; + tensor var_214_axes_0 = const()[name = tensor("op_214_axes_0"), val = tensor([2])]; + tensor var_214_cast_fp16 = expand_dims(axes = var_214_axes_0, x = var_213_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_3_cast_fp16")]; + tensor var_217_cast_fp16 = softmax(axis = var_102, x = mh_w_3_cast_fp16)[name = tensor("op_217_cast_fp16")]; + tensor var_218 = const()[name = tensor("op_218"), val = tensor([1, 12, 64, 448])]; + tensor var_219_cast_fp16 = reshape(shape = var_218, x = value_1_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_219_cast_fp16, y = var_217_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 768, 1, 1])]; + tensor input_1_cast_fp16 = reshape(shape = var_222, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_232_pad_type_0 = const()[name = tensor("op_232_pad_type_0"), val = tensor("valid")]; + tensor var_232_strides_0 = const()[name = tensor("op_232_strides_0"), val = tensor([1, 1])]; + tensor var_232_pad_0 = const()[name = tensor("op_232_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_232_dilations_0 = const()[name = tensor("op_232_dilations_0"), val = tensor([1, 1])]; + tensor var_232_groups_0 = const()[name = tensor("op_232_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81553792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81848768))), name = tensor("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81848896)))]; + tensor var_232_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_232_dilations_0, groups = var_232_groups_0, pad = var_232_pad_0, pad_type = var_232_pad_type_0, strides = var_232_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = tensor("op_232_cast_fp16")]; + tensor var_238_pad_type_0 = const()[name = tensor("op_238_pad_type_0"), val = tensor("valid")]; + tensor var_238_strides_0 = const()[name = tensor("op_238_strides_0"), val = tensor([1, 1])]; + tensor var_238_pad_0 = const()[name = tensor("op_238_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_238_dilations_0 = const()[name = tensor("op_238_dilations_0"), val = tensor([1, 1])]; + tensor var_238_groups_0 = const()[name = tensor("op_238_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81860736))), name = tensor("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81850496))), shape = tensor([768, 768, 1, 1])]; + tensor var_238_cast_fp16 = conv(dilations = var_238_dilations_0, groups = var_238_groups_0, pad = var_238_pad_0, pad_type = var_238_pad_type_0, strides = var_238_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor obj_7_cast_fp16 = add(x = var_232_cast_fp16, y = var_238_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; + tensor var_253_to_fp16 = const()[name = tensor("op_253_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_253_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81934528)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81936128)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor var_275_pad_type_0 = const()[name = tensor("op_275_pad_type_0"), val = tensor("valid")]; + tensor var_275_strides_0 = const()[name = tensor("op_275_strides_0"), val = tensor([1, 1])]; + tensor var_275_pad_0 = const()[name = tensor("op_275_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_275_dilations_0 = const()[name = tensor("op_275_dilations_0"), val = tensor([1, 1])]; + tensor var_275_groups_0 = const()[name = tensor("op_275_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81937728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82232704))), name = tensor("layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82232832)))]; + tensor var_275_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_275_dilations_0, groups = var_275_groups_0, pad = var_275_pad_0, pad_type = var_275_pad_type_0, strides = var_275_strides_0, weight = layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_281_pad_type_0 = const()[name = tensor("op_281_pad_type_0"), val = tensor("valid")]; + tensor var_281_strides_0 = const()[name = tensor("op_281_strides_0"), val = tensor([1, 1])]; + tensor var_281_pad_0 = const()[name = tensor("op_281_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_281_dilations_0 = const()[name = tensor("op_281_dilations_0"), val = tensor([1, 1])]; + tensor var_281_groups_0 = const()[name = tensor("op_281_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82258240))), name = tensor("layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82234432))), shape = tensor([768, 768, 1, 1])]; + tensor var_281_cast_fp16 = conv(dilations = var_281_dilations_0, groups = var_281_groups_0, pad = var_281_pad_0, pad_type = var_281_pad_type_0, strides = var_281_strides_0, weight = layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_281_cast_fp16")]; + tensor query_3_cast_fp16 = add(x = var_275_cast_fp16, y = var_281_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_290_pad_type_0 = const()[name = tensor("op_290_pad_type_0"), val = tensor("valid")]; + tensor var_290_strides_0 = const()[name = tensor("op_290_strides_0"), val = tensor([1, 1])]; + tensor var_290_pad_0 = const()[name = tensor("op_290_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_290_dilations_0 = const()[name = tensor("op_290_dilations_0"), val = tensor([1, 1])]; + tensor var_290_groups_0 = const()[name = tensor("op_290_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82332032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82627008))), name = tensor("layers_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_290_cast_fp16 = conv(dilations = var_290_dilations_0, groups = var_290_groups_0, pad = var_290_pad_0, pad_type = var_290_pad_type_0, strides = var_290_strides_0, weight = layers_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_290_cast_fp16")]; + tensor var_296_pad_type_0 = const()[name = tensor("op_296_pad_type_0"), val = tensor("valid")]; + tensor var_296_strides_0 = const()[name = tensor("op_296_strides_0"), val = tensor([1, 1])]; + tensor var_296_pad_0 = const()[name = tensor("op_296_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_296_dilations_0 = const()[name = tensor("op_296_dilations_0"), val = tensor([1, 1])]; + tensor var_296_groups_0 = const()[name = tensor("op_296_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82642816))), name = tensor("layers_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82627136))), shape = tensor([768, 768, 1, 1])]; + tensor var_296_cast_fp16 = conv(dilations = var_296_dilations_0, groups = var_296_groups_0, pad = var_296_pad_0, pad_type = var_296_pad_type_0, strides = var_296_strides_0, weight = layers_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_296_cast_fp16")]; + tensor key_3_cast_fp16 = add(x = var_290_cast_fp16, y = var_296_cast_fp16)[name = tensor("key_3_cast_fp16")]; + tensor var_306_pad_type_0 = const()[name = tensor("op_306_pad_type_0"), val = tensor("valid")]; + tensor var_306_strides_0 = const()[name = tensor("op_306_strides_0"), val = tensor([1, 1])]; + tensor var_306_pad_0 = const()[name = tensor("op_306_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_306_dilations_0 = const()[name = tensor("op_306_dilations_0"), val = tensor([1, 1])]; + tensor var_306_groups_0 = const()[name = tensor("op_306_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82716608))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83011584))), name = tensor("layers_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83011712)))]; + tensor var_306_cast_fp16 = conv(bias = layers_0_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_306_dilations_0, groups = var_306_groups_0, pad = var_306_pad_0, pad_type = var_306_pad_type_0, strides = var_306_strides_0, weight = layers_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_306_cast_fp16")]; + tensor var_312_pad_type_0 = const()[name = tensor("op_312_pad_type_0"), val = tensor("valid")]; + tensor var_312_strides_0 = const()[name = tensor("op_312_strides_0"), val = tensor([1, 1])]; + tensor var_312_pad_0 = const()[name = tensor("op_312_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_dilations_0 = const()[name = tensor("op_312_dilations_0"), val = tensor([1, 1])]; + tensor var_312_groups_0 = const()[name = tensor("op_312_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83020160))), name = tensor("layers_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83013312))), shape = tensor([768, 768, 1, 1])]; + tensor var_312_cast_fp16 = conv(dilations = var_312_dilations_0, groups = var_312_groups_0, pad = var_312_pad_0, pad_type = var_312_pad_type_0, strides = var_312_strides_0, weight = layers_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_312_cast_fp16")]; + tensor value_3_cast_fp16 = add(x = var_306_cast_fp16, y = var_312_cast_fp16)[name = tensor("value_3_cast_fp16")]; + tensor var_316 = const()[name = tensor("op_316"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_3_cast_fp16 = reshape(shape = var_316, x = query_3_cast_fp16)[name = tensor("mh_q_3_cast_fp16")]; + tensor var_318_to_fp16 = const()[name = tensor("op_318_to_fp16"), val = tensor(0x1p-3)]; + tensor var_319_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_318_to_fp16)[name = tensor("op_319_cast_fp16")]; + tensor var_322 = const()[name = tensor("op_322"), val = tensor([1, 12, 64, 1500])]; + tensor var_323_cast_fp16 = reshape(shape = var_322, x = key_3_cast_fp16)[name = tensor("op_323_cast_fp16")]; + tensor mh_w_5_transpose_x_0 = const()[name = tensor("mh_w_5_transpose_x_0"), val = tensor(true)]; + tensor mh_w_5_transpose_y_0 = const()[name = tensor("mh_w_5_transpose_y_0"), val = tensor(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_319_cast_fp16, y = var_323_cast_fp16)[name = tensor("mh_w_5_cast_fp16")]; + tensor obj_13_cast_fp16 = softmax(axis = var_102, x = mh_w_5_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_327 = const()[name = tensor("op_327"), val = tensor([1, 12, 64, 1500])]; + tensor var_328_cast_fp16 = reshape(shape = var_327, x = value_3_cast_fp16)[name = tensor("op_328_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_328_cast_fp16, y = obj_13_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_331 = const()[name = tensor("op_331"), val = tensor([1, 768, 1, 1])]; + tensor input_3_cast_fp16 = reshape(shape = var_331, x = attn_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_341_pad_type_0 = const()[name = tensor("op_341_pad_type_0"), val = tensor("valid")]; + tensor var_341_strides_0 = const()[name = tensor("op_341_strides_0"), val = tensor([1, 1])]; + tensor var_341_pad_0 = const()[name = tensor("op_341_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_341_dilations_0 = const()[name = tensor("op_341_dilations_0"), val = tensor([1, 1])]; + tensor var_341_groups_0 = const()[name = tensor("op_341_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83093952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83388928))), name = tensor("layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83389056)))]; + tensor var_341_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_341_dilations_0, groups = var_341_groups_0, pad = var_341_pad_0, pad_type = var_341_pad_type_0, strides = var_341_strides_0, weight = layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = tensor("op_341_cast_fp16")]; + tensor var_347_pad_type_0 = const()[name = tensor("op_347_pad_type_0"), val = tensor("valid")]; + tensor var_347_strides_0 = const()[name = tensor("op_347_strides_0"), val = tensor([1, 1])]; + tensor var_347_pad_0 = const()[name = tensor("op_347_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_347_dilations_0 = const()[name = tensor("op_347_dilations_0"), val = tensor([1, 1])]; + tensor var_347_groups_0 = const()[name = tensor("op_347_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83398400))), name = tensor("layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83390656))), shape = tensor([768, 768, 1, 1])]; + tensor var_347_cast_fp16 = conv(dilations = var_347_dilations_0, groups = var_347_groups_0, pad = var_347_pad_0, pad_type = var_347_pad_type_0, strides = var_347_strides_0, weight = layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = tensor("op_347_cast_fp16")]; + tensor obj_11_cast_fp16 = add(x = var_341_cast_fp16, y = var_347_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; + tensor var_358_to_fp16 = const()[name = tensor("op_358_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_358_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor input_5_gamma_0_to_fp16 = const()[name = tensor("input_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83472192)))]; + tensor input_5_beta_0_to_fp16 = const()[name = tensor("input_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83473792)))]; + tensor input_5_epsilon_0_to_fp16 = const()[name = tensor("input_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_376_pad_type_0 = const()[name = tensor("op_376_pad_type_0"), val = tensor("valid")]; + tensor var_376_strides_0 = const()[name = tensor("op_376_strides_0"), val = tensor([1, 1])]; + tensor var_376_pad_0 = const()[name = tensor("op_376_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_376_dilations_0 = const()[name = tensor("op_376_dilations_0"), val = tensor([1, 1])]; + tensor var_376_groups_0 = const()[name = tensor("op_376_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83475392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84655104))), name = tensor("layers_0_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84655232)))]; + tensor var_376_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_376_dilations_0, groups = var_376_groups_0, pad = var_376_pad_0, pad_type = var_376_pad_type_0, strides = var_376_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_382_pad_type_0 = const()[name = tensor("op_382_pad_type_0"), val = tensor("valid")]; + tensor var_382_strides_0 = const()[name = tensor("op_382_strides_0"), val = tensor([1, 1])]; + tensor var_382_pad_0 = const()[name = tensor("op_382_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_382_dilations_0 = const()[name = tensor("op_382_dilations_0"), val = tensor([1, 1])]; + tensor var_382_groups_0 = const()[name = tensor("op_382_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84717824))), name = tensor("layers_0_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84661440))), shape = tensor([3072, 768, 1, 1])]; + tensor var_382_cast_fp16 = conv(dilations = var_382_dilations_0, groups = var_382_groups_0, pad = var_382_pad_0, pad_type = var_382_pad_type_0, strides = var_382_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_5_cast_fp16)[name = tensor("op_382_cast_fp16")]; + tensor input_7_cast_fp16 = add(x = var_376_cast_fp16, y = var_382_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor input_9_mode_0 = const()[name = tensor("input_9_mode_0"), val = tensor("EXACT")]; + tensor input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_393_pad_type_0 = const()[name = tensor("op_393_pad_type_0"), val = tensor("valid")]; + tensor var_393_strides_0 = const()[name = tensor("op_393_strides_0"), val = tensor([1, 1])]; + tensor var_393_pad_0 = const()[name = tensor("op_393_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_393_dilations_0 = const()[name = tensor("op_393_dilations_0"), val = tensor([1, 1])]; + tensor var_393_groups_0 = const()[name = tensor("op_393_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85012800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86192512))), name = tensor("layers_0_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86192640)))]; + tensor var_393_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_393_dilations_0, groups = var_393_groups_0, pad = var_393_pad_0, pad_type = var_393_pad_type_0, strides = var_393_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = tensor("op_393_cast_fp16")]; + tensor var_399_pad_type_0 = const()[name = tensor("op_399_pad_type_0"), val = tensor("valid")]; + tensor var_399_strides_0 = const()[name = tensor("op_399_strides_0"), val = tensor([1, 1])]; + tensor var_399_pad_0 = const()[name = tensor("op_399_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_399_dilations_0 = const()[name = tensor("op_399_dilations_0"), val = tensor([1, 1])]; + tensor var_399_groups_0 = const()[name = tensor("op_399_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86243776))), name = tensor("layers_0_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86194240))), shape = tensor([768, 3072, 1, 1])]; + tensor var_399_cast_fp16 = conv(dilations = var_399_dilations_0, groups = var_399_groups_0, pad = var_399_pad_0, pad_type = var_399_pad_type_0, strides = var_399_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = tensor("op_399_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = add(x = var_393_cast_fp16, y = var_399_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor var_411 = const()[name = tensor("op_411"), val = tensor(3)]; + tensor out_7_axes_0 = const()[name = tensor("out_7_axes_0"), val = tensor([1])]; + tensor var_436_to_fp16 = const()[name = tensor("op_436_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_436_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor obj_15_gamma_0_to_fp16 = const()[name = tensor("obj_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86538752)))]; + tensor obj_15_beta_0_to_fp16 = const()[name = tensor("obj_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86540352)))]; + tensor obj_15_epsilon_0_to_fp16 = const()[name = tensor("obj_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_15_cast_fp16 = batch_norm(beta = obj_15_beta_0_to_fp16, epsilon = obj_15_epsilon_0_to_fp16, gamma = obj_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor var_458_pad_type_0 = const()[name = tensor("op_458_pad_type_0"), val = tensor("valid")]; + tensor var_458_strides_0 = const()[name = tensor("op_458_strides_0"), val = tensor([1, 1])]; + tensor var_458_pad_0 = const()[name = tensor("op_458_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_458_dilations_0 = const()[name = tensor("op_458_dilations_0"), val = tensor([1, 1])]; + tensor var_458_groups_0 = const()[name = tensor("op_458_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86541952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86836928))), name = tensor("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86837056)))]; + tensor var_458_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_458_dilations_0, groups = var_458_groups_0, pad = var_458_pad_0, pad_type = var_458_pad_type_0, strides = var_458_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_15_cast_fp16)[name = tensor("op_458_cast_fp16")]; + tensor var_464_pad_type_0 = const()[name = tensor("op_464_pad_type_0"), val = tensor("valid")]; + tensor var_464_strides_0 = const()[name = tensor("op_464_strides_0"), val = tensor([1, 1])]; + tensor var_464_pad_0 = const()[name = tensor("op_464_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_464_dilations_0 = const()[name = tensor("op_464_dilations_0"), val = tensor([1, 1])]; + tensor var_464_groups_0 = const()[name = tensor("op_464_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86859584))), name = tensor("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86838656))), shape = tensor([768, 768, 1, 1])]; + tensor var_464_cast_fp16 = conv(dilations = var_464_dilations_0, groups = var_464_groups_0, pad = var_464_pad_0, pad_type = var_464_pad_type_0, strides = var_464_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_15_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor query_5_cast_fp16 = add(x = var_458_cast_fp16, y = var_464_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_473_pad_type_0 = const()[name = tensor("op_473_pad_type_0"), val = tensor("valid")]; + tensor var_473_strides_0 = const()[name = tensor("op_473_strides_0"), val = tensor([1, 1])]; + tensor var_473_pad_0 = const()[name = tensor("op_473_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_473_dilations_0 = const()[name = tensor("op_473_dilations_0"), val = tensor([1, 1])]; + tensor var_473_groups_0 = const()[name = tensor("op_473_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86933376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87228352))), name = tensor("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_473_cast_fp16 = conv(dilations = var_473_dilations_0, groups = var_473_groups_0, pad = var_473_pad_0, pad_type = var_473_pad_type_0, strides = var_473_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_15_cast_fp16)[name = tensor("op_473_cast_fp16")]; + tensor var_479_pad_type_0 = const()[name = tensor("op_479_pad_type_0"), val = tensor("valid")]; + tensor var_479_strides_0 = const()[name = tensor("op_479_strides_0"), val = tensor([1, 1])]; + tensor var_479_pad_0 = const()[name = tensor("op_479_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_479_dilations_0 = const()[name = tensor("op_479_dilations_0"), val = tensor([1, 1])]; + tensor var_479_groups_0 = const()[name = tensor("op_479_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87245632))), name = tensor("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87228480))), shape = tensor([768, 768, 1, 1])]; + tensor var_479_cast_fp16 = conv(dilations = var_479_dilations_0, groups = var_479_groups_0, pad = var_479_pad_0, pad_type = var_479_pad_type_0, strides = var_479_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_15_cast_fp16)[name = tensor("op_479_cast_fp16")]; + tensor current_key_3_cast_fp16 = add(x = var_473_cast_fp16, y = var_479_cast_fp16)[name = tensor("current_key_3_cast_fp16")]; + tensor var_489_pad_type_0 = const()[name = tensor("op_489_pad_type_0"), val = tensor("valid")]; + tensor var_489_strides_0 = const()[name = tensor("op_489_strides_0"), val = tensor([1, 1])]; + tensor var_489_pad_0 = const()[name = tensor("op_489_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_489_dilations_0 = const()[name = tensor("op_489_dilations_0"), val = tensor([1, 1])]; + tensor var_489_groups_0 = const()[name = tensor("op_489_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87319424))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87614400))), name = tensor("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87614528)))]; + tensor var_489_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_489_dilations_0, groups = var_489_groups_0, pad = var_489_pad_0, pad_type = var_489_pad_type_0, strides = var_489_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_15_cast_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_495_pad_type_0 = const()[name = tensor("op_495_pad_type_0"), val = tensor("valid")]; + tensor var_495_strides_0 = const()[name = tensor("op_495_strides_0"), val = tensor([1, 1])]; + tensor var_495_pad_0 = const()[name = tensor("op_495_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_495_dilations_0 = const()[name = tensor("op_495_dilations_0"), val = tensor([1, 1])]; + tensor var_495_groups_0 = const()[name = tensor("op_495_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87639744))), name = tensor("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87616128))), shape = tensor([768, 768, 1, 1])]; + tensor var_495_cast_fp16 = conv(dilations = var_495_dilations_0, groups = var_495_groups_0, pad = var_495_pad_0, pad_type = var_495_pad_type_0, strides = var_495_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_15_cast_fp16)[name = tensor("op_495_cast_fp16")]; + tensor current_value_3_cast_fp16 = add(x = var_489_cast_fp16, y = var_495_cast_fp16)[name = tensor("current_value_3_cast_fp16")]; + tensor var_502_cast_fp16 = mul(x = var_69_cast_fp16_1, y = var_192_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_503_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_503_cast_fp16")]; + tensor key_5_cast_fp16 = add(x = var_502_cast_fp16, y = var_503_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_506_cast_fp16 = mul(x = var_84_cast_fp16_1, y = var_192_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_507_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_507_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_506_cast_fp16, y = var_507_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_511 = const()[name = tensor("op_511"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_5_cast_fp16 = reshape(shape = var_511, x = query_5_cast_fp16)[name = tensor("mh_q_5_cast_fp16")]; + tensor var_513_to_fp16 = const()[name = tensor("op_513_to_fp16"), val = tensor(0x1p-3)]; + tensor var_514_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_513_to_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_517 = const()[name = tensor("op_517"), val = tensor([1, 12, 64, 448])]; + tensor var_518_cast_fp16 = reshape(shape = var_517, x = key_5_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor mh_w_7_transpose_x_0 = const()[name = tensor("mh_w_7_transpose_x_0"), val = tensor(true)]; + tensor mh_w_7_transpose_y_0 = const()[name = tensor("mh_w_7_transpose_y_0"), val = tensor(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_514_cast_fp16, y = var_518_cast_fp16)[name = tensor("mh_w_7_cast_fp16")]; + tensor mh_w_9_cast_fp16 = add(x = mh_w_7_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_9_cast_fp16")]; + tensor var_526_cast_fp16 = softmax(axis = var_411, x = mh_w_9_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_527 = const()[name = tensor("op_527"), val = tensor([1, 12, 64, 448])]; + tensor var_528_cast_fp16 = reshape(shape = var_527, x = value_5_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_528_cast_fp16, y = var_526_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_531 = const()[name = tensor("op_531"), val = tensor([1, 768, 1, 1])]; + tensor input_11_cast_fp16 = reshape(shape = var_531, x = attn_5_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_541_pad_type_0 = const()[name = tensor("op_541_pad_type_0"), val = tensor("valid")]; + tensor var_541_strides_0 = const()[name = tensor("op_541_strides_0"), val = tensor([1, 1])]; + tensor var_541_pad_0 = const()[name = tensor("op_541_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_541_dilations_0 = const()[name = tensor("op_541_dilations_0"), val = tensor([1, 1])]; + tensor var_541_groups_0 = const()[name = tensor("op_541_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87713536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88008512))), name = tensor("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88008640)))]; + tensor var_541_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_541_dilations_0, groups = var_541_groups_0, pad = var_541_pad_0, pad_type = var_541_pad_type_0, strides = var_541_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_547_pad_type_0 = const()[name = tensor("op_547_pad_type_0"), val = tensor("valid")]; + tensor var_547_strides_0 = const()[name = tensor("op_547_strides_0"), val = tensor([1, 1])]; + tensor var_547_pad_0 = const()[name = tensor("op_547_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_547_dilations_0 = const()[name = tensor("op_547_dilations_0"), val = tensor([1, 1])]; + tensor var_547_groups_0 = const()[name = tensor("op_547_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88032192))), name = tensor("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88010240))), shape = tensor([768, 768, 1, 1])]; + tensor var_547_cast_fp16 = conv(dilations = var_547_dilations_0, groups = var_547_groups_0, pad = var_547_pad_0, pad_type = var_547_pad_type_0, strides = var_547_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = tensor("op_547_cast_fp16")]; + tensor obj_21_cast_fp16 = add(x = var_541_cast_fp16, y = var_547_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_21_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; + tensor var_562_to_fp16 = const()[name = tensor("op_562_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_562_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_23_gamma_0_to_fp16 = const()[name = tensor("obj_23_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88105984)))]; + tensor obj_23_beta_0_to_fp16 = const()[name = tensor("obj_23_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88107584)))]; + tensor obj_23_epsilon_0_to_fp16 = const()[name = tensor("obj_23_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_23_cast_fp16 = batch_norm(beta = obj_23_beta_0_to_fp16, epsilon = obj_23_epsilon_0_to_fp16, gamma = obj_23_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor var_584_pad_type_0 = const()[name = tensor("op_584_pad_type_0"), val = tensor("valid")]; + tensor var_584_strides_0 = const()[name = tensor("op_584_strides_0"), val = tensor([1, 1])]; + tensor var_584_pad_0 = const()[name = tensor("op_584_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_584_dilations_0 = const()[name = tensor("op_584_dilations_0"), val = tensor([1, 1])]; + tensor var_584_groups_0 = const()[name = tensor("op_584_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88109184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88404160))), name = tensor("layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88404288)))]; + tensor var_584_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_584_dilations_0, groups = var_584_groups_0, pad = var_584_pad_0, pad_type = var_584_pad_type_0, strides = var_584_strides_0, weight = layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_23_cast_fp16)[name = tensor("op_584_cast_fp16")]; + tensor var_590_pad_type_0 = const()[name = tensor("op_590_pad_type_0"), val = tensor("valid")]; + tensor var_590_strides_0 = const()[name = tensor("op_590_strides_0"), val = tensor([1, 1])]; + tensor var_590_pad_0 = const()[name = tensor("op_590_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_590_dilations_0 = const()[name = tensor("op_590_dilations_0"), val = tensor([1, 1])]; + tensor var_590_groups_0 = const()[name = tensor("op_590_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88417216))), name = tensor("layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88405888))), shape = tensor([768, 768, 1, 1])]; + tensor var_590_cast_fp16 = conv(dilations = var_590_dilations_0, groups = var_590_groups_0, pad = var_590_pad_0, pad_type = var_590_pad_type_0, strides = var_590_strides_0, weight = layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_23_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor query_7_cast_fp16 = add(x = var_584_cast_fp16, y = var_590_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_599_pad_type_0 = const()[name = tensor("op_599_pad_type_0"), val = tensor("valid")]; + tensor var_599_strides_0 = const()[name = tensor("op_599_strides_0"), val = tensor([1, 1])]; + tensor var_599_pad_0 = const()[name = tensor("op_599_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_599_dilations_0 = const()[name = tensor("op_599_dilations_0"), val = tensor([1, 1])]; + tensor var_599_groups_0 = const()[name = tensor("op_599_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88491008))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88785984))), name = tensor("layers_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_599_cast_fp16 = conv(dilations = var_599_dilations_0, groups = var_599_groups_0, pad = var_599_pad_0, pad_type = var_599_pad_type_0, strides = var_599_strides_0, weight = layers_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_599_cast_fp16")]; + tensor var_605_pad_type_0 = const()[name = tensor("op_605_pad_type_0"), val = tensor("valid")]; + tensor var_605_strides_0 = const()[name = tensor("op_605_strides_0"), val = tensor([1, 1])]; + tensor var_605_pad_0 = const()[name = tensor("op_605_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_605_dilations_0 = const()[name = tensor("op_605_dilations_0"), val = tensor([1, 1])]; + tensor var_605_groups_0 = const()[name = tensor("op_605_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88795584))), name = tensor("layers_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88786112))), shape = tensor([768, 768, 1, 1])]; + tensor var_605_cast_fp16 = conv(dilations = var_605_dilations_0, groups = var_605_groups_0, pad = var_605_pad_0, pad_type = var_605_pad_type_0, strides = var_605_strides_0, weight = layers_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_605_cast_fp16")]; + tensor key_7_cast_fp16 = add(x = var_599_cast_fp16, y = var_605_cast_fp16)[name = tensor("key_7_cast_fp16")]; + tensor var_615_pad_type_0 = const()[name = tensor("op_615_pad_type_0"), val = tensor("valid")]; + tensor var_615_strides_0 = const()[name = tensor("op_615_strides_0"), val = tensor([1, 1])]; + tensor var_615_pad_0 = const()[name = tensor("op_615_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_615_dilations_0 = const()[name = tensor("op_615_dilations_0"), val = tensor([1, 1])]; + tensor var_615_groups_0 = const()[name = tensor("op_615_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88869376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89164352))), name = tensor("layers_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89164480)))]; + tensor var_615_cast_fp16 = conv(bias = layers_1_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_615_dilations_0, groups = var_615_groups_0, pad = var_615_pad_0, pad_type = var_615_pad_type_0, strides = var_615_strides_0, weight = layers_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_615_cast_fp16")]; + tensor var_621_pad_type_0 = const()[name = tensor("op_621_pad_type_0"), val = tensor("valid")]; + tensor var_621_strides_0 = const()[name = tensor("op_621_strides_0"), val = tensor([1, 1])]; + tensor var_621_pad_0 = const()[name = tensor("op_621_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_621_dilations_0 = const()[name = tensor("op_621_dilations_0"), val = tensor([1, 1])]; + tensor var_621_groups_0 = const()[name = tensor("op_621_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89171904))), name = tensor("layers_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89166080))), shape = tensor([768, 768, 1, 1])]; + tensor var_621_cast_fp16 = conv(dilations = var_621_dilations_0, groups = var_621_groups_0, pad = var_621_pad_0, pad_type = var_621_pad_type_0, strides = var_621_strides_0, weight = layers_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_621_cast_fp16")]; + tensor value_7_cast_fp16 = add(x = var_615_cast_fp16, y = var_621_cast_fp16)[name = tensor("value_7_cast_fp16")]; + tensor var_625 = const()[name = tensor("op_625"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_625, x = query_7_cast_fp16)[name = tensor("mh_q_7_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1p-3)]; + tensor var_628_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_631 = const()[name = tensor("op_631"), val = tensor([1, 12, 64, 1500])]; + tensor var_632_cast_fp16 = reshape(shape = var_631, x = key_7_cast_fp16)[name = tensor("op_632_cast_fp16")]; + tensor mh_w_11_transpose_x_0 = const()[name = tensor("mh_w_11_transpose_x_0"), val = tensor(true)]; + tensor mh_w_11_transpose_y_0 = const()[name = tensor("mh_w_11_transpose_y_0"), val = tensor(false)]; + tensor mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_628_cast_fp16, y = var_632_cast_fp16)[name = tensor("mh_w_11_cast_fp16")]; + tensor obj_27_cast_fp16 = softmax(axis = var_411, x = mh_w_11_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 12, 64, 1500])]; + tensor var_637_cast_fp16 = reshape(shape = var_636, x = value_7_cast_fp16)[name = tensor("op_637_cast_fp16")]; + tensor attn_7_transpose_x_0 = const()[name = tensor("attn_7_transpose_x_0"), val = tensor(false)]; + tensor attn_7_transpose_y_0 = const()[name = tensor("attn_7_transpose_y_0"), val = tensor(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_637_cast_fp16, y = obj_27_cast_fp16)[name = tensor("attn_7_cast_fp16")]; + tensor var_640 = const()[name = tensor("op_640"), val = tensor([1, 768, 1, 1])]; + tensor input_13_cast_fp16 = reshape(shape = var_640, x = attn_7_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_650_pad_type_0 = const()[name = tensor("op_650_pad_type_0"), val = tensor("valid")]; + tensor var_650_strides_0 = const()[name = tensor("op_650_strides_0"), val = tensor([1, 1])]; + tensor var_650_pad_0 = const()[name = tensor("op_650_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_650_dilations_0 = const()[name = tensor("op_650_dilations_0"), val = tensor([1, 1])]; + tensor var_650_groups_0 = const()[name = tensor("op_650_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89245696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89540672))), name = tensor("layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89540800)))]; + tensor var_650_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_650_dilations_0, groups = var_650_groups_0, pad = var_650_pad_0, pad_type = var_650_pad_type_0, strides = var_650_strides_0, weight = layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = tensor("op_650_cast_fp16")]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("valid")]; + tensor var_656_strides_0 = const()[name = tensor("op_656_strides_0"), val = tensor([1, 1])]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_dilations_0 = const()[name = tensor("op_656_dilations_0"), val = tensor([1, 1])]; + tensor var_656_groups_0 = const()[name = tensor("op_656_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89548800))), name = tensor("layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89542400))), shape = tensor([768, 768, 1, 1])]; + tensor var_656_cast_fp16 = conv(dilations = var_656_dilations_0, groups = var_656_groups_0, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_656_strides_0, weight = layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_13_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor obj_25_cast_fp16 = add(x = var_650_cast_fp16, y = var_656_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_25_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_667_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_15_gamma_0_to_fp16 = const()[name = tensor("input_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89622592)))]; + tensor input_15_beta_0_to_fp16 = const()[name = tensor("input_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89624192)))]; + tensor input_15_epsilon_0_to_fp16 = const()[name = tensor("input_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_685_pad_type_0 = const()[name = tensor("op_685_pad_type_0"), val = tensor("valid")]; + tensor var_685_strides_0 = const()[name = tensor("op_685_strides_0"), val = tensor([1, 1])]; + tensor var_685_pad_0 = const()[name = tensor("op_685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_685_dilations_0 = const()[name = tensor("op_685_dilations_0"), val = tensor([1, 1])]; + tensor var_685_groups_0 = const()[name = tensor("op_685_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89625792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90805504))), name = tensor("layers_1_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90805632)))]; + tensor var_685_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_685_dilations_0, groups = var_685_groups_0, pad = var_685_pad_0, pad_type = var_685_pad_type_0, strides = var_685_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = tensor("op_685_cast_fp16")]; + tensor var_691_pad_type_0 = const()[name = tensor("op_691_pad_type_0"), val = tensor("valid")]; + tensor var_691_strides_0 = const()[name = tensor("op_691_strides_0"), val = tensor([1, 1])]; + tensor var_691_pad_0 = const()[name = tensor("op_691_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_691_dilations_0 = const()[name = tensor("op_691_dilations_0"), val = tensor([1, 1])]; + tensor var_691_groups_0 = const()[name = tensor("op_691_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90901312))), name = tensor("layers_1_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90811840))), shape = tensor([3072, 768, 1, 1])]; + tensor var_691_cast_fp16 = conv(dilations = var_691_dilations_0, groups = var_691_groups_0, pad = var_691_pad_0, pad_type = var_691_pad_type_0, strides = var_691_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = tensor("op_691_cast_fp16")]; + tensor input_17_cast_fp16 = add(x = var_685_cast_fp16, y = var_691_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor input_19_mode_0 = const()[name = tensor("input_19_mode_0"), val = tensor("EXACT")]; + tensor input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_702_pad_type_0 = const()[name = tensor("op_702_pad_type_0"), val = tensor("valid")]; + tensor var_702_strides_0 = const()[name = tensor("op_702_strides_0"), val = tensor([1, 1])]; + tensor var_702_pad_0 = const()[name = tensor("op_702_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_702_dilations_0 = const()[name = tensor("op_702_dilations_0"), val = tensor([1, 1])]; + tensor var_702_groups_0 = const()[name = tensor("op_702_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91196288))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92376000))), name = tensor("layers_1_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92376128)))]; + tensor var_702_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_702_dilations_0, groups = var_702_groups_0, pad = var_702_pad_0, pad_type = var_702_pad_type_0, strides = var_702_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = tensor("op_702_cast_fp16")]; + tensor var_708_pad_type_0 = const()[name = tensor("op_708_pad_type_0"), val = tensor("valid")]; + tensor var_708_strides_0 = const()[name = tensor("op_708_strides_0"), val = tensor([1, 1])]; + tensor var_708_pad_0 = const()[name = tensor("op_708_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_708_dilations_0 = const()[name = tensor("op_708_dilations_0"), val = tensor([1, 1])]; + tensor var_708_groups_0 = const()[name = tensor("op_708_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92452480))), name = tensor("layers_1_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92377728))), shape = tensor([768, 3072, 1, 1])]; + tensor var_708_cast_fp16 = conv(dilations = var_708_dilations_0, groups = var_708_groups_0, pad = var_708_pad_0, pad_type = var_708_pad_type_0, strides = var_708_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = tensor("op_708_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = var_702_cast_fp16, y = var_708_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_720 = const()[name = tensor("op_720"), val = tensor(3)]; + tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; + tensor var_745_to_fp16 = const()[name = tensor("op_745_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_745_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92747456)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92749056)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor var_767_pad_type_0 = const()[name = tensor("op_767_pad_type_0"), val = tensor("valid")]; + tensor var_767_strides_0 = const()[name = tensor("op_767_strides_0"), val = tensor([1, 1])]; + tensor var_767_pad_0 = const()[name = tensor("op_767_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_767_dilations_0 = const()[name = tensor("op_767_dilations_0"), val = tensor([1, 1])]; + tensor var_767_groups_0 = const()[name = tensor("op_767_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92750656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93045632))), name = tensor("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93045760)))]; + tensor var_767_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_767_dilations_0, groups = var_767_groups_0, pad = var_767_pad_0, pad_type = var_767_pad_type_0, strides = var_767_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_773_pad_type_0 = const()[name = tensor("op_773_pad_type_0"), val = tensor("valid")]; + tensor var_773_strides_0 = const()[name = tensor("op_773_strides_0"), val = tensor([1, 1])]; + tensor var_773_pad_0 = const()[name = tensor("op_773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_773_dilations_0 = const()[name = tensor("op_773_dilations_0"), val = tensor([1, 1])]; + tensor var_773_groups_0 = const()[name = tensor("op_773_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93077888))), name = tensor("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93047360))), shape = tensor([768, 768, 1, 1])]; + tensor var_773_cast_fp16 = conv(dilations = var_773_dilations_0, groups = var_773_groups_0, pad = var_773_pad_0, pad_type = var_773_pad_type_0, strides = var_773_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor query_9_cast_fp16 = add(x = var_767_cast_fp16, y = var_773_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_782_pad_type_0 = const()[name = tensor("op_782_pad_type_0"), val = tensor("valid")]; + tensor var_782_strides_0 = const()[name = tensor("op_782_strides_0"), val = tensor([1, 1])]; + tensor var_782_pad_0 = const()[name = tensor("op_782_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_782_dilations_0 = const()[name = tensor("op_782_dilations_0"), val = tensor([1, 1])]; + tensor var_782_groups_0 = const()[name = tensor("op_782_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93151680))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93446656))), name = tensor("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_782_cast_fp16 = conv(dilations = var_782_dilations_0, groups = var_782_groups_0, pad = var_782_pad_0, pad_type = var_782_pad_type_0, strides = var_782_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_788_pad_type_0 = const()[name = tensor("op_788_pad_type_0"), val = tensor("valid")]; + tensor var_788_strides_0 = const()[name = tensor("op_788_strides_0"), val = tensor([1, 1])]; + tensor var_788_pad_0 = const()[name = tensor("op_788_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_788_dilations_0 = const()[name = tensor("op_788_dilations_0"), val = tensor([1, 1])]; + tensor var_788_groups_0 = const()[name = tensor("op_788_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93478336))), name = tensor("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93446784))), shape = tensor([768, 768, 1, 1])]; + tensor var_788_cast_fp16 = conv(dilations = var_788_dilations_0, groups = var_788_groups_0, pad = var_788_pad_0, pad_type = var_788_pad_type_0, strides = var_788_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_788_cast_fp16")]; + tensor current_key_5_cast_fp16 = add(x = var_782_cast_fp16, y = var_788_cast_fp16)[name = tensor("current_key_5_cast_fp16")]; + tensor var_798_pad_type_0 = const()[name = tensor("op_798_pad_type_0"), val = tensor("valid")]; + tensor var_798_strides_0 = const()[name = tensor("op_798_strides_0"), val = tensor([1, 1])]; + tensor var_798_pad_0 = const()[name = tensor("op_798_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_798_dilations_0 = const()[name = tensor("op_798_dilations_0"), val = tensor([1, 1])]; + tensor var_798_groups_0 = const()[name = tensor("op_798_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93552128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93847104))), name = tensor("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93847232)))]; + tensor var_798_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_798_dilations_0, groups = var_798_groups_0, pad = var_798_pad_0, pad_type = var_798_pad_type_0, strides = var_798_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_798_cast_fp16")]; + tensor var_804_pad_type_0 = const()[name = tensor("op_804_pad_type_0"), val = tensor("valid")]; + tensor var_804_strides_0 = const()[name = tensor("op_804_strides_0"), val = tensor([1, 1])]; + tensor var_804_pad_0 = const()[name = tensor("op_804_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_804_dilations_0 = const()[name = tensor("op_804_dilations_0"), val = tensor([1, 1])]; + tensor var_804_groups_0 = const()[name = tensor("op_804_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93881856))), name = tensor("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93848832))), shape = tensor([768, 768, 1, 1])]; + tensor var_804_cast_fp16 = conv(dilations = var_804_dilations_0, groups = var_804_groups_0, pad = var_804_pad_0, pad_type = var_804_pad_type_0, strides = var_804_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_804_cast_fp16")]; + tensor current_value_5_cast_fp16 = add(x = var_798_cast_fp16, y = var_804_cast_fp16)[name = tensor("current_value_5_cast_fp16")]; + tensor var_811_cast_fp16 = mul(x = var_69_cast_fp16_2, y = var_192_cast_fp16)[name = tensor("op_811_cast_fp16")]; + tensor var_812_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_812_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_811_cast_fp16, y = var_812_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_815_cast_fp16 = mul(x = var_84_cast_fp16_2, y = var_192_cast_fp16)[name = tensor("op_815_cast_fp16")]; + tensor var_816_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_816_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_815_cast_fp16, y = var_816_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_820 = const()[name = tensor("op_820"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_9_cast_fp16 = reshape(shape = var_820, x = query_9_cast_fp16)[name = tensor("mh_q_9_cast_fp16")]; + tensor var_822_to_fp16 = const()[name = tensor("op_822_to_fp16"), val = tensor(0x1p-3)]; + tensor var_823_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_822_to_fp16)[name = tensor("op_823_cast_fp16")]; + tensor var_826 = const()[name = tensor("op_826"), val = tensor([1, 12, 64, 448])]; + tensor var_827_cast_fp16 = reshape(shape = var_826, x = key_9_cast_fp16)[name = tensor("op_827_cast_fp16")]; + tensor mh_w_13_transpose_x_0 = const()[name = tensor("mh_w_13_transpose_x_0"), val = tensor(true)]; + tensor mh_w_13_transpose_y_0 = const()[name = tensor("mh_w_13_transpose_y_0"), val = tensor(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_823_cast_fp16, y = var_827_cast_fp16)[name = tensor("mh_w_13_cast_fp16")]; + tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_15_cast_fp16")]; + tensor var_835_cast_fp16 = softmax(axis = var_720, x = mh_w_15_cast_fp16)[name = tensor("op_835_cast_fp16")]; + tensor var_836 = const()[name = tensor("op_836"), val = tensor([1, 12, 64, 448])]; + tensor var_837_cast_fp16 = reshape(shape = var_836, x = value_9_cast_fp16)[name = tensor("op_837_cast_fp16")]; + tensor attn_9_transpose_x_0 = const()[name = tensor("attn_9_transpose_x_0"), val = tensor(false)]; + tensor attn_9_transpose_y_0 = const()[name = tensor("attn_9_transpose_y_0"), val = tensor(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_837_cast_fp16, y = var_835_cast_fp16)[name = tensor("attn_9_cast_fp16")]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 768, 1, 1])]; + tensor input_21_cast_fp16 = reshape(shape = var_840, x = attn_9_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_850_pad_type_0 = const()[name = tensor("op_850_pad_type_0"), val = tensor("valid")]; + tensor var_850_strides_0 = const()[name = tensor("op_850_strides_0"), val = tensor([1, 1])]; + tensor var_850_pad_0 = const()[name = tensor("op_850_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_850_dilations_0 = const()[name = tensor("op_850_dilations_0"), val = tensor([1, 1])]; + tensor var_850_groups_0 = const()[name = tensor("op_850_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93955648))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94250624))), name = tensor("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94250752)))]; + tensor var_850_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_850_dilations_0, groups = var_850_groups_0, pad = var_850_pad_0, pad_type = var_850_pad_type_0, strides = var_850_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = tensor("op_850_cast_fp16")]; + tensor var_856_pad_type_0 = const()[name = tensor("op_856_pad_type_0"), val = tensor("valid")]; + tensor var_856_strides_0 = const()[name = tensor("op_856_strides_0"), val = tensor([1, 1])]; + tensor var_856_pad_0 = const()[name = tensor("op_856_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_856_dilations_0 = const()[name = tensor("op_856_dilations_0"), val = tensor([1, 1])]; + tensor var_856_groups_0 = const()[name = tensor("op_856_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94278272))), name = tensor("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94252352))), shape = tensor([768, 768, 1, 1])]; + tensor var_856_cast_fp16 = conv(dilations = var_856_dilations_0, groups = var_856_groups_0, pad = var_856_pad_0, pad_type = var_856_pad_type_0, strides = var_856_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_21_cast_fp16)[name = tensor("op_856_cast_fp16")]; + tensor obj_35_cast_fp16 = add(x = var_850_cast_fp16, y = var_856_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; + tensor var_871_to_fp16 = const()[name = tensor("op_871_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_871_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94352064)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94353664)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor var_893_pad_type_0 = const()[name = tensor("op_893_pad_type_0"), val = tensor("valid")]; + tensor var_893_strides_0 = const()[name = tensor("op_893_strides_0"), val = tensor([1, 1])]; + tensor var_893_pad_0 = const()[name = tensor("op_893_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_893_dilations_0 = const()[name = tensor("op_893_dilations_0"), val = tensor([1, 1])]; + tensor var_893_groups_0 = const()[name = tensor("op_893_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94355264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94650240))), name = tensor("layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94650368)))]; + tensor var_893_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_893_dilations_0, groups = var_893_groups_0, pad = var_893_pad_0, pad_type = var_893_pad_type_0, strides = var_893_strides_0, weight = layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_893_cast_fp16")]; + tensor var_899_pad_type_0 = const()[name = tensor("op_899_pad_type_0"), val = tensor("valid")]; + tensor var_899_strides_0 = const()[name = tensor("op_899_strides_0"), val = tensor([1, 1])]; + tensor var_899_pad_0 = const()[name = tensor("op_899_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_899_dilations_0 = const()[name = tensor("op_899_dilations_0"), val = tensor([1, 1])]; + tensor var_899_groups_0 = const()[name = tensor("op_899_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94662720))), name = tensor("layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94651968))), shape = tensor([768, 768, 1, 1])]; + tensor var_899_cast_fp16 = conv(dilations = var_899_dilations_0, groups = var_899_groups_0, pad = var_899_pad_0, pad_type = var_899_pad_type_0, strides = var_899_strides_0, weight = layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_899_cast_fp16")]; + tensor query_11_cast_fp16 = add(x = var_893_cast_fp16, y = var_899_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor var_908_pad_type_0 = const()[name = tensor("op_908_pad_type_0"), val = tensor("valid")]; + tensor var_908_strides_0 = const()[name = tensor("op_908_strides_0"), val = tensor([1, 1])]; + tensor var_908_pad_0 = const()[name = tensor("op_908_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_908_dilations_0 = const()[name = tensor("op_908_dilations_0"), val = tensor([1, 1])]; + tensor var_908_groups_0 = const()[name = tensor("op_908_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94736512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95031488))), name = tensor("layers_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_908_cast_fp16 = conv(dilations = var_908_dilations_0, groups = var_908_groups_0, pad = var_908_pad_0, pad_type = var_908_pad_type_0, strides = var_908_strides_0, weight = layers_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_908_cast_fp16")]; + tensor var_914_pad_type_0 = const()[name = tensor("op_914_pad_type_0"), val = tensor("valid")]; + tensor var_914_strides_0 = const()[name = tensor("op_914_strides_0"), val = tensor([1, 1])]; + tensor var_914_pad_0 = const()[name = tensor("op_914_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_914_dilations_0 = const()[name = tensor("op_914_dilations_0"), val = tensor([1, 1])]; + tensor var_914_groups_0 = const()[name = tensor("op_914_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95040832))), name = tensor("layers_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95031616))), shape = tensor([768, 768, 1, 1])]; + tensor var_914_cast_fp16 = conv(dilations = var_914_dilations_0, groups = var_914_groups_0, pad = var_914_pad_0, pad_type = var_914_pad_type_0, strides = var_914_strides_0, weight = layers_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_914_cast_fp16")]; + tensor key_11_cast_fp16 = add(x = var_908_cast_fp16, y = var_914_cast_fp16)[name = tensor("key_11_cast_fp16")]; + tensor var_924_pad_type_0 = const()[name = tensor("op_924_pad_type_0"), val = tensor("valid")]; + tensor var_924_strides_0 = const()[name = tensor("op_924_strides_0"), val = tensor([1, 1])]; + tensor var_924_pad_0 = const()[name = tensor("op_924_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_924_dilations_0 = const()[name = tensor("op_924_dilations_0"), val = tensor([1, 1])]; + tensor var_924_groups_0 = const()[name = tensor("op_924_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95114624))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95409600))), name = tensor("layers_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95409728)))]; + tensor var_924_cast_fp16 = conv(bias = layers_2_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_924_dilations_0, groups = var_924_groups_0, pad = var_924_pad_0, pad_type = var_924_pad_type_0, strides = var_924_strides_0, weight = layers_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_924_cast_fp16")]; + tensor var_930_pad_type_0 = const()[name = tensor("op_930_pad_type_0"), val = tensor("valid")]; + tensor var_930_strides_0 = const()[name = tensor("op_930_strides_0"), val = tensor([1, 1])]; + tensor var_930_pad_0 = const()[name = tensor("op_930_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_930_dilations_0 = const()[name = tensor("op_930_dilations_0"), val = tensor([1, 1])]; + tensor var_930_groups_0 = const()[name = tensor("op_930_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95420224))), name = tensor("layers_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95411328))), shape = tensor([768, 768, 1, 1])]; + tensor var_930_cast_fp16 = conv(dilations = var_930_dilations_0, groups = var_930_groups_0, pad = var_930_pad_0, pad_type = var_930_pad_type_0, strides = var_930_strides_0, weight = layers_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_930_cast_fp16")]; + tensor value_11_cast_fp16 = add(x = var_924_cast_fp16, y = var_930_cast_fp16)[name = tensor("value_11_cast_fp16")]; + tensor var_934 = const()[name = tensor("op_934"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_11_cast_fp16 = reshape(shape = var_934, x = query_11_cast_fp16)[name = tensor("mh_q_11_cast_fp16")]; + tensor var_936_to_fp16 = const()[name = tensor("op_936_to_fp16"), val = tensor(0x1p-3)]; + tensor var_937_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_936_to_fp16)[name = tensor("op_937_cast_fp16")]; + tensor var_940 = const()[name = tensor("op_940"), val = tensor([1, 12, 64, 1500])]; + tensor var_941_cast_fp16 = reshape(shape = var_940, x = key_11_cast_fp16)[name = tensor("op_941_cast_fp16")]; + tensor mh_w_17_transpose_x_0 = const()[name = tensor("mh_w_17_transpose_x_0"), val = tensor(true)]; + tensor mh_w_17_transpose_y_0 = const()[name = tensor("mh_w_17_transpose_y_0"), val = tensor(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_937_cast_fp16, y = var_941_cast_fp16)[name = tensor("mh_w_17_cast_fp16")]; + tensor obj_41_cast_fp16 = softmax(axis = var_720, x = mh_w_17_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor var_945 = const()[name = tensor("op_945"), val = tensor([1, 12, 64, 1500])]; + tensor var_946_cast_fp16 = reshape(shape = var_945, x = value_11_cast_fp16)[name = tensor("op_946_cast_fp16")]; + tensor attn_11_transpose_x_0 = const()[name = tensor("attn_11_transpose_x_0"), val = tensor(false)]; + tensor attn_11_transpose_y_0 = const()[name = tensor("attn_11_transpose_y_0"), val = tensor(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_946_cast_fp16, y = obj_41_cast_fp16)[name = tensor("attn_11_cast_fp16")]; + tensor var_949 = const()[name = tensor("op_949"), val = tensor([1, 768, 1, 1])]; + tensor input_23_cast_fp16 = reshape(shape = var_949, x = attn_11_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_959_pad_type_0 = const()[name = tensor("op_959_pad_type_0"), val = tensor("valid")]; + tensor var_959_strides_0 = const()[name = tensor("op_959_strides_0"), val = tensor([1, 1])]; + tensor var_959_pad_0 = const()[name = tensor("op_959_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_959_dilations_0 = const()[name = tensor("op_959_dilations_0"), val = tensor([1, 1])]; + tensor var_959_groups_0 = const()[name = tensor("op_959_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95494016))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95788992))), name = tensor("layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95789120)))]; + tensor var_959_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_959_dilations_0, groups = var_959_groups_0, pad = var_959_pad_0, pad_type = var_959_pad_type_0, strides = var_959_strides_0, weight = layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = tensor("op_959_cast_fp16")]; + tensor var_965_pad_type_0 = const()[name = tensor("op_965_pad_type_0"), val = tensor("valid")]; + tensor var_965_strides_0 = const()[name = tensor("op_965_strides_0"), val = tensor([1, 1])]; + tensor var_965_pad_0 = const()[name = tensor("op_965_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_965_dilations_0 = const()[name = tensor("op_965_dilations_0"), val = tensor([1, 1])]; + tensor var_965_groups_0 = const()[name = tensor("op_965_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95800192))), name = tensor("layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95790720))), shape = tensor([768, 768, 1, 1])]; + tensor var_965_cast_fp16 = conv(dilations = var_965_dilations_0, groups = var_965_groups_0, pad = var_965_pad_0, pad_type = var_965_pad_type_0, strides = var_965_strides_0, weight = layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = tensor("op_965_cast_fp16")]; + tensor obj_39_cast_fp16 = add(x = var_959_cast_fp16, y = var_965_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor out_17_axes_0 = const()[name = tensor("out_17_axes_0"), val = tensor([1])]; + tensor var_976_to_fp16 = const()[name = tensor("op_976_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_976_to_fp16, x = inputs_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor input_25_gamma_0_to_fp16 = const()[name = tensor("input_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95873984)))]; + tensor input_25_beta_0_to_fp16 = const()[name = tensor("input_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95875584)))]; + tensor input_25_epsilon_0_to_fp16 = const()[name = tensor("input_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_994_pad_type_0 = const()[name = tensor("op_994_pad_type_0"), val = tensor("valid")]; + tensor var_994_strides_0 = const()[name = tensor("op_994_strides_0"), val = tensor([1, 1])]; + tensor var_994_pad_0 = const()[name = tensor("op_994_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_994_dilations_0 = const()[name = tensor("op_994_dilations_0"), val = tensor([1, 1])]; + tensor var_994_groups_0 = const()[name = tensor("op_994_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95877184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97056896))), name = tensor("layers_2_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97057024)))]; + tensor var_994_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_994_dilations_0, groups = var_994_groups_0, pad = var_994_pad_0, pad_type = var_994_pad_type_0, strides = var_994_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = tensor("op_994_cast_fp16")]; + tensor var_1000_pad_type_0 = const()[name = tensor("op_1000_pad_type_0"), val = tensor("valid")]; + tensor var_1000_strides_0 = const()[name = tensor("op_1000_strides_0"), val = tensor([1, 1])]; + tensor var_1000_pad_0 = const()[name = tensor("op_1000_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1000_dilations_0 = const()[name = tensor("op_1000_dilations_0"), val = tensor([1, 1])]; + tensor var_1000_groups_0 = const()[name = tensor("op_1000_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97169408))), name = tensor("layers_2_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97063232))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1000_cast_fp16 = conv(dilations = var_1000_dilations_0, groups = var_1000_groups_0, pad = var_1000_pad_0, pad_type = var_1000_pad_type_0, strides = var_1000_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor input_27_cast_fp16 = add(x = var_994_cast_fp16, y = var_1000_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor input_29_mode_0 = const()[name = tensor("input_29_mode_0"), val = tensor("EXACT")]; + tensor input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor var_1011_pad_type_0 = const()[name = tensor("op_1011_pad_type_0"), val = tensor("valid")]; + tensor var_1011_strides_0 = const()[name = tensor("op_1011_strides_0"), val = tensor([1, 1])]; + tensor var_1011_pad_0 = const()[name = tensor("op_1011_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1011_dilations_0 = const()[name = tensor("op_1011_dilations_0"), val = tensor([1, 1])]; + tensor var_1011_groups_0 = const()[name = tensor("op_1011_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97464384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98644096))), name = tensor("layers_2_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98644224)))]; + tensor var_1011_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_1011_dilations_0, groups = var_1011_groups_0, pad = var_1011_pad_0, pad_type = var_1011_pad_type_0, strides = var_1011_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = tensor("op_1011_cast_fp16")]; + tensor var_1017_pad_type_0 = const()[name = tensor("op_1017_pad_type_0"), val = tensor("valid")]; + tensor var_1017_strides_0 = const()[name = tensor("op_1017_strides_0"), val = tensor([1, 1])]; + tensor var_1017_pad_0 = const()[name = tensor("op_1017_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1017_dilations_0 = const()[name = tensor("op_1017_dilations_0"), val = tensor([1, 1])]; + tensor var_1017_groups_0 = const()[name = tensor("op_1017_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98757248))), name = tensor("layers_2_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98645824))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1017_cast_fp16 = conv(dilations = var_1017_dilations_0, groups = var_1017_groups_0, pad = var_1017_pad_0, pad_type = var_1017_pad_type_0, strides = var_1017_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_29_cast_fp16)[name = tensor("op_1017_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = add(x = var_1011_cast_fp16, y = var_1017_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor(3)]; + tensor out_19_axes_0 = const()[name = tensor("out_19_axes_0"), val = tensor([1])]; + tensor var_1054_to_fp16 = const()[name = tensor("op_1054_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_1054_to_fp16, x = inputs_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor obj_43_gamma_0_to_fp16 = const()[name = tensor("obj_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99052224)))]; + tensor obj_43_beta_0_to_fp16 = const()[name = tensor("obj_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99053824)))]; + tensor obj_43_epsilon_0_to_fp16 = const()[name = tensor("obj_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor var_1076_pad_type_0 = const()[name = tensor("op_1076_pad_type_0"), val = tensor("valid")]; + tensor var_1076_strides_0 = const()[name = tensor("op_1076_strides_0"), val = tensor([1, 1])]; + tensor var_1076_pad_0 = const()[name = tensor("op_1076_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1076_dilations_0 = const()[name = tensor("op_1076_dilations_0"), val = tensor([1, 1])]; + tensor var_1076_groups_0 = const()[name = tensor("op_1076_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99055424))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99350400))), name = tensor("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99350528)))]; + tensor var_1076_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1076_dilations_0, groups = var_1076_groups_0, pad = var_1076_pad_0, pad_type = var_1076_pad_type_0, strides = var_1076_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = tensor("op_1076_cast_fp16")]; + tensor var_1082_pad_type_0 = const()[name = tensor("op_1082_pad_type_0"), val = tensor("valid")]; + tensor var_1082_strides_0 = const()[name = tensor("op_1082_strides_0"), val = tensor([1, 1])]; + tensor var_1082_pad_0 = const()[name = tensor("op_1082_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1082_dilations_0 = const()[name = tensor("op_1082_dilations_0"), val = tensor([1, 1])]; + tensor var_1082_groups_0 = const()[name = tensor("op_1082_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99370880))), name = tensor("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99352128))), shape = tensor([768, 768, 1, 1])]; + tensor var_1082_cast_fp16 = conv(dilations = var_1082_dilations_0, groups = var_1082_groups_0, pad = var_1082_pad_0, pad_type = var_1082_pad_type_0, strides = var_1082_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = tensor("op_1082_cast_fp16")]; + tensor query_13_cast_fp16 = add(x = var_1076_cast_fp16, y = var_1082_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor var_1091_pad_type_0 = const()[name = tensor("op_1091_pad_type_0"), val = tensor("valid")]; + tensor var_1091_strides_0 = const()[name = tensor("op_1091_strides_0"), val = tensor([1, 1])]; + tensor var_1091_pad_0 = const()[name = tensor("op_1091_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1091_dilations_0 = const()[name = tensor("op_1091_dilations_0"), val = tensor([1, 1])]; + tensor var_1091_groups_0 = const()[name = tensor("op_1091_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99444672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99739648))), name = tensor("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1091_cast_fp16 = conv(dilations = var_1091_dilations_0, groups = var_1091_groups_0, pad = var_1091_pad_0, pad_type = var_1091_pad_type_0, strides = var_1091_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = tensor("op_1091_cast_fp16")]; + tensor var_1097_pad_type_0 = const()[name = tensor("op_1097_pad_type_0"), val = tensor("valid")]; + tensor var_1097_strides_0 = const()[name = tensor("op_1097_strides_0"), val = tensor([1, 1])]; + tensor var_1097_pad_0 = const()[name = tensor("op_1097_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1097_dilations_0 = const()[name = tensor("op_1097_dilations_0"), val = tensor([1, 1])]; + tensor var_1097_groups_0 = const()[name = tensor("op_1097_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99756736))), name = tensor("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99739776))), shape = tensor([768, 768, 1, 1])]; + tensor var_1097_cast_fp16 = conv(dilations = var_1097_dilations_0, groups = var_1097_groups_0, pad = var_1097_pad_0, pad_type = var_1097_pad_type_0, strides = var_1097_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = tensor("op_1097_cast_fp16")]; + tensor current_key_7_cast_fp16 = add(x = var_1091_cast_fp16, y = var_1097_cast_fp16)[name = tensor("current_key_7_cast_fp16")]; + tensor var_1107_pad_type_0 = const()[name = tensor("op_1107_pad_type_0"), val = tensor("valid")]; + tensor var_1107_strides_0 = const()[name = tensor("op_1107_strides_0"), val = tensor([1, 1])]; + tensor var_1107_pad_0 = const()[name = tensor("op_1107_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1107_dilations_0 = const()[name = tensor("op_1107_dilations_0"), val = tensor([1, 1])]; + tensor var_1107_groups_0 = const()[name = tensor("op_1107_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99830528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100125504))), name = tensor("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100125632)))]; + tensor var_1107_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1107_dilations_0, groups = var_1107_groups_0, pad = var_1107_pad_0, pad_type = var_1107_pad_type_0, strides = var_1107_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = tensor("op_1107_cast_fp16")]; + tensor var_1113_pad_type_0 = const()[name = tensor("op_1113_pad_type_0"), val = tensor("valid")]; + tensor var_1113_strides_0 = const()[name = tensor("op_1113_strides_0"), val = tensor([1, 1])]; + tensor var_1113_pad_0 = const()[name = tensor("op_1113_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1113_dilations_0 = const()[name = tensor("op_1113_dilations_0"), val = tensor([1, 1])]; + tensor var_1113_groups_0 = const()[name = tensor("op_1113_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100143360))), name = tensor("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100127232))), shape = tensor([768, 768, 1, 1])]; + tensor var_1113_cast_fp16 = conv(dilations = var_1113_dilations_0, groups = var_1113_groups_0, pad = var_1113_pad_0, pad_type = var_1113_pad_type_0, strides = var_1113_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = tensor("op_1113_cast_fp16")]; + tensor current_value_7_cast_fp16 = add(x = var_1107_cast_fp16, y = var_1113_cast_fp16)[name = tensor("current_value_7_cast_fp16")]; + tensor var_1120_cast_fp16 = mul(x = var_69_cast_fp16_3, y = var_192_cast_fp16)[name = tensor("op_1120_cast_fp16")]; + tensor var_1121_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1121_cast_fp16")]; + tensor key_13_cast_fp16 = add(x = var_1120_cast_fp16, y = var_1121_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_1124_cast_fp16 = mul(x = var_84_cast_fp16_3, y = var_192_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1125_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1125_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_1124_cast_fp16, y = var_1125_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_1129 = const()[name = tensor("op_1129"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_1129, x = query_13_cast_fp16)[name = tensor("mh_q_13_cast_fp16")]; + tensor var_1131_to_fp16 = const()[name = tensor("op_1131_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1132_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1131_to_fp16)[name = tensor("op_1132_cast_fp16")]; + tensor var_1135 = const()[name = tensor("op_1135"), val = tensor([1, 12, 64, 448])]; + tensor var_1136_cast_fp16 = reshape(shape = var_1135, x = key_13_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor mh_w_19_transpose_x_0 = const()[name = tensor("mh_w_19_transpose_x_0"), val = tensor(true)]; + tensor mh_w_19_transpose_y_0 = const()[name = tensor("mh_w_19_transpose_y_0"), val = tensor(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1132_cast_fp16, y = var_1136_cast_fp16)[name = tensor("mh_w_19_cast_fp16")]; + tensor mh_w_21_cast_fp16 = add(x = mh_w_19_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_21_cast_fp16")]; + tensor var_1144_cast_fp16 = softmax(axis = var_1029, x = mh_w_21_cast_fp16)[name = tensor("op_1144_cast_fp16")]; + tensor var_1145 = const()[name = tensor("op_1145"), val = tensor([1, 12, 64, 448])]; + tensor var_1146_cast_fp16 = reshape(shape = var_1145, x = value_13_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor attn_13_transpose_x_0 = const()[name = tensor("attn_13_transpose_x_0"), val = tensor(false)]; + tensor attn_13_transpose_y_0 = const()[name = tensor("attn_13_transpose_y_0"), val = tensor(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1146_cast_fp16, y = var_1144_cast_fp16)[name = tensor("attn_13_cast_fp16")]; + tensor var_1149 = const()[name = tensor("op_1149"), val = tensor([1, 768, 1, 1])]; + tensor input_31_cast_fp16 = reshape(shape = var_1149, x = attn_13_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_1159_pad_type_0 = const()[name = tensor("op_1159_pad_type_0"), val = tensor("valid")]; + tensor var_1159_strides_0 = const()[name = tensor("op_1159_strides_0"), val = tensor([1, 1])]; + tensor var_1159_pad_0 = const()[name = tensor("op_1159_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1159_dilations_0 = const()[name = tensor("op_1159_dilations_0"), val = tensor([1, 1])]; + tensor var_1159_groups_0 = const()[name = tensor("op_1159_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100217152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100512128))), name = tensor("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100512256)))]; + tensor var_1159_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1159_dilations_0, groups = var_1159_groups_0, pad = var_1159_pad_0, pad_type = var_1159_pad_type_0, strides = var_1159_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = tensor("op_1159_cast_fp16")]; + tensor var_1165_pad_type_0 = const()[name = tensor("op_1165_pad_type_0"), val = tensor("valid")]; + tensor var_1165_strides_0 = const()[name = tensor("op_1165_strides_0"), val = tensor([1, 1])]; + tensor var_1165_pad_0 = const()[name = tensor("op_1165_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1165_dilations_0 = const()[name = tensor("op_1165_dilations_0"), val = tensor([1, 1])]; + tensor var_1165_groups_0 = const()[name = tensor("op_1165_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100534080))), name = tensor("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100513856))), shape = tensor([768, 768, 1, 1])]; + tensor var_1165_cast_fp16 = conv(dilations = var_1165_dilations_0, groups = var_1165_groups_0, pad = var_1165_pad_0, pad_type = var_1165_pad_type_0, strides = var_1165_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = tensor("op_1165_cast_fp16")]; + tensor obj_49_cast_fp16 = add(x = var_1159_cast_fp16, y = var_1165_cast_fp16)[name = tensor("obj_49_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_49_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor out_21_axes_0 = const()[name = tensor("out_21_axes_0"), val = tensor([1])]; + tensor var_1180_to_fp16 = const()[name = tensor("op_1180_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1180_to_fp16, x = inputs_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_51_gamma_0_to_fp16 = const()[name = tensor("obj_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100607872)))]; + tensor obj_51_beta_0_to_fp16 = const()[name = tensor("obj_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100609472)))]; + tensor obj_51_epsilon_0_to_fp16 = const()[name = tensor("obj_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_51_cast_fp16")]; + tensor var_1202_pad_type_0 = const()[name = tensor("op_1202_pad_type_0"), val = tensor("valid")]; + tensor var_1202_strides_0 = const()[name = tensor("op_1202_strides_0"), val = tensor([1, 1])]; + tensor var_1202_pad_0 = const()[name = tensor("op_1202_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1202_dilations_0 = const()[name = tensor("op_1202_dilations_0"), val = tensor([1, 1])]; + tensor var_1202_groups_0 = const()[name = tensor("op_1202_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100611072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100906048))), name = tensor("layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100906176)))]; + tensor var_1202_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1202_dilations_0, groups = var_1202_groups_0, pad = var_1202_pad_0, pad_type = var_1202_pad_type_0, strides = var_1202_strides_0, weight = layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_51_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1208_pad_type_0 = const()[name = tensor("op_1208_pad_type_0"), val = tensor("valid")]; + tensor var_1208_strides_0 = const()[name = tensor("op_1208_strides_0"), val = tensor([1, 1])]; + tensor var_1208_pad_0 = const()[name = tensor("op_1208_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1208_dilations_0 = const()[name = tensor("op_1208_dilations_0"), val = tensor([1, 1])]; + tensor var_1208_groups_0 = const()[name = tensor("op_1208_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100924672))), name = tensor("layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100907776))), shape = tensor([768, 768, 1, 1])]; + tensor var_1208_cast_fp16 = conv(dilations = var_1208_dilations_0, groups = var_1208_groups_0, pad = var_1208_pad_0, pad_type = var_1208_pad_type_0, strides = var_1208_strides_0, weight = layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_51_cast_fp16)[name = tensor("op_1208_cast_fp16")]; + tensor query_15_cast_fp16 = add(x = var_1202_cast_fp16, y = var_1208_cast_fp16)[name = tensor("query_15_cast_fp16")]; + tensor var_1217_pad_type_0 = const()[name = tensor("op_1217_pad_type_0"), val = tensor("valid")]; + tensor var_1217_strides_0 = const()[name = tensor("op_1217_strides_0"), val = tensor([1, 1])]; + tensor var_1217_pad_0 = const()[name = tensor("op_1217_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1217_dilations_0 = const()[name = tensor("op_1217_dilations_0"), val = tensor([1, 1])]; + tensor var_1217_groups_0 = const()[name = tensor("op_1217_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100998464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101293440))), name = tensor("layers_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1217_cast_fp16 = conv(dilations = var_1217_dilations_0, groups = var_1217_groups_0, pad = var_1217_pad_0, pad_type = var_1217_pad_type_0, strides = var_1217_strides_0, weight = layers_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1217_cast_fp16")]; + tensor var_1223_pad_type_0 = const()[name = tensor("op_1223_pad_type_0"), val = tensor("valid")]; + tensor var_1223_strides_0 = const()[name = tensor("op_1223_strides_0"), val = tensor([1, 1])]; + tensor var_1223_pad_0 = const()[name = tensor("op_1223_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1223_dilations_0 = const()[name = tensor("op_1223_dilations_0"), val = tensor([1, 1])]; + tensor var_1223_groups_0 = const()[name = tensor("op_1223_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101309056))), name = tensor("layers_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101293568))), shape = tensor([768, 768, 1, 1])]; + tensor var_1223_cast_fp16 = conv(dilations = var_1223_dilations_0, groups = var_1223_groups_0, pad = var_1223_pad_0, pad_type = var_1223_pad_type_0, strides = var_1223_strides_0, weight = layers_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1223_cast_fp16")]; + tensor key_15_cast_fp16 = add(x = var_1217_cast_fp16, y = var_1223_cast_fp16)[name = tensor("key_15_cast_fp16")]; + tensor var_1233_pad_type_0 = const()[name = tensor("op_1233_pad_type_0"), val = tensor("valid")]; + tensor var_1233_strides_0 = const()[name = tensor("op_1233_strides_0"), val = tensor([1, 1])]; + tensor var_1233_pad_0 = const()[name = tensor("op_1233_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1233_dilations_0 = const()[name = tensor("op_1233_dilations_0"), val = tensor([1, 1])]; + tensor var_1233_groups_0 = const()[name = tensor("op_1233_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101382848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101677824))), name = tensor("layers_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101677952)))]; + tensor var_1233_cast_fp16 = conv(bias = layers_3_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1233_dilations_0, groups = var_1233_groups_0, pad = var_1233_pad_0, pad_type = var_1233_pad_type_0, strides = var_1233_strides_0, weight = layers_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1233_cast_fp16")]; + tensor var_1239_pad_type_0 = const()[name = tensor("op_1239_pad_type_0"), val = tensor("valid")]; + tensor var_1239_strides_0 = const()[name = tensor("op_1239_strides_0"), val = tensor([1, 1])]; + tensor var_1239_pad_0 = const()[name = tensor("op_1239_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1239_dilations_0 = const()[name = tensor("op_1239_dilations_0"), val = tensor([1, 1])]; + tensor var_1239_groups_0 = const()[name = tensor("op_1239_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101692352))), name = tensor("layers_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101679552))), shape = tensor([768, 768, 1, 1])]; + tensor var_1239_cast_fp16 = conv(dilations = var_1239_dilations_0, groups = var_1239_groups_0, pad = var_1239_pad_0, pad_type = var_1239_pad_type_0, strides = var_1239_strides_0, weight = layers_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1239_cast_fp16")]; + tensor value_15_cast_fp16 = add(x = var_1233_cast_fp16, y = var_1239_cast_fp16)[name = tensor("value_15_cast_fp16")]; + tensor var_1243 = const()[name = tensor("op_1243"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_15_cast_fp16 = reshape(shape = var_1243, x = query_15_cast_fp16)[name = tensor("mh_q_15_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1246_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1249 = const()[name = tensor("op_1249"), val = tensor([1, 12, 64, 1500])]; + tensor var_1250_cast_fp16 = reshape(shape = var_1249, x = key_15_cast_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor mh_w_23_transpose_x_0 = const()[name = tensor("mh_w_23_transpose_x_0"), val = tensor(true)]; + tensor mh_w_23_transpose_y_0 = const()[name = tensor("mh_w_23_transpose_y_0"), val = tensor(false)]; + tensor mh_w_23_cast_fp16 = matmul(transpose_x = mh_w_23_transpose_x_0, transpose_y = mh_w_23_transpose_y_0, x = var_1246_cast_fp16, y = var_1250_cast_fp16)[name = tensor("mh_w_23_cast_fp16")]; + tensor obj_55_cast_fp16 = softmax(axis = var_1029, x = mh_w_23_cast_fp16)[name = tensor("obj_55_cast_fp16")]; + tensor var_1254 = const()[name = tensor("op_1254"), val = tensor([1, 12, 64, 1500])]; + tensor var_1255_cast_fp16 = reshape(shape = var_1254, x = value_15_cast_fp16)[name = tensor("op_1255_cast_fp16")]; + tensor attn_15_transpose_x_0 = const()[name = tensor("attn_15_transpose_x_0"), val = tensor(false)]; + tensor attn_15_transpose_y_0 = const()[name = tensor("attn_15_transpose_y_0"), val = tensor(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1255_cast_fp16, y = obj_55_cast_fp16)[name = tensor("attn_15_cast_fp16")]; + tensor var_1258 = const()[name = tensor("op_1258"), val = tensor([1, 768, 1, 1])]; + tensor input_33_cast_fp16 = reshape(shape = var_1258, x = attn_15_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor var_1268_pad_type_0 = const()[name = tensor("op_1268_pad_type_0"), val = tensor("valid")]; + tensor var_1268_strides_0 = const()[name = tensor("op_1268_strides_0"), val = tensor([1, 1])]; + tensor var_1268_pad_0 = const()[name = tensor("op_1268_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1268_dilations_0 = const()[name = tensor("op_1268_dilations_0"), val = tensor([1, 1])]; + tensor var_1268_groups_0 = const()[name = tensor("op_1268_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101766144))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102061120))), name = tensor("layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102061248)))]; + tensor var_1268_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1268_dilations_0, groups = var_1268_groups_0, pad = var_1268_pad_0, pad_type = var_1268_pad_type_0, strides = var_1268_strides_0, weight = layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1274_pad_type_0 = const()[name = tensor("op_1274_pad_type_0"), val = tensor("valid")]; + tensor var_1274_strides_0 = const()[name = tensor("op_1274_strides_0"), val = tensor([1, 1])]; + tensor var_1274_pad_0 = const()[name = tensor("op_1274_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1274_dilations_0 = const()[name = tensor("op_1274_dilations_0"), val = tensor([1, 1])]; + tensor var_1274_groups_0 = const()[name = tensor("op_1274_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102076160))), name = tensor("layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102062848))), shape = tensor([768, 768, 1, 1])]; + tensor var_1274_cast_fp16 = conv(dilations = var_1274_dilations_0, groups = var_1274_groups_0, pad = var_1274_pad_0, pad_type = var_1274_pad_type_0, strides = var_1274_strides_0, weight = layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor obj_53_cast_fp16 = add(x = var_1268_cast_fp16, y = var_1274_cast_fp16)[name = tensor("obj_53_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_53_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor out_23_axes_0 = const()[name = tensor("out_23_axes_0"), val = tensor([1])]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1285_to_fp16, x = inputs_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102149952)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102151552)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_1303_pad_type_0 = const()[name = tensor("op_1303_pad_type_0"), val = tensor("valid")]; + tensor var_1303_strides_0 = const()[name = tensor("op_1303_strides_0"), val = tensor([1, 1])]; + tensor var_1303_pad_0 = const()[name = tensor("op_1303_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1303_dilations_0 = const()[name = tensor("op_1303_dilations_0"), val = tensor([1, 1])]; + tensor var_1303_groups_0 = const()[name = tensor("op_1303_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102153152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103332864))), name = tensor("layers_3_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103332992)))]; + tensor var_1303_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_1303_dilations_0, groups = var_1303_groups_0, pad = var_1303_pad_0, pad_type = var_1303_pad_type_0, strides = var_1303_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = tensor("op_1303_cast_fp16")]; + tensor var_1309_pad_type_0 = const()[name = tensor("op_1309_pad_type_0"), val = tensor("valid")]; + tensor var_1309_strides_0 = const()[name = tensor("op_1309_strides_0"), val = tensor([1, 1])]; + tensor var_1309_pad_0 = const()[name = tensor("op_1309_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1309_dilations_0 = const()[name = tensor("op_1309_dilations_0"), val = tensor([1, 1])]; + tensor var_1309_groups_0 = const()[name = tensor("op_1309_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103404032))), name = tensor("layers_3_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103339200))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1309_cast_fp16 = conv(dilations = var_1309_dilations_0, groups = var_1309_groups_0, pad = var_1309_pad_0, pad_type = var_1309_pad_type_0, strides = var_1309_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = tensor("op_1309_cast_fp16")]; + tensor input_37_cast_fp16 = add(x = var_1303_cast_fp16, y = var_1309_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_1320_pad_type_0 = const()[name = tensor("op_1320_pad_type_0"), val = tensor("valid")]; + tensor var_1320_strides_0 = const()[name = tensor("op_1320_strides_0"), val = tensor([1, 1])]; + tensor var_1320_pad_0 = const()[name = tensor("op_1320_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1320_dilations_0 = const()[name = tensor("op_1320_dilations_0"), val = tensor([1, 1])]; + tensor var_1320_groups_0 = const()[name = tensor("op_1320_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103699008))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104878720))), name = tensor("layers_3_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104878848)))]; + tensor var_1320_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_1320_dilations_0, groups = var_1320_groups_0, pad = var_1320_pad_0, pad_type = var_1320_pad_type_0, strides = var_1320_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1326_pad_type_0 = const()[name = tensor("op_1326_pad_type_0"), val = tensor("valid")]; + tensor var_1326_strides_0 = const()[name = tensor("op_1326_strides_0"), val = tensor([1, 1])]; + tensor var_1326_pad_0 = const()[name = tensor("op_1326_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1326_dilations_0 = const()[name = tensor("op_1326_dilations_0"), val = tensor([1, 1])]; + tensor var_1326_groups_0 = const()[name = tensor("op_1326_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104937984))), name = tensor("layers_3_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104880448))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1326_cast_fp16 = conv(dilations = var_1326_dilations_0, groups = var_1326_groups_0, pad = var_1326_pad_0, pad_type = var_1326_pad_type_0, strides = var_1326_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = tensor("op_1326_cast_fp16")]; + tensor hidden_states_9_cast_fp16 = add(x = var_1320_cast_fp16, y = var_1326_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_1338 = const()[name = tensor("op_1338"), val = tensor(3)]; + tensor out_25_axes_0 = const()[name = tensor("out_25_axes_0"), val = tensor([1])]; + tensor var_1363_to_fp16 = const()[name = tensor("op_1363_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1363_to_fp16, x = inputs_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; + tensor obj_57_gamma_0_to_fp16 = const()[name = tensor("obj_57_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105232960)))]; + tensor obj_57_beta_0_to_fp16 = const()[name = tensor("obj_57_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105234560)))]; + tensor obj_57_epsilon_0_to_fp16 = const()[name = tensor("obj_57_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_57_cast_fp16 = batch_norm(beta = obj_57_beta_0_to_fp16, epsilon = obj_57_epsilon_0_to_fp16, gamma = obj_57_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_57_cast_fp16")]; + tensor var_1385_pad_type_0 = const()[name = tensor("op_1385_pad_type_0"), val = tensor("valid")]; + tensor var_1385_strides_0 = const()[name = tensor("op_1385_strides_0"), val = tensor([1, 1])]; + tensor var_1385_pad_0 = const()[name = tensor("op_1385_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1385_dilations_0 = const()[name = tensor("op_1385_dilations_0"), val = tensor([1, 1])]; + tensor var_1385_groups_0 = const()[name = tensor("op_1385_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105236160))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105531136))), name = tensor("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105531264)))]; + tensor var_1385_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1385_dilations_0, groups = var_1385_groups_0, pad = var_1385_pad_0, pad_type = var_1385_pad_type_0, strides = var_1385_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_1385_cast_fp16")]; + tensor var_1391_pad_type_0 = const()[name = tensor("op_1391_pad_type_0"), val = tensor("valid")]; + tensor var_1391_strides_0 = const()[name = tensor("op_1391_strides_0"), val = tensor([1, 1])]; + tensor var_1391_pad_0 = const()[name = tensor("op_1391_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1391_dilations_0 = const()[name = tensor("op_1391_dilations_0"), val = tensor([1, 1])]; + tensor var_1391_groups_0 = const()[name = tensor("op_1391_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105543104))), name = tensor("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105532864))), shape = tensor([768, 768, 1, 1])]; + tensor var_1391_cast_fp16 = conv(dilations = var_1391_dilations_0, groups = var_1391_groups_0, pad = var_1391_pad_0, pad_type = var_1391_pad_type_0, strides = var_1391_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_1391_cast_fp16")]; + tensor query_17_cast_fp16 = add(x = var_1385_cast_fp16, y = var_1391_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor var_1400_pad_type_0 = const()[name = tensor("op_1400_pad_type_0"), val = tensor("valid")]; + tensor var_1400_strides_0 = const()[name = tensor("op_1400_strides_0"), val = tensor([1, 1])]; + tensor var_1400_pad_0 = const()[name = tensor("op_1400_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1400_dilations_0 = const()[name = tensor("op_1400_dilations_0"), val = tensor([1, 1])]; + tensor var_1400_groups_0 = const()[name = tensor("op_1400_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105616896))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105911872))), name = tensor("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1400_cast_fp16 = conv(dilations = var_1400_dilations_0, groups = var_1400_groups_0, pad = var_1400_pad_0, pad_type = var_1400_pad_type_0, strides = var_1400_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_1400_cast_fp16")]; + tensor var_1406_pad_type_0 = const()[name = tensor("op_1406_pad_type_0"), val = tensor("valid")]; + tensor var_1406_strides_0 = const()[name = tensor("op_1406_strides_0"), val = tensor([1, 1])]; + tensor var_1406_pad_0 = const()[name = tensor("op_1406_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1406_dilations_0 = const()[name = tensor("op_1406_dilations_0"), val = tensor([1, 1])]; + tensor var_1406_groups_0 = const()[name = tensor("op_1406_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105923072))), name = tensor("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105912000))), shape = tensor([768, 768, 1, 1])]; + tensor var_1406_cast_fp16 = conv(dilations = var_1406_dilations_0, groups = var_1406_groups_0, pad = var_1406_pad_0, pad_type = var_1406_pad_type_0, strides = var_1406_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor current_key_9_cast_fp16 = add(x = var_1400_cast_fp16, y = var_1406_cast_fp16)[name = tensor("current_key_9_cast_fp16")]; + tensor var_1416_pad_type_0 = const()[name = tensor("op_1416_pad_type_0"), val = tensor("valid")]; + tensor var_1416_strides_0 = const()[name = tensor("op_1416_strides_0"), val = tensor([1, 1])]; + tensor var_1416_pad_0 = const()[name = tensor("op_1416_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1416_dilations_0 = const()[name = tensor("op_1416_dilations_0"), val = tensor([1, 1])]; + tensor var_1416_groups_0 = const()[name = tensor("op_1416_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105996864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106291840))), name = tensor("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106291968)))]; + tensor var_1416_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1416_dilations_0, groups = var_1416_groups_0, pad = var_1416_pad_0, pad_type = var_1416_pad_type_0, strides = var_1416_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_1416_cast_fp16")]; + tensor var_1422_pad_type_0 = const()[name = tensor("op_1422_pad_type_0"), val = tensor("valid")]; + tensor var_1422_strides_0 = const()[name = tensor("op_1422_strides_0"), val = tensor([1, 1])]; + tensor var_1422_pad_0 = const()[name = tensor("op_1422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1422_dilations_0 = const()[name = tensor("op_1422_dilations_0"), val = tensor([1, 1])]; + tensor var_1422_groups_0 = const()[name = tensor("op_1422_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106303360))), name = tensor("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106293568))), shape = tensor([768, 768, 1, 1])]; + tensor var_1422_cast_fp16 = conv(dilations = var_1422_dilations_0, groups = var_1422_groups_0, pad = var_1422_pad_0, pad_type = var_1422_pad_type_0, strides = var_1422_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor current_value_9_cast_fp16 = add(x = var_1416_cast_fp16, y = var_1422_cast_fp16)[name = tensor("current_value_9_cast_fp16")]; + tensor var_1429_cast_fp16 = mul(x = var_69_cast_fp16_4, y = var_192_cast_fp16)[name = tensor("op_1429_cast_fp16")]; + tensor var_1430_cast_fp16 = mul(x = current_key_9_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor key_17_cast_fp16 = add(x = var_1429_cast_fp16, y = var_1430_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor var_1433_cast_fp16 = mul(x = var_84_cast_fp16_4, y = var_192_cast_fp16)[name = tensor("op_1433_cast_fp16")]; + tensor var_1434_cast_fp16 = mul(x = current_value_9_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1433_cast_fp16, y = var_1434_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_1438 = const()[name = tensor("op_1438"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_17_cast_fp16 = reshape(shape = var_1438, x = query_17_cast_fp16)[name = tensor("mh_q_17_cast_fp16")]; + tensor var_1440_to_fp16 = const()[name = tensor("op_1440_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1441_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1440_to_fp16)[name = tensor("op_1441_cast_fp16")]; + tensor var_1444 = const()[name = tensor("op_1444"), val = tensor([1, 12, 64, 448])]; + tensor var_1445_cast_fp16 = reshape(shape = var_1444, x = key_17_cast_fp16)[name = tensor("op_1445_cast_fp16")]; + tensor mh_w_25_transpose_x_0 = const()[name = tensor("mh_w_25_transpose_x_0"), val = tensor(true)]; + tensor mh_w_25_transpose_y_0 = const()[name = tensor("mh_w_25_transpose_y_0"), val = tensor(false)]; + tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1441_cast_fp16, y = var_1445_cast_fp16)[name = tensor("mh_w_25_cast_fp16")]; + tensor mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_27_cast_fp16")]; + tensor var_1453_cast_fp16 = softmax(axis = var_1338, x = mh_w_27_cast_fp16)[name = tensor("op_1453_cast_fp16")]; + tensor var_1454 = const()[name = tensor("op_1454"), val = tensor([1, 12, 64, 448])]; + tensor var_1455_cast_fp16 = reshape(shape = var_1454, x = value_17_cast_fp16)[name = tensor("op_1455_cast_fp16")]; + tensor attn_17_transpose_x_0 = const()[name = tensor("attn_17_transpose_x_0"), val = tensor(false)]; + tensor attn_17_transpose_y_0 = const()[name = tensor("attn_17_transpose_y_0"), val = tensor(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1455_cast_fp16, y = var_1453_cast_fp16)[name = tensor("attn_17_cast_fp16")]; + tensor var_1458 = const()[name = tensor("op_1458"), val = tensor([1, 768, 1, 1])]; + tensor input_41_cast_fp16 = reshape(shape = var_1458, x = attn_17_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor var_1468_pad_type_0 = const()[name = tensor("op_1468_pad_type_0"), val = tensor("valid")]; + tensor var_1468_strides_0 = const()[name = tensor("op_1468_strides_0"), val = tensor([1, 1])]; + tensor var_1468_pad_0 = const()[name = tensor("op_1468_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1468_dilations_0 = const()[name = tensor("op_1468_dilations_0"), val = tensor([1, 1])]; + tensor var_1468_groups_0 = const()[name = tensor("op_1468_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106377152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106672128))), name = tensor("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106672256)))]; + tensor var_1468_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1468_dilations_0, groups = var_1468_groups_0, pad = var_1468_pad_0, pad_type = var_1468_pad_type_0, strides = var_1468_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = tensor("op_1468_cast_fp16")]; + tensor var_1474_pad_type_0 = const()[name = tensor("op_1474_pad_type_0"), val = tensor("valid")]; + tensor var_1474_strides_0 = const()[name = tensor("op_1474_strides_0"), val = tensor([1, 1])]; + tensor var_1474_pad_0 = const()[name = tensor("op_1474_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1474_dilations_0 = const()[name = tensor("op_1474_dilations_0"), val = tensor([1, 1])]; + tensor var_1474_groups_0 = const()[name = tensor("op_1474_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106686528))), name = tensor("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106673856))), shape = tensor([768, 768, 1, 1])]; + tensor var_1474_cast_fp16 = conv(dilations = var_1474_dilations_0, groups = var_1474_groups_0, pad = var_1474_pad_0, pad_type = var_1474_pad_type_0, strides = var_1474_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor obj_63_cast_fp16 = add(x = var_1468_cast_fp16, y = var_1474_cast_fp16)[name = tensor("obj_63_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_63_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor out_27_axes_0 = const()[name = tensor("out_27_axes_0"), val = tensor([1])]; + tensor var_1489_to_fp16 = const()[name = tensor("op_1489_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1489_to_fp16, x = inputs_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; + tensor obj_65_gamma_0_to_fp16 = const()[name = tensor("obj_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106760320)))]; + tensor obj_65_beta_0_to_fp16 = const()[name = tensor("obj_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106761920)))]; + tensor obj_65_epsilon_0_to_fp16 = const()[name = tensor("obj_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_65_cast_fp16 = batch_norm(beta = obj_65_beta_0_to_fp16, epsilon = obj_65_epsilon_0_to_fp16, gamma = obj_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("obj_65_cast_fp16")]; + tensor var_1511_pad_type_0 = const()[name = tensor("op_1511_pad_type_0"), val = tensor("valid")]; + tensor var_1511_strides_0 = const()[name = tensor("op_1511_strides_0"), val = tensor([1, 1])]; + tensor var_1511_pad_0 = const()[name = tensor("op_1511_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1511_dilations_0 = const()[name = tensor("op_1511_dilations_0"), val = tensor([1, 1])]; + tensor var_1511_groups_0 = const()[name = tensor("op_1511_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106763520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107058496))), name = tensor("layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107058624)))]; + tensor var_1511_cast_fp16 = conv(bias = layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1511_dilations_0, groups = var_1511_groups_0, pad = var_1511_pad_0, pad_type = var_1511_pad_type_0, strides = var_1511_strides_0, weight = layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_65_cast_fp16)[name = tensor("op_1511_cast_fp16")]; + tensor var_1517_pad_type_0 = const()[name = tensor("op_1517_pad_type_0"), val = tensor("valid")]; + tensor var_1517_strides_0 = const()[name = tensor("op_1517_strides_0"), val = tensor([1, 1])]; + tensor var_1517_pad_0 = const()[name = tensor("op_1517_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1517_dilations_0 = const()[name = tensor("op_1517_dilations_0"), val = tensor([1, 1])]; + tensor var_1517_groups_0 = const()[name = tensor("op_1517_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107073728))), name = tensor("layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107060224))), shape = tensor([768, 768, 1, 1])]; + tensor var_1517_cast_fp16 = conv(dilations = var_1517_dilations_0, groups = var_1517_groups_0, pad = var_1517_pad_0, pad_type = var_1517_pad_type_0, strides = var_1517_strides_0, weight = layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_65_cast_fp16)[name = tensor("op_1517_cast_fp16")]; + tensor query_19_cast_fp16 = add(x = var_1511_cast_fp16, y = var_1517_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor var_1526_pad_type_0 = const()[name = tensor("op_1526_pad_type_0"), val = tensor("valid")]; + tensor var_1526_strides_0 = const()[name = tensor("op_1526_strides_0"), val = tensor([1, 1])]; + tensor var_1526_pad_0 = const()[name = tensor("op_1526_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1526_dilations_0 = const()[name = tensor("op_1526_dilations_0"), val = tensor([1, 1])]; + tensor var_1526_groups_0 = const()[name = tensor("op_1526_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107147520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107442496))), name = tensor("layers_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1526_cast_fp16 = conv(dilations = var_1526_dilations_0, groups = var_1526_groups_0, pad = var_1526_pad_0, pad_type = var_1526_pad_type_0, strides = var_1526_strides_0, weight = layers_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1526_cast_fp16")]; + tensor var_1532_pad_type_0 = const()[name = tensor("op_1532_pad_type_0"), val = tensor("valid")]; + tensor var_1532_strides_0 = const()[name = tensor("op_1532_strides_0"), val = tensor([1, 1])]; + tensor var_1532_pad_0 = const()[name = tensor("op_1532_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1532_dilations_0 = const()[name = tensor("op_1532_dilations_0"), val = tensor([1, 1])]; + tensor var_1532_groups_0 = const()[name = tensor("op_1532_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107456832))), name = tensor("layers_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107442624))), shape = tensor([768, 768, 1, 1])]; + tensor var_1532_cast_fp16 = conv(dilations = var_1532_dilations_0, groups = var_1532_groups_0, pad = var_1532_pad_0, pad_type = var_1532_pad_type_0, strides = var_1532_strides_0, weight = layers_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1532_cast_fp16")]; + tensor key_19_cast_fp16 = add(x = var_1526_cast_fp16, y = var_1532_cast_fp16)[name = tensor("key_19_cast_fp16")]; + tensor var_1542_pad_type_0 = const()[name = tensor("op_1542_pad_type_0"), val = tensor("valid")]; + tensor var_1542_strides_0 = const()[name = tensor("op_1542_strides_0"), val = tensor([1, 1])]; + tensor var_1542_pad_0 = const()[name = tensor("op_1542_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1542_dilations_0 = const()[name = tensor("op_1542_dilations_0"), val = tensor([1, 1])]; + tensor var_1542_groups_0 = const()[name = tensor("op_1542_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107530624))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107825600))), name = tensor("layers_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107825728)))]; + tensor var_1542_cast_fp16 = conv(bias = layers_4_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1542_dilations_0, groups = var_1542_groups_0, pad = var_1542_pad_0, pad_type = var_1542_pad_type_0, strides = var_1542_strides_0, weight = layers_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1542_cast_fp16")]; + tensor var_1548_pad_type_0 = const()[name = tensor("op_1548_pad_type_0"), val = tensor("valid")]; + tensor var_1548_strides_0 = const()[name = tensor("op_1548_strides_0"), val = tensor([1, 1])]; + tensor var_1548_pad_0 = const()[name = tensor("op_1548_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1548_dilations_0 = const()[name = tensor("op_1548_dilations_0"), val = tensor([1, 1])]; + tensor var_1548_groups_0 = const()[name = tensor("op_1548_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107837312))), name = tensor("layers_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107827328))), shape = tensor([768, 768, 1, 1])]; + tensor var_1548_cast_fp16 = conv(dilations = var_1548_dilations_0, groups = var_1548_groups_0, pad = var_1548_pad_0, pad_type = var_1548_pad_type_0, strides = var_1548_strides_0, weight = layers_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1548_cast_fp16")]; + tensor value_19_cast_fp16 = add(x = var_1542_cast_fp16, y = var_1548_cast_fp16)[name = tensor("value_19_cast_fp16")]; + tensor var_1552 = const()[name = tensor("op_1552"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1552, x = query_19_cast_fp16)[name = tensor("mh_q_19_cast_fp16")]; + tensor var_1554_to_fp16 = const()[name = tensor("op_1554_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1555_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1554_to_fp16)[name = tensor("op_1555_cast_fp16")]; + tensor var_1558 = const()[name = tensor("op_1558"), val = tensor([1, 12, 64, 1500])]; + tensor var_1559_cast_fp16 = reshape(shape = var_1558, x = key_19_cast_fp16)[name = tensor("op_1559_cast_fp16")]; + tensor mh_w_29_transpose_x_0 = const()[name = tensor("mh_w_29_transpose_x_0"), val = tensor(true)]; + tensor mh_w_29_transpose_y_0 = const()[name = tensor("mh_w_29_transpose_y_0"), val = tensor(false)]; + tensor mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1555_cast_fp16, y = var_1559_cast_fp16)[name = tensor("mh_w_29_cast_fp16")]; + tensor obj_69_cast_fp16 = softmax(axis = var_1338, x = mh_w_29_cast_fp16)[name = tensor("obj_69_cast_fp16")]; + tensor var_1563 = const()[name = tensor("op_1563"), val = tensor([1, 12, 64, 1500])]; + tensor var_1564_cast_fp16 = reshape(shape = var_1563, x = value_19_cast_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor attn_19_transpose_x_0 = const()[name = tensor("attn_19_transpose_x_0"), val = tensor(false)]; + tensor attn_19_transpose_y_0 = const()[name = tensor("attn_19_transpose_y_0"), val = tensor(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1564_cast_fp16, y = obj_69_cast_fp16)[name = tensor("attn_19_cast_fp16")]; + tensor var_1567 = const()[name = tensor("op_1567"), val = tensor([1, 768, 1, 1])]; + tensor input_43_cast_fp16 = reshape(shape = var_1567, x = attn_19_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_1577_pad_type_0 = const()[name = tensor("op_1577_pad_type_0"), val = tensor("valid")]; + tensor var_1577_strides_0 = const()[name = tensor("op_1577_strides_0"), val = tensor([1, 1])]; + tensor var_1577_pad_0 = const()[name = tensor("op_1577_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1577_dilations_0 = const()[name = tensor("op_1577_dilations_0"), val = tensor([1, 1])]; + tensor var_1577_groups_0 = const()[name = tensor("op_1577_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107911104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108206080))), name = tensor("layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108206208)))]; + tensor var_1577_cast_fp16 = conv(bias = layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1577_dilations_0, groups = var_1577_groups_0, pad = var_1577_pad_0, pad_type = var_1577_pad_type_0, strides = var_1577_strides_0, weight = layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = tensor("op_1577_cast_fp16")]; + tensor var_1583_pad_type_0 = const()[name = tensor("op_1583_pad_type_0"), val = tensor("valid")]; + tensor var_1583_strides_0 = const()[name = tensor("op_1583_strides_0"), val = tensor([1, 1])]; + tensor var_1583_pad_0 = const()[name = tensor("op_1583_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1583_dilations_0 = const()[name = tensor("op_1583_dilations_0"), val = tensor([1, 1])]; + tensor var_1583_groups_0 = const()[name = tensor("op_1583_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108217664))), name = tensor("layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108207808))), shape = tensor([768, 768, 1, 1])]; + tensor var_1583_cast_fp16 = conv(dilations = var_1583_dilations_0, groups = var_1583_groups_0, pad = var_1583_pad_0, pad_type = var_1583_pad_type_0, strides = var_1583_strides_0, weight = layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = tensor("op_1583_cast_fp16")]; + tensor obj_67_cast_fp16 = add(x = var_1577_cast_fp16, y = var_1583_cast_fp16)[name = tensor("obj_67_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = obj_67_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor out_29_axes_0 = const()[name = tensor("out_29_axes_0"), val = tensor([1])]; + tensor var_1594_to_fp16 = const()[name = tensor("op_1594_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1594_to_fp16, x = inputs_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; + tensor input_45_gamma_0_to_fp16 = const()[name = tensor("input_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108291456)))]; + tensor input_45_beta_0_to_fp16 = const()[name = tensor("input_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108293056)))]; + tensor input_45_epsilon_0_to_fp16 = const()[name = tensor("input_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_45_cast_fp16 = batch_norm(beta = input_45_beta_0_to_fp16, epsilon = input_45_epsilon_0_to_fp16, gamma = input_45_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor var_1612_pad_type_0 = const()[name = tensor("op_1612_pad_type_0"), val = tensor("valid")]; + tensor var_1612_strides_0 = const()[name = tensor("op_1612_strides_0"), val = tensor([1, 1])]; + tensor var_1612_pad_0 = const()[name = tensor("op_1612_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1612_dilations_0 = const()[name = tensor("op_1612_dilations_0"), val = tensor([1, 1])]; + tensor var_1612_groups_0 = const()[name = tensor("op_1612_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108294656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109474368))), name = tensor("layers_4_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109474496)))]; + tensor var_1612_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1612_dilations_0, groups = var_1612_groups_0, pad = var_1612_pad_0, pad_type = var_1612_pad_type_0, strides = var_1612_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_45_cast_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1618_pad_type_0 = const()[name = tensor("op_1618_pad_type_0"), val = tensor("valid")]; + tensor var_1618_strides_0 = const()[name = tensor("op_1618_strides_0"), val = tensor([1, 1])]; + tensor var_1618_pad_0 = const()[name = tensor("op_1618_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1618_dilations_0 = const()[name = tensor("op_1618_dilations_0"), val = tensor([1, 1])]; + tensor var_1618_groups_0 = const()[name = tensor("op_1618_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109515712))), name = tensor("layers_4_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109480704))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1618_cast_fp16 = conv(dilations = var_1618_dilations_0, groups = var_1618_groups_0, pad = var_1618_pad_0, pad_type = var_1618_pad_type_0, strides = var_1618_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_45_cast_fp16)[name = tensor("op_1618_cast_fp16")]; + tensor input_47_cast_fp16 = add(x = var_1612_cast_fp16, y = var_1618_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor input_49_mode_0 = const()[name = tensor("input_49_mode_0"), val = tensor("EXACT")]; + tensor input_49_cast_fp16 = gelu(mode = input_49_mode_0, x = input_47_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor var_1629_pad_type_0 = const()[name = tensor("op_1629_pad_type_0"), val = tensor("valid")]; + tensor var_1629_strides_0 = const()[name = tensor("op_1629_strides_0"), val = tensor([1, 1])]; + tensor var_1629_pad_0 = const()[name = tensor("op_1629_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1629_dilations_0 = const()[name = tensor("op_1629_dilations_0"), val = tensor([1, 1])]; + tensor var_1629_groups_0 = const()[name = tensor("op_1629_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109810688))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110990400))), name = tensor("layers_4_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110990528)))]; + tensor var_1629_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1629_dilations_0, groups = var_1629_groups_0, pad = var_1629_pad_0, pad_type = var_1629_pad_type_0, strides = var_1629_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = tensor("op_1629_cast_fp16")]; + tensor var_1635_pad_type_0 = const()[name = tensor("op_1635_pad_type_0"), val = tensor("valid")]; + tensor var_1635_strides_0 = const()[name = tensor("op_1635_strides_0"), val = tensor([1, 1])]; + tensor var_1635_pad_0 = const()[name = tensor("op_1635_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1635_dilations_0 = const()[name = tensor("op_1635_dilations_0"), val = tensor([1, 1])]; + tensor var_1635_groups_0 = const()[name = tensor("op_1635_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111023424))), name = tensor("layers_4_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110992128))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1635_cast_fp16 = conv(dilations = var_1635_dilations_0, groups = var_1635_groups_0, pad = var_1635_pad_0, pad_type = var_1635_pad_type_0, strides = var_1635_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = tensor("op_1635_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = var_1629_cast_fp16, y = var_1635_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor var_1647 = const()[name = tensor("op_1647"), val = tensor(3)]; + tensor out_31_axes_0 = const()[name = tensor("out_31_axes_0"), val = tensor([1])]; + tensor var_1672_to_fp16 = const()[name = tensor("op_1672_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1672_to_fp16, x = inputs_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; + tensor obj_71_gamma_0_to_fp16 = const()[name = tensor("obj_71_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111318400)))]; + tensor obj_71_beta_0_to_fp16 = const()[name = tensor("obj_71_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111320000)))]; + tensor obj_71_epsilon_0_to_fp16 = const()[name = tensor("obj_71_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_71_cast_fp16 = batch_norm(beta = obj_71_beta_0_to_fp16, epsilon = obj_71_epsilon_0_to_fp16, gamma = obj_71_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("obj_71_cast_fp16")]; + tensor var_1694_pad_type_0 = const()[name = tensor("op_1694_pad_type_0"), val = tensor("valid")]; + tensor var_1694_strides_0 = const()[name = tensor("op_1694_strides_0"), val = tensor([1, 1])]; + tensor var_1694_pad_0 = const()[name = tensor("op_1694_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1694_dilations_0 = const()[name = tensor("op_1694_dilations_0"), val = tensor([1, 1])]; + tensor var_1694_groups_0 = const()[name = tensor("op_1694_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111321600))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111616576))), name = tensor("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111616704)))]; + tensor var_1694_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1694_dilations_0, groups = var_1694_groups_0, pad = var_1694_pad_0, pad_type = var_1694_pad_type_0, strides = var_1694_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_71_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1700_pad_type_0 = const()[name = tensor("op_1700_pad_type_0"), val = tensor("valid")]; + tensor var_1700_strides_0 = const()[name = tensor("op_1700_strides_0"), val = tensor([1, 1])]; + tensor var_1700_pad_0 = const()[name = tensor("op_1700_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1700_dilations_0 = const()[name = tensor("op_1700_dilations_0"), val = tensor([1, 1])]; + tensor var_1700_groups_0 = const()[name = tensor("op_1700_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111627456))), name = tensor("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111618304))), shape = tensor([768, 768, 1, 1])]; + tensor var_1700_cast_fp16 = conv(dilations = var_1700_dilations_0, groups = var_1700_groups_0, pad = var_1700_pad_0, pad_type = var_1700_pad_type_0, strides = var_1700_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_71_cast_fp16)[name = tensor("op_1700_cast_fp16")]; + tensor query_21_cast_fp16 = add(x = var_1694_cast_fp16, y = var_1700_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor var_1709_pad_type_0 = const()[name = tensor("op_1709_pad_type_0"), val = tensor("valid")]; + tensor var_1709_strides_0 = const()[name = tensor("op_1709_strides_0"), val = tensor([1, 1])]; + tensor var_1709_pad_0 = const()[name = tensor("op_1709_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1709_dilations_0 = const()[name = tensor("op_1709_dilations_0"), val = tensor([1, 1])]; + tensor var_1709_groups_0 = const()[name = tensor("op_1709_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111701248))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111996224))), name = tensor("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1709_cast_fp16 = conv(dilations = var_1709_dilations_0, groups = var_1709_groups_0, pad = var_1709_pad_0, pad_type = var_1709_pad_type_0, strides = var_1709_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_71_cast_fp16)[name = tensor("op_1709_cast_fp16")]; + tensor var_1715_pad_type_0 = const()[name = tensor("op_1715_pad_type_0"), val = tensor("valid")]; + tensor var_1715_strides_0 = const()[name = tensor("op_1715_strides_0"), val = tensor([1, 1])]; + tensor var_1715_pad_0 = const()[name = tensor("op_1715_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1715_dilations_0 = const()[name = tensor("op_1715_dilations_0"), val = tensor([1, 1])]; + tensor var_1715_groups_0 = const()[name = tensor("op_1715_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112005696))), name = tensor("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111996352))), shape = tensor([768, 768, 1, 1])]; + tensor var_1715_cast_fp16 = conv(dilations = var_1715_dilations_0, groups = var_1715_groups_0, pad = var_1715_pad_0, pad_type = var_1715_pad_type_0, strides = var_1715_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_71_cast_fp16)[name = tensor("op_1715_cast_fp16")]; + tensor current_key_11_cast_fp16 = add(x = var_1709_cast_fp16, y = var_1715_cast_fp16)[name = tensor("current_key_11_cast_fp16")]; + tensor var_1725_pad_type_0 = const()[name = tensor("op_1725_pad_type_0"), val = tensor("valid")]; + tensor var_1725_strides_0 = const()[name = tensor("op_1725_strides_0"), val = tensor([1, 1])]; + tensor var_1725_pad_0 = const()[name = tensor("op_1725_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1725_dilations_0 = const()[name = tensor("op_1725_dilations_0"), val = tensor([1, 1])]; + tensor var_1725_groups_0 = const()[name = tensor("op_1725_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112079488))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112374464))), name = tensor("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112374592)))]; + tensor var_1725_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1725_dilations_0, groups = var_1725_groups_0, pad = var_1725_pad_0, pad_type = var_1725_pad_type_0, strides = var_1725_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_71_cast_fp16)[name = tensor("op_1725_cast_fp16")]; + tensor var_1731_pad_type_0 = const()[name = tensor("op_1731_pad_type_0"), val = tensor("valid")]; + tensor var_1731_strides_0 = const()[name = tensor("op_1731_strides_0"), val = tensor([1, 1])]; + tensor var_1731_pad_0 = const()[name = tensor("op_1731_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1731_dilations_0 = const()[name = tensor("op_1731_dilations_0"), val = tensor([1, 1])]; + tensor var_1731_groups_0 = const()[name = tensor("op_1731_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112382144))), name = tensor("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112376192))), shape = tensor([768, 768, 1, 1])]; + tensor var_1731_cast_fp16 = conv(dilations = var_1731_dilations_0, groups = var_1731_groups_0, pad = var_1731_pad_0, pad_type = var_1731_pad_type_0, strides = var_1731_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_71_cast_fp16)[name = tensor("op_1731_cast_fp16")]; + tensor current_value_11_cast_fp16 = add(x = var_1725_cast_fp16, y = var_1731_cast_fp16)[name = tensor("current_value_11_cast_fp16")]; + tensor var_1738_cast_fp16 = mul(x = var_69_cast_fp16_5, y = var_192_cast_fp16)[name = tensor("op_1738_cast_fp16")]; + tensor var_1739_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1739_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_1738_cast_fp16, y = var_1739_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor var_1742_cast_fp16 = mul(x = var_84_cast_fp16_5, y = var_192_cast_fp16)[name = tensor("op_1742_cast_fp16")]; + tensor var_1743_cast_fp16 = mul(x = current_value_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1743_cast_fp16")]; + tensor value_21_cast_fp16 = add(x = var_1742_cast_fp16, y = var_1743_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_1747 = const()[name = tensor("op_1747"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_21_cast_fp16 = reshape(shape = var_1747, x = query_21_cast_fp16)[name = tensor("mh_q_21_cast_fp16")]; + tensor var_1749_to_fp16 = const()[name = tensor("op_1749_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1750_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_1749_to_fp16)[name = tensor("op_1750_cast_fp16")]; + tensor var_1753 = const()[name = tensor("op_1753"), val = tensor([1, 12, 64, 448])]; + tensor var_1754_cast_fp16 = reshape(shape = var_1753, x = key_21_cast_fp16)[name = tensor("op_1754_cast_fp16")]; + tensor mh_w_31_transpose_x_0 = const()[name = tensor("mh_w_31_transpose_x_0"), val = tensor(true)]; + tensor mh_w_31_transpose_y_0 = const()[name = tensor("mh_w_31_transpose_y_0"), val = tensor(false)]; + tensor mh_w_31_cast_fp16 = matmul(transpose_x = mh_w_31_transpose_x_0, transpose_y = mh_w_31_transpose_y_0, x = var_1750_cast_fp16, y = var_1754_cast_fp16)[name = tensor("mh_w_31_cast_fp16")]; + tensor mh_w_33_cast_fp16 = add(x = mh_w_31_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_33_cast_fp16")]; + tensor var_1762_cast_fp16 = softmax(axis = var_1647, x = mh_w_33_cast_fp16)[name = tensor("op_1762_cast_fp16")]; + tensor var_1763 = const()[name = tensor("op_1763"), val = tensor([1, 12, 64, 448])]; + tensor var_1764_cast_fp16 = reshape(shape = var_1763, x = value_21_cast_fp16)[name = tensor("op_1764_cast_fp16")]; + tensor attn_21_transpose_x_0 = const()[name = tensor("attn_21_transpose_x_0"), val = tensor(false)]; + tensor attn_21_transpose_y_0 = const()[name = tensor("attn_21_transpose_y_0"), val = tensor(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1764_cast_fp16, y = var_1762_cast_fp16)[name = tensor("attn_21_cast_fp16")]; + tensor var_1767 = const()[name = tensor("op_1767"), val = tensor([1, 768, 1, 1])]; + tensor input_51_cast_fp16 = reshape(shape = var_1767, x = attn_21_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor var_1777_pad_type_0 = const()[name = tensor("op_1777_pad_type_0"), val = tensor("valid")]; + tensor var_1777_strides_0 = const()[name = tensor("op_1777_strides_0"), val = tensor([1, 1])]; + tensor var_1777_pad_0 = const()[name = tensor("op_1777_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1777_dilations_0 = const()[name = tensor("op_1777_dilations_0"), val = tensor([1, 1])]; + tensor var_1777_groups_0 = const()[name = tensor("op_1777_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112455936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112750912))), name = tensor("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112751040)))]; + tensor var_1777_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1777_dilations_0, groups = var_1777_groups_0, pad = var_1777_pad_0, pad_type = var_1777_pad_type_0, strides = var_1777_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = tensor("op_1777_cast_fp16")]; + tensor var_1783_pad_type_0 = const()[name = tensor("op_1783_pad_type_0"), val = tensor("valid")]; + tensor var_1783_strides_0 = const()[name = tensor("op_1783_strides_0"), val = tensor([1, 1])]; + tensor var_1783_pad_0 = const()[name = tensor("op_1783_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1783_dilations_0 = const()[name = tensor("op_1783_dilations_0"), val = tensor([1, 1])]; + tensor var_1783_groups_0 = const()[name = tensor("op_1783_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112759040))), name = tensor("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112752640))), shape = tensor([768, 768, 1, 1])]; + tensor var_1783_cast_fp16 = conv(dilations = var_1783_dilations_0, groups = var_1783_groups_0, pad = var_1783_pad_0, pad_type = var_1783_pad_type_0, strides = var_1783_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = tensor("op_1783_cast_fp16")]; + tensor obj_77_cast_fp16 = add(x = var_1777_cast_fp16, y = var_1783_cast_fp16)[name = tensor("obj_77_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_77_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor out_33_axes_0 = const()[name = tensor("out_33_axes_0"), val = tensor([1])]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1798_to_fp16, x = inputs_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; + tensor obj_79_gamma_0_to_fp16 = const()[name = tensor("obj_79_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112832832)))]; + tensor obj_79_beta_0_to_fp16 = const()[name = tensor("obj_79_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112834432)))]; + tensor obj_79_epsilon_0_to_fp16 = const()[name = tensor("obj_79_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_79_cast_fp16 = batch_norm(beta = obj_79_beta_0_to_fp16, epsilon = obj_79_epsilon_0_to_fp16, gamma = obj_79_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_79_cast_fp16")]; + tensor var_1820_pad_type_0 = const()[name = tensor("op_1820_pad_type_0"), val = tensor("valid")]; + tensor var_1820_strides_0 = const()[name = tensor("op_1820_strides_0"), val = tensor([1, 1])]; + tensor var_1820_pad_0 = const()[name = tensor("op_1820_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1820_dilations_0 = const()[name = tensor("op_1820_dilations_0"), val = tensor([1, 1])]; + tensor var_1820_groups_0 = const()[name = tensor("op_1820_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112836032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113131008))), name = tensor("layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113131136)))]; + tensor var_1820_cast_fp16 = conv(bias = layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1820_dilations_0, groups = var_1820_groups_0, pad = var_1820_pad_0, pad_type = var_1820_pad_type_0, strides = var_1820_strides_0, weight = layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = tensor("op_1820_cast_fp16")]; + tensor var_1826_pad_type_0 = const()[name = tensor("op_1826_pad_type_0"), val = tensor("valid")]; + tensor var_1826_strides_0 = const()[name = tensor("op_1826_strides_0"), val = tensor([1, 1])]; + tensor var_1826_pad_0 = const()[name = tensor("op_1826_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1826_dilations_0 = const()[name = tensor("op_1826_dilations_0"), val = tensor([1, 1])]; + tensor var_1826_groups_0 = const()[name = tensor("op_1826_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113142080))), name = tensor("layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113132736))), shape = tensor([768, 768, 1, 1])]; + tensor var_1826_cast_fp16 = conv(dilations = var_1826_dilations_0, groups = var_1826_groups_0, pad = var_1826_pad_0, pad_type = var_1826_pad_type_0, strides = var_1826_strides_0, weight = layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = tensor("op_1826_cast_fp16")]; + tensor query_23_cast_fp16 = add(x = var_1820_cast_fp16, y = var_1826_cast_fp16)[name = tensor("query_23_cast_fp16")]; + tensor var_1835_pad_type_0 = const()[name = tensor("op_1835_pad_type_0"), val = tensor("valid")]; + tensor var_1835_strides_0 = const()[name = tensor("op_1835_strides_0"), val = tensor([1, 1])]; + tensor var_1835_pad_0 = const()[name = tensor("op_1835_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1835_dilations_0 = const()[name = tensor("op_1835_dilations_0"), val = tensor([1, 1])]; + tensor var_1835_groups_0 = const()[name = tensor("op_1835_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113215872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113510848))), name = tensor("layers_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1835_cast_fp16 = conv(dilations = var_1835_dilations_0, groups = var_1835_groups_0, pad = var_1835_pad_0, pad_type = var_1835_pad_type_0, strides = var_1835_strides_0, weight = layers_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1835_cast_fp16")]; + tensor var_1841_pad_type_0 = const()[name = tensor("op_1841_pad_type_0"), val = tensor("valid")]; + tensor var_1841_strides_0 = const()[name = tensor("op_1841_strides_0"), val = tensor([1, 1])]; + tensor var_1841_pad_0 = const()[name = tensor("op_1841_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1841_dilations_0 = const()[name = tensor("op_1841_dilations_0"), val = tensor([1, 1])]; + tensor var_1841_groups_0 = const()[name = tensor("op_1841_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113521344))), name = tensor("layers_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113510976))), shape = tensor([768, 768, 1, 1])]; + tensor var_1841_cast_fp16 = conv(dilations = var_1841_dilations_0, groups = var_1841_groups_0, pad = var_1841_pad_0, pad_type = var_1841_pad_type_0, strides = var_1841_strides_0, weight = layers_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1841_cast_fp16")]; + tensor key_23_cast_fp16 = add(x = var_1835_cast_fp16, y = var_1841_cast_fp16)[name = tensor("key_23_cast_fp16")]; + tensor var_1851_pad_type_0 = const()[name = tensor("op_1851_pad_type_0"), val = tensor("valid")]; + tensor var_1851_strides_0 = const()[name = tensor("op_1851_strides_0"), val = tensor([1, 1])]; + tensor var_1851_pad_0 = const()[name = tensor("op_1851_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1851_dilations_0 = const()[name = tensor("op_1851_dilations_0"), val = tensor([1, 1])]; + tensor var_1851_groups_0 = const()[name = tensor("op_1851_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113595136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113890112))), name = tensor("layers_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113890240)))]; + tensor var_1851_cast_fp16 = conv(bias = layers_5_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1851_dilations_0, groups = var_1851_groups_0, pad = var_1851_pad_0, pad_type = var_1851_pad_type_0, strides = var_1851_strides_0, weight = layers_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1851_cast_fp16")]; + tensor var_1857_pad_type_0 = const()[name = tensor("op_1857_pad_type_0"), val = tensor("valid")]; + tensor var_1857_strides_0 = const()[name = tensor("op_1857_strides_0"), val = tensor([1, 1])]; + tensor var_1857_pad_0 = const()[name = tensor("op_1857_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1857_dilations_0 = const()[name = tensor("op_1857_dilations_0"), val = tensor([1, 1])]; + tensor var_1857_groups_0 = const()[name = tensor("op_1857_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113899136))), name = tensor("layers_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113891840))), shape = tensor([768, 768, 1, 1])]; + tensor var_1857_cast_fp16 = conv(dilations = var_1857_dilations_0, groups = var_1857_groups_0, pad = var_1857_pad_0, pad_type = var_1857_pad_type_0, strides = var_1857_strides_0, weight = layers_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1857_cast_fp16")]; + tensor value_23_cast_fp16 = add(x = var_1851_cast_fp16, y = var_1857_cast_fp16)[name = tensor("value_23_cast_fp16")]; + tensor var_1861 = const()[name = tensor("op_1861"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_23_cast_fp16 = reshape(shape = var_1861, x = query_23_cast_fp16)[name = tensor("mh_q_23_cast_fp16")]; + tensor var_1863_to_fp16 = const()[name = tensor("op_1863_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1864_cast_fp16 = mul(x = mh_q_23_cast_fp16, y = var_1863_to_fp16)[name = tensor("op_1864_cast_fp16")]; + tensor var_1867 = const()[name = tensor("op_1867"), val = tensor([1, 12, 64, 1500])]; + tensor var_1868_cast_fp16 = reshape(shape = var_1867, x = key_23_cast_fp16)[name = tensor("op_1868_cast_fp16")]; + tensor mh_w_35_transpose_x_0 = const()[name = tensor("mh_w_35_transpose_x_0"), val = tensor(true)]; + tensor mh_w_35_transpose_y_0 = const()[name = tensor("mh_w_35_transpose_y_0"), val = tensor(false)]; + tensor mh_w_35_cast_fp16 = matmul(transpose_x = mh_w_35_transpose_x_0, transpose_y = mh_w_35_transpose_y_0, x = var_1864_cast_fp16, y = var_1868_cast_fp16)[name = tensor("mh_w_35_cast_fp16")]; + tensor obj_83_cast_fp16 = softmax(axis = var_1647, x = mh_w_35_cast_fp16)[name = tensor("obj_83_cast_fp16")]; + tensor var_1872 = const()[name = tensor("op_1872"), val = tensor([1, 12, 64, 1500])]; + tensor var_1873_cast_fp16 = reshape(shape = var_1872, x = value_23_cast_fp16)[name = tensor("op_1873_cast_fp16")]; + tensor attn_23_transpose_x_0 = const()[name = tensor("attn_23_transpose_x_0"), val = tensor(false)]; + tensor attn_23_transpose_y_0 = const()[name = tensor("attn_23_transpose_y_0"), val = tensor(true)]; + tensor attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_1873_cast_fp16, y = obj_83_cast_fp16)[name = tensor("attn_23_cast_fp16")]; + tensor var_1876 = const()[name = tensor("op_1876"), val = tensor([1, 768, 1, 1])]; + tensor input_53_cast_fp16 = reshape(shape = var_1876, x = attn_23_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor var_1886_pad_type_0 = const()[name = tensor("op_1886_pad_type_0"), val = tensor("valid")]; + tensor var_1886_strides_0 = const()[name = tensor("op_1886_strides_0"), val = tensor([1, 1])]; + tensor var_1886_pad_0 = const()[name = tensor("op_1886_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1886_dilations_0 = const()[name = tensor("op_1886_dilations_0"), val = tensor([1, 1])]; + tensor var_1886_groups_0 = const()[name = tensor("op_1886_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113972928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114267904))), name = tensor("layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114268032)))]; + tensor var_1886_cast_fp16 = conv(bias = layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1886_dilations_0, groups = var_1886_groups_0, pad = var_1886_pad_0, pad_type = var_1886_pad_type_0, strides = var_1886_strides_0, weight = layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_53_cast_fp16)[name = tensor("op_1886_cast_fp16")]; + tensor var_1892_pad_type_0 = const()[name = tensor("op_1892_pad_type_0"), val = tensor("valid")]; + tensor var_1892_strides_0 = const()[name = tensor("op_1892_strides_0"), val = tensor([1, 1])]; + tensor var_1892_pad_0 = const()[name = tensor("op_1892_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1892_dilations_0 = const()[name = tensor("op_1892_dilations_0"), val = tensor([1, 1])]; + tensor var_1892_groups_0 = const()[name = tensor("op_1892_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114276608))), name = tensor("layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114269632))), shape = tensor([768, 768, 1, 1])]; + tensor var_1892_cast_fp16 = conv(dilations = var_1892_dilations_0, groups = var_1892_groups_0, pad = var_1892_pad_0, pad_type = var_1892_pad_type_0, strides = var_1892_strides_0, weight = layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_53_cast_fp16)[name = tensor("op_1892_cast_fp16")]; + tensor obj_81_cast_fp16 = add(x = var_1886_cast_fp16, y = var_1892_cast_fp16)[name = tensor("obj_81_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_81_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; + tensor out_35_axes_0 = const()[name = tensor("out_35_axes_0"), val = tensor([1])]; + tensor var_1903_to_fp16 = const()[name = tensor("op_1903_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1903_to_fp16, x = inputs_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; + tensor input_55_gamma_0_to_fp16 = const()[name = tensor("input_55_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114350400)))]; + tensor input_55_beta_0_to_fp16 = const()[name = tensor("input_55_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114352000)))]; + tensor input_55_epsilon_0_to_fp16 = const()[name = tensor("input_55_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_55_cast_fp16 = batch_norm(beta = input_55_beta_0_to_fp16, epsilon = input_55_epsilon_0_to_fp16, gamma = input_55_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_1921_pad_type_0 = const()[name = tensor("op_1921_pad_type_0"), val = tensor("valid")]; + tensor var_1921_strides_0 = const()[name = tensor("op_1921_strides_0"), val = tensor([1, 1])]; + tensor var_1921_pad_0 = const()[name = tensor("op_1921_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1921_dilations_0 = const()[name = tensor("op_1921_dilations_0"), val = tensor([1, 1])]; + tensor var_1921_groups_0 = const()[name = tensor("op_1921_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114353600))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115533312))), name = tensor("layers_5_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115533440)))]; + tensor var_1921_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1921_dilations_0, groups = var_1921_groups_0, pad = var_1921_pad_0, pad_type = var_1921_pad_type_0, strides = var_1921_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = tensor("op_1921_cast_fp16")]; + tensor var_1927_pad_type_0 = const()[name = tensor("op_1927_pad_type_0"), val = tensor("valid")]; + tensor var_1927_strides_0 = const()[name = tensor("op_1927_strides_0"), val = tensor([1, 1])]; + tensor var_1927_pad_0 = const()[name = tensor("op_1927_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1927_dilations_0 = const()[name = tensor("op_1927_dilations_0"), val = tensor([1, 1])]; + tensor var_1927_groups_0 = const()[name = tensor("op_1927_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115561024))), name = tensor("layers_5_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115539648))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1927_cast_fp16 = conv(dilations = var_1927_dilations_0, groups = var_1927_groups_0, pad = var_1927_pad_0, pad_type = var_1927_pad_type_0, strides = var_1927_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = tensor("op_1927_cast_fp16")]; + tensor input_57_cast_fp16 = add(x = var_1921_cast_fp16, y = var_1927_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor input_59_mode_0 = const()[name = tensor("input_59_mode_0"), val = tensor("EXACT")]; + tensor input_59_cast_fp16 = gelu(mode = input_59_mode_0, x = input_57_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor var_1938_pad_type_0 = const()[name = tensor("op_1938_pad_type_0"), val = tensor("valid")]; + tensor var_1938_strides_0 = const()[name = tensor("op_1938_strides_0"), val = tensor([1, 1])]; + tensor var_1938_pad_0 = const()[name = tensor("op_1938_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1938_dilations_0 = const()[name = tensor("op_1938_dilations_0"), val = tensor([1, 1])]; + tensor var_1938_groups_0 = const()[name = tensor("op_1938_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115856000))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117035712))), name = tensor("layers_5_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117035840)))]; + tensor var_1938_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1938_dilations_0, groups = var_1938_groups_0, pad = var_1938_pad_0, pad_type = var_1938_pad_type_0, strides = var_1938_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = tensor("op_1938_cast_fp16")]; + tensor var_1944_pad_type_0 = const()[name = tensor("op_1944_pad_type_0"), val = tensor("valid")]; + tensor var_1944_strides_0 = const()[name = tensor("op_1944_strides_0"), val = tensor([1, 1])]; + tensor var_1944_pad_0 = const()[name = tensor("op_1944_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1944_dilations_0 = const()[name = tensor("op_1944_dilations_0"), val = tensor([1, 1])]; + tensor var_1944_groups_0 = const()[name = tensor("op_1944_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117059648))), name = tensor("layers_5_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117037440))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1944_cast_fp16 = conv(dilations = var_1944_dilations_0, groups = var_1944_groups_0, pad = var_1944_pad_0, pad_type = var_1944_pad_type_0, strides = var_1944_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = tensor("op_1944_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = var_1938_cast_fp16, y = var_1944_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; + tensor var_1956 = const()[name = tensor("op_1956"), val = tensor(3)]; + tensor out_37_axes_0 = const()[name = tensor("out_37_axes_0"), val = tensor([1])]; + tensor var_1981_to_fp16 = const()[name = tensor("op_1981_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1981_to_fp16, x = inputs_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; + tensor obj_85_gamma_0_to_fp16 = const()[name = tensor("obj_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117354624)))]; + tensor obj_85_beta_0_to_fp16 = const()[name = tensor("obj_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117356224)))]; + tensor obj_85_epsilon_0_to_fp16 = const()[name = tensor("obj_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_85_cast_fp16 = batch_norm(beta = obj_85_beta_0_to_fp16, epsilon = obj_85_epsilon_0_to_fp16, gamma = obj_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_85_cast_fp16")]; + tensor var_2003_pad_type_0 = const()[name = tensor("op_2003_pad_type_0"), val = tensor("valid")]; + tensor var_2003_strides_0 = const()[name = tensor("op_2003_strides_0"), val = tensor([1, 1])]; + tensor var_2003_pad_0 = const()[name = tensor("op_2003_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2003_dilations_0 = const()[name = tensor("op_2003_dilations_0"), val = tensor([1, 1])]; + tensor var_2003_groups_0 = const()[name = tensor("op_2003_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117357824))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117652800))), name = tensor("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117652928)))]; + tensor var_2003_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2003_dilations_0, groups = var_2003_groups_0, pad = var_2003_pad_0, pad_type = var_2003_pad_type_0, strides = var_2003_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_2003_cast_fp16")]; + tensor var_2009_pad_type_0 = const()[name = tensor("op_2009_pad_type_0"), val = tensor("valid")]; + tensor var_2009_strides_0 = const()[name = tensor("op_2009_strides_0"), val = tensor([1, 1])]; + tensor var_2009_pad_0 = const()[name = tensor("op_2009_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2009_dilations_0 = const()[name = tensor("op_2009_dilations_0"), val = tensor([1, 1])]; + tensor var_2009_groups_0 = const()[name = tensor("op_2009_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117662848))), name = tensor("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117654528))), shape = tensor([768, 768, 1, 1])]; + tensor var_2009_cast_fp16 = conv(dilations = var_2009_dilations_0, groups = var_2009_groups_0, pad = var_2009_pad_0, pad_type = var_2009_pad_type_0, strides = var_2009_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_2009_cast_fp16")]; + tensor query_25_cast_fp16 = add(x = var_2003_cast_fp16, y = var_2009_cast_fp16)[name = tensor("query_25_cast_fp16")]; + tensor var_2018_pad_type_0 = const()[name = tensor("op_2018_pad_type_0"), val = tensor("valid")]; + tensor var_2018_strides_0 = const()[name = tensor("op_2018_strides_0"), val = tensor([1, 1])]; + tensor var_2018_pad_0 = const()[name = tensor("op_2018_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2018_dilations_0 = const()[name = tensor("op_2018_dilations_0"), val = tensor([1, 1])]; + tensor var_2018_groups_0 = const()[name = tensor("op_2018_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117736640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118031616))), name = tensor("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2018_cast_fp16 = conv(dilations = var_2018_dilations_0, groups = var_2018_groups_0, pad = var_2018_pad_0, pad_type = var_2018_pad_type_0, strides = var_2018_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_2018_cast_fp16")]; + tensor var_2024_pad_type_0 = const()[name = tensor("op_2024_pad_type_0"), val = tensor("valid")]; + tensor var_2024_strides_0 = const()[name = tensor("op_2024_strides_0"), val = tensor([1, 1])]; + tensor var_2024_pad_0 = const()[name = tensor("op_2024_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2024_dilations_0 = const()[name = tensor("op_2024_dilations_0"), val = tensor([1, 1])]; + tensor var_2024_groups_0 = const()[name = tensor("op_2024_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118040256))), name = tensor("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118031744))), shape = tensor([768, 768, 1, 1])]; + tensor var_2024_cast_fp16 = conv(dilations = var_2024_dilations_0, groups = var_2024_groups_0, pad = var_2024_pad_0, pad_type = var_2024_pad_type_0, strides = var_2024_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_2024_cast_fp16")]; + tensor current_key_13_cast_fp16 = add(x = var_2018_cast_fp16, y = var_2024_cast_fp16)[name = tensor("current_key_13_cast_fp16")]; + tensor var_2034_pad_type_0 = const()[name = tensor("op_2034_pad_type_0"), val = tensor("valid")]; + tensor var_2034_strides_0 = const()[name = tensor("op_2034_strides_0"), val = tensor([1, 1])]; + tensor var_2034_pad_0 = const()[name = tensor("op_2034_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_dilations_0 = const()[name = tensor("op_2034_dilations_0"), val = tensor([1, 1])]; + tensor var_2034_groups_0 = const()[name = tensor("op_2034_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118114048))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118409024))), name = tensor("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118409152)))]; + tensor var_2034_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2034_dilations_0, groups = var_2034_groups_0, pad = var_2034_pad_0, pad_type = var_2034_pad_type_0, strides = var_2034_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2040_pad_type_0 = const()[name = tensor("op_2040_pad_type_0"), val = tensor("valid")]; + tensor var_2040_strides_0 = const()[name = tensor("op_2040_strides_0"), val = tensor([1, 1])]; + tensor var_2040_pad_0 = const()[name = tensor("op_2040_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2040_dilations_0 = const()[name = tensor("op_2040_dilations_0"), val = tensor([1, 1])]; + tensor var_2040_groups_0 = const()[name = tensor("op_2040_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118416000))), name = tensor("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118410752))), shape = tensor([768, 768, 1, 1])]; + tensor var_2040_cast_fp16 = conv(dilations = var_2040_dilations_0, groups = var_2040_groups_0, pad = var_2040_pad_0, pad_type = var_2040_pad_type_0, strides = var_2040_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_2040_cast_fp16")]; + tensor current_value_13_cast_fp16 = add(x = var_2034_cast_fp16, y = var_2040_cast_fp16)[name = tensor("current_value_13_cast_fp16")]; + tensor var_2047_cast_fp16 = mul(x = var_69_cast_fp16_6, y = var_192_cast_fp16)[name = tensor("op_2047_cast_fp16")]; + tensor var_2048_cast_fp16 = mul(x = current_key_13_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2048_cast_fp16")]; + tensor key_25_cast_fp16 = add(x = var_2047_cast_fp16, y = var_2048_cast_fp16)[name = tensor("key_25_cast_fp16")]; + tensor var_2051_cast_fp16 = mul(x = var_84_cast_fp16_6, y = var_192_cast_fp16)[name = tensor("op_2051_cast_fp16")]; + tensor var_2052_cast_fp16 = mul(x = current_value_13_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2052_cast_fp16")]; + tensor value_25_cast_fp16 = add(x = var_2051_cast_fp16, y = var_2052_cast_fp16)[name = tensor("value_25_cast_fp16")]; + tensor var_2056 = const()[name = tensor("op_2056"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_25_cast_fp16 = reshape(shape = var_2056, x = query_25_cast_fp16)[name = tensor("mh_q_25_cast_fp16")]; + tensor var_2058_to_fp16 = const()[name = tensor("op_2058_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2059_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = var_2058_to_fp16)[name = tensor("op_2059_cast_fp16")]; + tensor var_2062 = const()[name = tensor("op_2062"), val = tensor([1, 12, 64, 448])]; + tensor var_2063_cast_fp16 = reshape(shape = var_2062, x = key_25_cast_fp16)[name = tensor("op_2063_cast_fp16")]; + tensor mh_w_37_transpose_x_0 = const()[name = tensor("mh_w_37_transpose_x_0"), val = tensor(true)]; + tensor mh_w_37_transpose_y_0 = const()[name = tensor("mh_w_37_transpose_y_0"), val = tensor(false)]; + tensor mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_2059_cast_fp16, y = var_2063_cast_fp16)[name = tensor("mh_w_37_cast_fp16")]; + tensor mh_w_39_cast_fp16 = add(x = mh_w_37_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_39_cast_fp16")]; + tensor var_2071_cast_fp16 = softmax(axis = var_1956, x = mh_w_39_cast_fp16)[name = tensor("op_2071_cast_fp16")]; + tensor var_2072 = const()[name = tensor("op_2072"), val = tensor([1, 12, 64, 448])]; + tensor var_2073_cast_fp16 = reshape(shape = var_2072, x = value_25_cast_fp16)[name = tensor("op_2073_cast_fp16")]; + tensor attn_25_transpose_x_0 = const()[name = tensor("attn_25_transpose_x_0"), val = tensor(false)]; + tensor attn_25_transpose_y_0 = const()[name = tensor("attn_25_transpose_y_0"), val = tensor(true)]; + tensor attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_2073_cast_fp16, y = var_2071_cast_fp16)[name = tensor("attn_25_cast_fp16")]; + tensor var_2076 = const()[name = tensor("op_2076"), val = tensor([1, 768, 1, 1])]; + tensor input_61_cast_fp16 = reshape(shape = var_2076, x = attn_25_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor var_2086_pad_type_0 = const()[name = tensor("op_2086_pad_type_0"), val = tensor("valid")]; + tensor var_2086_strides_0 = const()[name = tensor("op_2086_strides_0"), val = tensor([1, 1])]; + tensor var_2086_pad_0 = const()[name = tensor("op_2086_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2086_dilations_0 = const()[name = tensor("op_2086_dilations_0"), val = tensor([1, 1])]; + tensor var_2086_groups_0 = const()[name = tensor("op_2086_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118489792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118784768))), name = tensor("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118784896)))]; + tensor var_2086_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2086_dilations_0, groups = var_2086_groups_0, pad = var_2086_pad_0, pad_type = var_2086_pad_type_0, strides = var_2086_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = tensor("op_2086_cast_fp16")]; + tensor var_2092_pad_type_0 = const()[name = tensor("op_2092_pad_type_0"), val = tensor("valid")]; + tensor var_2092_strides_0 = const()[name = tensor("op_2092_strides_0"), val = tensor([1, 1])]; + tensor var_2092_pad_0 = const()[name = tensor("op_2092_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2092_dilations_0 = const()[name = tensor("op_2092_dilations_0"), val = tensor([1, 1])]; + tensor var_2092_groups_0 = const()[name = tensor("op_2092_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118792384))), name = tensor("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118786496))), shape = tensor([768, 768, 1, 1])]; + tensor var_2092_cast_fp16 = conv(dilations = var_2092_dilations_0, groups = var_2092_groups_0, pad = var_2092_pad_0, pad_type = var_2092_pad_type_0, strides = var_2092_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_61_cast_fp16)[name = tensor("op_2092_cast_fp16")]; + tensor obj_91_cast_fp16 = add(x = var_2086_cast_fp16, y = var_2092_cast_fp16)[name = tensor("obj_91_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_91_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; + tensor out_39_axes_0 = const()[name = tensor("out_39_axes_0"), val = tensor([1])]; + tensor var_2107_to_fp16 = const()[name = tensor("op_2107_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_2107_to_fp16, x = inputs_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; + tensor obj_93_gamma_0_to_fp16 = const()[name = tensor("obj_93_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118866176)))]; + tensor obj_93_beta_0_to_fp16 = const()[name = tensor("obj_93_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118867776)))]; + tensor obj_93_epsilon_0_to_fp16 = const()[name = tensor("obj_93_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_93_cast_fp16 = batch_norm(beta = obj_93_beta_0_to_fp16, epsilon = obj_93_epsilon_0_to_fp16, gamma = obj_93_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("obj_93_cast_fp16")]; + tensor var_2129_pad_type_0 = const()[name = tensor("op_2129_pad_type_0"), val = tensor("valid")]; + tensor var_2129_strides_0 = const()[name = tensor("op_2129_strides_0"), val = tensor([1, 1])]; + tensor var_2129_pad_0 = const()[name = tensor("op_2129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2129_dilations_0 = const()[name = tensor("op_2129_dilations_0"), val = tensor([1, 1])]; + tensor var_2129_groups_0 = const()[name = tensor("op_2129_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118869376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119164352))), name = tensor("layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119164480)))]; + tensor var_2129_cast_fp16 = conv(bias = layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2129_dilations_0, groups = var_2129_groups_0, pad = var_2129_pad_0, pad_type = var_2129_pad_type_0, strides = var_2129_strides_0, weight = layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = tensor("op_2129_cast_fp16")]; + tensor var_2135_pad_type_0 = const()[name = tensor("op_2135_pad_type_0"), val = tensor("valid")]; + tensor var_2135_strides_0 = const()[name = tensor("op_2135_strides_0"), val = tensor([1, 1])]; + tensor var_2135_pad_0 = const()[name = tensor("op_2135_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2135_dilations_0 = const()[name = tensor("op_2135_dilations_0"), val = tensor([1, 1])]; + tensor var_2135_groups_0 = const()[name = tensor("op_2135_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119172928))), name = tensor("layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119166080))), shape = tensor([768, 768, 1, 1])]; + tensor var_2135_cast_fp16 = conv(dilations = var_2135_dilations_0, groups = var_2135_groups_0, pad = var_2135_pad_0, pad_type = var_2135_pad_type_0, strides = var_2135_strides_0, weight = layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_93_cast_fp16)[name = tensor("op_2135_cast_fp16")]; + tensor query_27_cast_fp16 = add(x = var_2129_cast_fp16, y = var_2135_cast_fp16)[name = tensor("query_27_cast_fp16")]; + tensor var_2144_pad_type_0 = const()[name = tensor("op_2144_pad_type_0"), val = tensor("valid")]; + tensor var_2144_strides_0 = const()[name = tensor("op_2144_strides_0"), val = tensor([1, 1])]; + tensor var_2144_pad_0 = const()[name = tensor("op_2144_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2144_dilations_0 = const()[name = tensor("op_2144_dilations_0"), val = tensor([1, 1])]; + tensor var_2144_groups_0 = const()[name = tensor("op_2144_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119246720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119541696))), name = tensor("layers_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2144_cast_fp16 = conv(dilations = var_2144_dilations_0, groups = var_2144_groups_0, pad = var_2144_pad_0, pad_type = var_2144_pad_type_0, strides = var_2144_strides_0, weight = layers_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2144_cast_fp16")]; + tensor var_2150_pad_type_0 = const()[name = tensor("op_2150_pad_type_0"), val = tensor("valid")]; + tensor var_2150_strides_0 = const()[name = tensor("op_2150_strides_0"), val = tensor([1, 1])]; + tensor var_2150_pad_0 = const()[name = tensor("op_2150_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2150_dilations_0 = const()[name = tensor("op_2150_dilations_0"), val = tensor([1, 1])]; + tensor var_2150_groups_0 = const()[name = tensor("op_2150_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119549312))), name = tensor("layers_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119541824))), shape = tensor([768, 768, 1, 1])]; + tensor var_2150_cast_fp16 = conv(dilations = var_2150_dilations_0, groups = var_2150_groups_0, pad = var_2150_pad_0, pad_type = var_2150_pad_type_0, strides = var_2150_strides_0, weight = layers_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2150_cast_fp16")]; + tensor key_27_cast_fp16 = add(x = var_2144_cast_fp16, y = var_2150_cast_fp16)[name = tensor("key_27_cast_fp16")]; + tensor var_2160_pad_type_0 = const()[name = tensor("op_2160_pad_type_0"), val = tensor("valid")]; + tensor var_2160_strides_0 = const()[name = tensor("op_2160_strides_0"), val = tensor([1, 1])]; + tensor var_2160_pad_0 = const()[name = tensor("op_2160_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2160_dilations_0 = const()[name = tensor("op_2160_dilations_0"), val = tensor([1, 1])]; + tensor var_2160_groups_0 = const()[name = tensor("op_2160_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119623104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119918080))), name = tensor("layers_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119918208)))]; + tensor var_2160_cast_fp16 = conv(bias = layers_6_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2160_dilations_0, groups = var_2160_groups_0, pad = var_2160_pad_0, pad_type = var_2160_pad_type_0, strides = var_2160_strides_0, weight = layers_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2160_cast_fp16")]; + tensor var_2166_pad_type_0 = const()[name = tensor("op_2166_pad_type_0"), val = tensor("valid")]; + tensor var_2166_strides_0 = const()[name = tensor("op_2166_strides_0"), val = tensor([1, 1])]; + tensor var_2166_pad_0 = const()[name = tensor("op_2166_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2166_dilations_0 = const()[name = tensor("op_2166_dilations_0"), val = tensor([1, 1])]; + tensor var_2166_groups_0 = const()[name = tensor("op_2166_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119926080))), name = tensor("layers_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119919808))), shape = tensor([768, 768, 1, 1])]; + tensor var_2166_cast_fp16 = conv(dilations = var_2166_dilations_0, groups = var_2166_groups_0, pad = var_2166_pad_0, pad_type = var_2166_pad_type_0, strides = var_2166_strides_0, weight = layers_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2166_cast_fp16")]; + tensor value_27_cast_fp16 = add(x = var_2160_cast_fp16, y = var_2166_cast_fp16)[name = tensor("value_27_cast_fp16")]; + tensor var_2170 = const()[name = tensor("op_2170"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_27_cast_fp16 = reshape(shape = var_2170, x = query_27_cast_fp16)[name = tensor("mh_q_27_cast_fp16")]; + tensor var_2172_to_fp16 = const()[name = tensor("op_2172_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2173_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_2172_to_fp16)[name = tensor("op_2173_cast_fp16")]; + tensor var_2176 = const()[name = tensor("op_2176"), val = tensor([1, 12, 64, 1500])]; + tensor var_2177_cast_fp16 = reshape(shape = var_2176, x = key_27_cast_fp16)[name = tensor("op_2177_cast_fp16")]; + tensor mh_w_41_transpose_x_0 = const()[name = tensor("mh_w_41_transpose_x_0"), val = tensor(true)]; + tensor mh_w_41_transpose_y_0 = const()[name = tensor("mh_w_41_transpose_y_0"), val = tensor(false)]; + tensor mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_2173_cast_fp16, y = var_2177_cast_fp16)[name = tensor("mh_w_41_cast_fp16")]; + tensor obj_97_cast_fp16 = softmax(axis = var_1956, x = mh_w_41_cast_fp16)[name = tensor("obj_97_cast_fp16")]; + tensor var_2181 = const()[name = tensor("op_2181"), val = tensor([1, 12, 64, 1500])]; + tensor var_2182_cast_fp16 = reshape(shape = var_2181, x = value_27_cast_fp16)[name = tensor("op_2182_cast_fp16")]; + tensor attn_27_transpose_x_0 = const()[name = tensor("attn_27_transpose_x_0"), val = tensor(false)]; + tensor attn_27_transpose_y_0 = const()[name = tensor("attn_27_transpose_y_0"), val = tensor(true)]; + tensor attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_2182_cast_fp16, y = obj_97_cast_fp16)[name = tensor("attn_27_cast_fp16")]; + tensor var_2185 = const()[name = tensor("op_2185"), val = tensor([1, 768, 1, 1])]; + tensor input_63_cast_fp16 = reshape(shape = var_2185, x = attn_27_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor var_2195_pad_type_0 = const()[name = tensor("op_2195_pad_type_0"), val = tensor("valid")]; + tensor var_2195_strides_0 = const()[name = tensor("op_2195_strides_0"), val = tensor([1, 1])]; + tensor var_2195_pad_0 = const()[name = tensor("op_2195_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2195_dilations_0 = const()[name = tensor("op_2195_dilations_0"), val = tensor([1, 1])]; + tensor var_2195_groups_0 = const()[name = tensor("op_2195_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119999872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120294848))), name = tensor("layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120294976)))]; + tensor var_2195_cast_fp16 = conv(bias = layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2195_dilations_0, groups = var_2195_groups_0, pad = var_2195_pad_0, pad_type = var_2195_pad_type_0, strides = var_2195_strides_0, weight = layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = tensor("op_2195_cast_fp16")]; + tensor var_2201_pad_type_0 = const()[name = tensor("op_2201_pad_type_0"), val = tensor("valid")]; + tensor var_2201_strides_0 = const()[name = tensor("op_2201_strides_0"), val = tensor([1, 1])]; + tensor var_2201_pad_0 = const()[name = tensor("op_2201_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2201_dilations_0 = const()[name = tensor("op_2201_dilations_0"), val = tensor([1, 1])]; + tensor var_2201_groups_0 = const()[name = tensor("op_2201_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120302912))), name = tensor("layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120296576))), shape = tensor([768, 768, 1, 1])]; + tensor var_2201_cast_fp16 = conv(dilations = var_2201_dilations_0, groups = var_2201_groups_0, pad = var_2201_pad_0, pad_type = var_2201_pad_type_0, strides = var_2201_strides_0, weight = layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = tensor("op_2201_cast_fp16")]; + tensor obj_95_cast_fp16 = add(x = var_2195_cast_fp16, y = var_2201_cast_fp16)[name = tensor("obj_95_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = obj_95_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; + tensor out_41_axes_0 = const()[name = tensor("out_41_axes_0"), val = tensor([1])]; + tensor var_2215_to_fp16 = const()[name = tensor("op_2215_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_2215_to_fp16, x = inputs_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; + tensor input_65_gamma_0_to_fp16 = const()[name = tensor("input_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120376704)))]; + tensor input_65_beta_0_to_fp16 = const()[name = tensor("input_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120378304)))]; + tensor input_65_epsilon_0_to_fp16 = const()[name = tensor("input_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_65_cast_fp16 = batch_norm(beta = input_65_beta_0_to_fp16, epsilon = input_65_epsilon_0_to_fp16, gamma = input_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("input_65_cast_fp16")]; + tensor var_2233_pad_type_0 = const()[name = tensor("op_2233_pad_type_0"), val = tensor("valid")]; + tensor var_2233_strides_0 = const()[name = tensor("op_2233_strides_0"), val = tensor([1, 1])]; + tensor var_2233_pad_0 = const()[name = tensor("op_2233_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2233_dilations_0 = const()[name = tensor("op_2233_dilations_0"), val = tensor([1, 1])]; + tensor var_2233_groups_0 = const()[name = tensor("op_2233_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120379904))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121559616))), name = tensor("layers_6_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121559744)))]; + tensor var_2233_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_2233_dilations_0, groups = var_2233_groups_0, pad = var_2233_pad_0, pad_type = var_2233_pad_type_0, strides = var_2233_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = tensor("op_2233_cast_fp16")]; + tensor var_2239_pad_type_0 = const()[name = tensor("op_2239_pad_type_0"), val = tensor("valid")]; + tensor var_2239_strides_0 = const()[name = tensor("op_2239_strides_0"), val = tensor([1, 1])]; + tensor var_2239_pad_0 = const()[name = tensor("op_2239_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2239_dilations_0 = const()[name = tensor("op_2239_dilations_0"), val = tensor([1, 1])]; + tensor var_2239_groups_0 = const()[name = tensor("op_2239_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121585664))), name = tensor("layers_6_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121565952))), shape = tensor([3072, 768, 1, 1])]; + tensor var_2239_cast_fp16 = conv(dilations = var_2239_dilations_0, groups = var_2239_groups_0, pad = var_2239_pad_0, pad_type = var_2239_pad_type_0, strides = var_2239_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = tensor("op_2239_cast_fp16")]; + tensor input_67_cast_fp16 = add(x = var_2233_cast_fp16, y = var_2239_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor input_69_mode_0 = const()[name = tensor("input_69_mode_0"), val = tensor("EXACT")]; + tensor input_69_cast_fp16 = gelu(mode = input_69_mode_0, x = input_67_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor var_2250_pad_type_0 = const()[name = tensor("op_2250_pad_type_0"), val = tensor("valid")]; + tensor var_2250_strides_0 = const()[name = tensor("op_2250_strides_0"), val = tensor([1, 1])]; + tensor var_2250_pad_0 = const()[name = tensor("op_2250_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2250_dilations_0 = const()[name = tensor("op_2250_dilations_0"), val = tensor([1, 1])]; + tensor var_2250_groups_0 = const()[name = tensor("op_2250_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121880640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123060352))), name = tensor("layers_6_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123060480)))]; + tensor var_2250_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_2250_dilations_0, groups = var_2250_groups_0, pad = var_2250_pad_0, pad_type = var_2250_pad_type_0, strides = var_2250_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_69_cast_fp16)[name = tensor("op_2250_cast_fp16")]; + tensor var_2256_pad_type_0 = const()[name = tensor("op_2256_pad_type_0"), val = tensor("valid")]; + tensor var_2256_strides_0 = const()[name = tensor("op_2256_strides_0"), val = tensor([1, 1])]; + tensor var_2256_pad_0 = const()[name = tensor("op_2256_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2256_dilations_0 = const()[name = tensor("op_2256_dilations_0"), val = tensor([1, 1])]; + tensor var_2256_groups_0 = const()[name = tensor("op_2256_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123088640))), name = tensor("layers_6_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123062080))), shape = tensor([768, 3072, 1, 1])]; + tensor var_2256_cast_fp16 = conv(dilations = var_2256_dilations_0, groups = var_2256_groups_0, pad = var_2256_pad_0, pad_type = var_2256_pad_type_0, strides = var_2256_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_69_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = var_2250_cast_fp16, y = var_2256_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; + tensor var_2269 = const()[name = tensor("op_2269"), val = tensor(3)]; + tensor out_43_axes_0 = const()[name = tensor("out_43_axes_0"), val = tensor([1])]; + tensor var_2294_to_fp16 = const()[name = tensor("op_2294_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2294_to_fp16, x = inputs_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; + tensor obj_99_gamma_0_to_fp16 = const()[name = tensor("obj_99_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123383616)))]; + tensor obj_99_beta_0_to_fp16 = const()[name = tensor("obj_99_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123385216)))]; + tensor obj_99_epsilon_0_to_fp16 = const()[name = tensor("obj_99_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_99_cast_fp16 = batch_norm(beta = obj_99_beta_0_to_fp16, epsilon = obj_99_epsilon_0_to_fp16, gamma = obj_99_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("obj_99_cast_fp16")]; + tensor var_2316_pad_type_0 = const()[name = tensor("op_2316_pad_type_0"), val = tensor("valid")]; + tensor var_2316_strides_0 = const()[name = tensor("op_2316_strides_0"), val = tensor([1, 1])]; + tensor var_2316_pad_0 = const()[name = tensor("op_2316_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2316_dilations_0 = const()[name = tensor("op_2316_dilations_0"), val = tensor([1, 1])]; + tensor var_2316_groups_0 = const()[name = tensor("op_2316_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123386816))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123681792))), name = tensor("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123681920)))]; + tensor var_2316_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2316_dilations_0, groups = var_2316_groups_0, pad = var_2316_pad_0, pad_type = var_2316_pad_type_0, strides = var_2316_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_99_cast_fp16)[name = tensor("op_2316_cast_fp16")]; + tensor var_2322_pad_type_0 = const()[name = tensor("op_2322_pad_type_0"), val = tensor("valid")]; + tensor var_2322_strides_0 = const()[name = tensor("op_2322_strides_0"), val = tensor([1, 1])]; + tensor var_2322_pad_0 = const()[name = tensor("op_2322_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2322_dilations_0 = const()[name = tensor("op_2322_dilations_0"), val = tensor([1, 1])]; + tensor var_2322_groups_0 = const()[name = tensor("op_2322_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123689664))), name = tensor("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123683520))), shape = tensor([768, 768, 1, 1])]; + tensor var_2322_cast_fp16 = conv(dilations = var_2322_dilations_0, groups = var_2322_groups_0, pad = var_2322_pad_0, pad_type = var_2322_pad_type_0, strides = var_2322_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_99_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor query_29_cast_fp16 = add(x = var_2316_cast_fp16, y = var_2322_cast_fp16)[name = tensor("query_29_cast_fp16")]; + tensor var_2331_pad_type_0 = const()[name = tensor("op_2331_pad_type_0"), val = tensor("valid")]; + tensor var_2331_strides_0 = const()[name = tensor("op_2331_strides_0"), val = tensor([1, 1])]; + tensor var_2331_pad_0 = const()[name = tensor("op_2331_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2331_dilations_0 = const()[name = tensor("op_2331_dilations_0"), val = tensor([1, 1])]; + tensor var_2331_groups_0 = const()[name = tensor("op_2331_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123763456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124058432))), name = tensor("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2331_cast_fp16 = conv(dilations = var_2331_dilations_0, groups = var_2331_groups_0, pad = var_2331_pad_0, pad_type = var_2331_pad_type_0, strides = var_2331_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_99_cast_fp16)[name = tensor("op_2331_cast_fp16")]; + tensor var_2337_pad_type_0 = const()[name = tensor("op_2337_pad_type_0"), val = tensor("valid")]; + tensor var_2337_strides_0 = const()[name = tensor("op_2337_strides_0"), val = tensor([1, 1])]; + tensor var_2337_pad_0 = const()[name = tensor("op_2337_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2337_dilations_0 = const()[name = tensor("op_2337_dilations_0"), val = tensor([1, 1])]; + tensor var_2337_groups_0 = const()[name = tensor("op_2337_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124065024))), name = tensor("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124058560))), shape = tensor([768, 768, 1, 1])]; + tensor var_2337_cast_fp16 = conv(dilations = var_2337_dilations_0, groups = var_2337_groups_0, pad = var_2337_pad_0, pad_type = var_2337_pad_type_0, strides = var_2337_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_99_cast_fp16)[name = tensor("op_2337_cast_fp16")]; + tensor current_key_15_cast_fp16 = add(x = var_2331_cast_fp16, y = var_2337_cast_fp16)[name = tensor("current_key_15_cast_fp16")]; + tensor var_2347_pad_type_0 = const()[name = tensor("op_2347_pad_type_0"), val = tensor("valid")]; + tensor var_2347_strides_0 = const()[name = tensor("op_2347_strides_0"), val = tensor([1, 1])]; + tensor var_2347_pad_0 = const()[name = tensor("op_2347_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2347_dilations_0 = const()[name = tensor("op_2347_dilations_0"), val = tensor([1, 1])]; + tensor var_2347_groups_0 = const()[name = tensor("op_2347_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124138816))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124433792))), name = tensor("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124433920)))]; + tensor var_2347_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2347_dilations_0, groups = var_2347_groups_0, pad = var_2347_pad_0, pad_type = var_2347_pad_type_0, strides = var_2347_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_99_cast_fp16)[name = tensor("op_2347_cast_fp16")]; + tensor var_2353_pad_type_0 = const()[name = tensor("op_2353_pad_type_0"), val = tensor("valid")]; + tensor var_2353_strides_0 = const()[name = tensor("op_2353_strides_0"), val = tensor([1, 1])]; + tensor var_2353_pad_0 = const()[name = tensor("op_2353_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2353_dilations_0 = const()[name = tensor("op_2353_dilations_0"), val = tensor([1, 1])]; + tensor var_2353_groups_0 = const()[name = tensor("op_2353_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124440512))), name = tensor("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124435520))), shape = tensor([768, 768, 1, 1])]; + tensor var_2353_cast_fp16 = conv(dilations = var_2353_dilations_0, groups = var_2353_groups_0, pad = var_2353_pad_0, pad_type = var_2353_pad_type_0, strides = var_2353_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_99_cast_fp16)[name = tensor("op_2353_cast_fp16")]; + tensor current_value_15_cast_fp16 = add(x = var_2347_cast_fp16, y = var_2353_cast_fp16)[name = tensor("current_value_15_cast_fp16")]; + tensor var_2360_cast_fp16 = mul(x = var_69_cast_fp16_7, y = var_192_cast_fp16)[name = tensor("op_2360_cast_fp16")]; + tensor var_2361_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2361_cast_fp16")]; + tensor key_29_cast_fp16 = add(x = var_2360_cast_fp16, y = var_2361_cast_fp16)[name = tensor("key_29_cast_fp16")]; + tensor var_2364_cast_fp16 = mul(x = var_84_cast_fp16_7, y = var_192_cast_fp16)[name = tensor("op_2364_cast_fp16")]; + tensor var_2365_cast_fp16 = mul(x = current_value_15_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2365_cast_fp16")]; + tensor value_29_cast_fp16 = add(x = var_2364_cast_fp16, y = var_2365_cast_fp16)[name = tensor("value_29_cast_fp16")]; + tensor var_2369 = const()[name = tensor("op_2369"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_29_cast_fp16 = reshape(shape = var_2369, x = query_29_cast_fp16)[name = tensor("mh_q_29_cast_fp16")]; + tensor var_2371_to_fp16 = const()[name = tensor("op_2371_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2372_cast_fp16 = mul(x = mh_q_29_cast_fp16, y = var_2371_to_fp16)[name = tensor("op_2372_cast_fp16")]; + tensor var_2375 = const()[name = tensor("op_2375"), val = tensor([1, 12, 64, 448])]; + tensor var_2376_cast_fp16 = reshape(shape = var_2375, x = key_29_cast_fp16)[name = tensor("op_2376_cast_fp16")]; + tensor mh_w_43_transpose_x_0 = const()[name = tensor("mh_w_43_transpose_x_0"), val = tensor(true)]; + tensor mh_w_43_transpose_y_0 = const()[name = tensor("mh_w_43_transpose_y_0"), val = tensor(false)]; + tensor mh_w_43_cast_fp16 = matmul(transpose_x = mh_w_43_transpose_x_0, transpose_y = mh_w_43_transpose_y_0, x = var_2372_cast_fp16, y = var_2376_cast_fp16)[name = tensor("mh_w_43_cast_fp16")]; + tensor mh_w_45_cast_fp16 = add(x = mh_w_43_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_45_cast_fp16")]; + tensor var_2384_cast_fp16 = softmax(axis = var_2269, x = mh_w_45_cast_fp16)[name = tensor("op_2384_cast_fp16")]; + tensor var_2385 = const()[name = tensor("op_2385"), val = tensor([1, 12, 64, 448])]; + tensor var_2386_cast_fp16 = reshape(shape = var_2385, x = value_29_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor attn_29_transpose_x_0 = const()[name = tensor("attn_29_transpose_x_0"), val = tensor(false)]; + tensor attn_29_transpose_y_0 = const()[name = tensor("attn_29_transpose_y_0"), val = tensor(true)]; + tensor attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_2386_cast_fp16, y = var_2384_cast_fp16)[name = tensor("attn_29_cast_fp16")]; + tensor var_2389 = const()[name = tensor("op_2389"), val = tensor([1, 768, 1, 1])]; + tensor input_71_cast_fp16 = reshape(shape = var_2389, x = attn_29_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor var_2399_pad_type_0 = const()[name = tensor("op_2399_pad_type_0"), val = tensor("valid")]; + tensor var_2399_strides_0 = const()[name = tensor("op_2399_strides_0"), val = tensor([1, 1])]; + tensor var_2399_pad_0 = const()[name = tensor("op_2399_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2399_dilations_0 = const()[name = tensor("op_2399_dilations_0"), val = tensor([1, 1])]; + tensor var_2399_groups_0 = const()[name = tensor("op_2399_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124514304))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124809280))), name = tensor("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124809408)))]; + tensor var_2399_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2399_dilations_0, groups = var_2399_groups_0, pad = var_2399_pad_0, pad_type = var_2399_pad_type_0, strides = var_2399_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = tensor("op_2399_cast_fp16")]; + tensor var_2405_pad_type_0 = const()[name = tensor("op_2405_pad_type_0"), val = tensor("valid")]; + tensor var_2405_strides_0 = const()[name = tensor("op_2405_strides_0"), val = tensor([1, 1])]; + tensor var_2405_pad_0 = const()[name = tensor("op_2405_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2405_dilations_0 = const()[name = tensor("op_2405_dilations_0"), val = tensor([1, 1])]; + tensor var_2405_groups_0 = const()[name = tensor("op_2405_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124816448))), name = tensor("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124811008))), shape = tensor([768, 768, 1, 1])]; + tensor var_2405_cast_fp16 = conv(dilations = var_2405_dilations_0, groups = var_2405_groups_0, pad = var_2405_pad_0, pad_type = var_2405_pad_type_0, strides = var_2405_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = tensor("op_2405_cast_fp16")]; + tensor obj_105_cast_fp16 = add(x = var_2399_cast_fp16, y = var_2405_cast_fp16)[name = tensor("obj_105_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = obj_105_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; + tensor out_45_axes_0 = const()[name = tensor("out_45_axes_0"), val = tensor([1])]; + tensor var_2420_to_fp16 = const()[name = tensor("op_2420_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2420_to_fp16, x = inputs_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; + tensor obj_107_gamma_0_to_fp16 = const()[name = tensor("obj_107_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124890240)))]; + tensor obj_107_beta_0_to_fp16 = const()[name = tensor("obj_107_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124891840)))]; + tensor obj_107_epsilon_0_to_fp16 = const()[name = tensor("obj_107_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_107_cast_fp16 = batch_norm(beta = obj_107_beta_0_to_fp16, epsilon = obj_107_epsilon_0_to_fp16, gamma = obj_107_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_107_cast_fp16")]; + tensor var_2442_pad_type_0 = const()[name = tensor("op_2442_pad_type_0"), val = tensor("valid")]; + tensor var_2442_strides_0 = const()[name = tensor("op_2442_strides_0"), val = tensor([1, 1])]; + tensor var_2442_pad_0 = const()[name = tensor("op_2442_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2442_dilations_0 = const()[name = tensor("op_2442_dilations_0"), val = tensor([1, 1])]; + tensor var_2442_groups_0 = const()[name = tensor("op_2442_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124893440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125188416))), name = tensor("layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125188544)))]; + tensor var_2442_cast_fp16 = conv(bias = layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2442_dilations_0, groups = var_2442_groups_0, pad = var_2442_pad_0, pad_type = var_2442_pad_type_0, strides = var_2442_strides_0, weight = layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_107_cast_fp16)[name = tensor("op_2442_cast_fp16")]; + tensor var_2448_pad_type_0 = const()[name = tensor("op_2448_pad_type_0"), val = tensor("valid")]; + tensor var_2448_strides_0 = const()[name = tensor("op_2448_strides_0"), val = tensor([1, 1])]; + tensor var_2448_pad_0 = const()[name = tensor("op_2448_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2448_dilations_0 = const()[name = tensor("op_2448_dilations_0"), val = tensor([1, 1])]; + tensor var_2448_groups_0 = const()[name = tensor("op_2448_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125196992))), name = tensor("layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125190144))), shape = tensor([768, 768, 1, 1])]; + tensor var_2448_cast_fp16 = conv(dilations = var_2448_dilations_0, groups = var_2448_groups_0, pad = var_2448_pad_0, pad_type = var_2448_pad_type_0, strides = var_2448_strides_0, weight = layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_107_cast_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor query_31_cast_fp16 = add(x = var_2442_cast_fp16, y = var_2448_cast_fp16)[name = tensor("query_31_cast_fp16")]; + tensor var_2457_pad_type_0 = const()[name = tensor("op_2457_pad_type_0"), val = tensor("valid")]; + tensor var_2457_strides_0 = const()[name = tensor("op_2457_strides_0"), val = tensor([1, 1])]; + tensor var_2457_pad_0 = const()[name = tensor("op_2457_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2457_dilations_0 = const()[name = tensor("op_2457_dilations_0"), val = tensor([1, 1])]; + tensor var_2457_groups_0 = const()[name = tensor("op_2457_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125270784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125565760))), name = tensor("layers_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2457_cast_fp16 = conv(dilations = var_2457_dilations_0, groups = var_2457_groups_0, pad = var_2457_pad_0, pad_type = var_2457_pad_type_0, strides = var_2457_strides_0, weight = layers_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2457_cast_fp16")]; + tensor var_2463_pad_type_0 = const()[name = tensor("op_2463_pad_type_0"), val = tensor("valid")]; + tensor var_2463_strides_0 = const()[name = tensor("op_2463_strides_0"), val = tensor([1, 1])]; + tensor var_2463_pad_0 = const()[name = tensor("op_2463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2463_dilations_0 = const()[name = tensor("op_2463_dilations_0"), val = tensor([1, 1])]; + tensor var_2463_groups_0 = const()[name = tensor("op_2463_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125574016))), name = tensor("layers_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125565888))), shape = tensor([768, 768, 1, 1])]; + tensor var_2463_cast_fp16 = conv(dilations = var_2463_dilations_0, groups = var_2463_groups_0, pad = var_2463_pad_0, pad_type = var_2463_pad_type_0, strides = var_2463_strides_0, weight = layers_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2463_cast_fp16")]; + tensor key_31_cast_fp16 = add(x = var_2457_cast_fp16, y = var_2463_cast_fp16)[name = tensor("key_31_cast_fp16")]; + tensor var_2473_pad_type_0 = const()[name = tensor("op_2473_pad_type_0"), val = tensor("valid")]; + tensor var_2473_strides_0 = const()[name = tensor("op_2473_strides_0"), val = tensor([1, 1])]; + tensor var_2473_pad_0 = const()[name = tensor("op_2473_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2473_dilations_0 = const()[name = tensor("op_2473_dilations_0"), val = tensor([1, 1])]; + tensor var_2473_groups_0 = const()[name = tensor("op_2473_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125647808))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125942784))), name = tensor("layers_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125942912)))]; + tensor var_2473_cast_fp16 = conv(bias = layers_7_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2473_dilations_0, groups = var_2473_groups_0, pad = var_2473_pad_0, pad_type = var_2473_pad_type_0, strides = var_2473_strides_0, weight = layers_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2473_cast_fp16")]; + tensor var_2479_pad_type_0 = const()[name = tensor("op_2479_pad_type_0"), val = tensor("valid")]; + tensor var_2479_strides_0 = const()[name = tensor("op_2479_strides_0"), val = tensor([1, 1])]; + tensor var_2479_pad_0 = const()[name = tensor("op_2479_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2479_dilations_0 = const()[name = tensor("op_2479_dilations_0"), val = tensor([1, 1])]; + tensor var_2479_groups_0 = const()[name = tensor("op_2479_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125950272))), name = tensor("layers_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125944512))), shape = tensor([768, 768, 1, 1])]; + tensor var_2479_cast_fp16 = conv(dilations = var_2479_dilations_0, groups = var_2479_groups_0, pad = var_2479_pad_0, pad_type = var_2479_pad_type_0, strides = var_2479_strides_0, weight = layers_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2479_cast_fp16")]; + tensor value_31_cast_fp16 = add(x = var_2473_cast_fp16, y = var_2479_cast_fp16)[name = tensor("value_31_cast_fp16")]; + tensor var_2483 = const()[name = tensor("op_2483"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_31_cast_fp16 = reshape(shape = var_2483, x = query_31_cast_fp16)[name = tensor("mh_q_31_cast_fp16")]; + tensor var_2485_to_fp16 = const()[name = tensor("op_2485_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2486_cast_fp16 = mul(x = mh_q_31_cast_fp16, y = var_2485_to_fp16)[name = tensor("op_2486_cast_fp16")]; + tensor var_2489 = const()[name = tensor("op_2489"), val = tensor([1, 12, 64, 1500])]; + tensor var_2490_cast_fp16 = reshape(shape = var_2489, x = key_31_cast_fp16)[name = tensor("op_2490_cast_fp16")]; + tensor mh_w_47_transpose_x_0 = const()[name = tensor("mh_w_47_transpose_x_0"), val = tensor(true)]; + tensor mh_w_47_transpose_y_0 = const()[name = tensor("mh_w_47_transpose_y_0"), val = tensor(false)]; + tensor mh_w_47_cast_fp16 = matmul(transpose_x = mh_w_47_transpose_x_0, transpose_y = mh_w_47_transpose_y_0, x = var_2486_cast_fp16, y = var_2490_cast_fp16)[name = tensor("mh_w_47_cast_fp16")]; + tensor obj_111_cast_fp16 = softmax(axis = var_2269, x = mh_w_47_cast_fp16)[name = tensor("obj_111_cast_fp16")]; + tensor var_2494 = const()[name = tensor("op_2494"), val = tensor([1, 12, 64, 1500])]; + tensor var_2495_cast_fp16 = reshape(shape = var_2494, x = value_31_cast_fp16)[name = tensor("op_2495_cast_fp16")]; + tensor attn_31_transpose_x_0 = const()[name = tensor("attn_31_transpose_x_0"), val = tensor(false)]; + tensor attn_31_transpose_y_0 = const()[name = tensor("attn_31_transpose_y_0"), val = tensor(true)]; + tensor attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_2495_cast_fp16, y = obj_111_cast_fp16)[name = tensor("attn_31_cast_fp16")]; + tensor var_2498 = const()[name = tensor("op_2498"), val = tensor([1, 768, 1, 1])]; + tensor input_73_cast_fp16 = reshape(shape = var_2498, x = attn_31_cast_fp16)[name = tensor("input_73_cast_fp16")]; + tensor var_2508_pad_type_0 = const()[name = tensor("op_2508_pad_type_0"), val = tensor("valid")]; + tensor var_2508_strides_0 = const()[name = tensor("op_2508_strides_0"), val = tensor([1, 1])]; + tensor var_2508_pad_0 = const()[name = tensor("op_2508_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2508_dilations_0 = const()[name = tensor("op_2508_dilations_0"), val = tensor([1, 1])]; + tensor var_2508_groups_0 = const()[name = tensor("op_2508_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126024064))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126319040))), name = tensor("layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126319168)))]; + tensor var_2508_cast_fp16 = conv(bias = layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2508_dilations_0, groups = var_2508_groups_0, pad = var_2508_pad_0, pad_type = var_2508_pad_type_0, strides = var_2508_strides_0, weight = layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2514_pad_type_0 = const()[name = tensor("op_2514_pad_type_0"), val = tensor("valid")]; + tensor var_2514_strides_0 = const()[name = tensor("op_2514_strides_0"), val = tensor([1, 1])]; + tensor var_2514_pad_0 = const()[name = tensor("op_2514_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2514_dilations_0 = const()[name = tensor("op_2514_dilations_0"), val = tensor([1, 1])]; + tensor var_2514_groups_0 = const()[name = tensor("op_2514_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126326528))), name = tensor("layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126320768))), shape = tensor([768, 768, 1, 1])]; + tensor var_2514_cast_fp16 = conv(dilations = var_2514_dilations_0, groups = var_2514_groups_0, pad = var_2514_pad_0, pad_type = var_2514_pad_type_0, strides = var_2514_strides_0, weight = layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = tensor("op_2514_cast_fp16")]; + tensor obj_109_cast_fp16 = add(x = var_2508_cast_fp16, y = var_2514_cast_fp16)[name = tensor("obj_109_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_109_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; + tensor out_47_axes_0 = const()[name = tensor("out_47_axes_0"), val = tensor([1])]; + tensor var_2528_to_fp16 = const()[name = tensor("op_2528_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2528_to_fp16, x = inputs_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126400320)))]; + tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126401920)))]; + tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor var_2546_pad_type_0 = const()[name = tensor("op_2546_pad_type_0"), val = tensor("valid")]; + tensor var_2546_strides_0 = const()[name = tensor("op_2546_strides_0"), val = tensor([1, 1])]; + tensor var_2546_pad_0 = const()[name = tensor("op_2546_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2546_dilations_0 = const()[name = tensor("op_2546_dilations_0"), val = tensor([1, 1])]; + tensor var_2546_groups_0 = const()[name = tensor("op_2546_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126403520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127583232))), name = tensor("layers_7_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127583360)))]; + tensor var_2546_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_2546_dilations_0, groups = var_2546_groups_0, pad = var_2546_pad_0, pad_type = var_2546_pad_type_0, strides = var_2546_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = tensor("op_2546_cast_fp16")]; + tensor var_2552_pad_type_0 = const()[name = tensor("op_2552_pad_type_0"), val = tensor("valid")]; + tensor var_2552_strides_0 = const()[name = tensor("op_2552_strides_0"), val = tensor([1, 1])]; + tensor var_2552_pad_0 = const()[name = tensor("op_2552_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2552_dilations_0 = const()[name = tensor("op_2552_dilations_0"), val = tensor([1, 1])]; + tensor var_2552_groups_0 = const()[name = tensor("op_2552_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127608768))), name = tensor("layers_7_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127589568))), shape = tensor([3072, 768, 1, 1])]; + tensor var_2552_cast_fp16 = conv(dilations = var_2552_dilations_0, groups = var_2552_groups_0, pad = var_2552_pad_0, pad_type = var_2552_pad_type_0, strides = var_2552_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = tensor("op_2552_cast_fp16")]; + tensor input_77_cast_fp16 = add(x = var_2546_cast_fp16, y = var_2552_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor var_2563_pad_type_0 = const()[name = tensor("op_2563_pad_type_0"), val = tensor("valid")]; + tensor var_2563_strides_0 = const()[name = tensor("op_2563_strides_0"), val = tensor([1, 1])]; + tensor var_2563_pad_0 = const()[name = tensor("op_2563_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2563_dilations_0 = const()[name = tensor("op_2563_dilations_0"), val = tensor([1, 1])]; + tensor var_2563_groups_0 = const()[name = tensor("op_2563_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127903744))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129083456))), name = tensor("layers_7_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129083584)))]; + tensor var_2563_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_2563_dilations_0, groups = var_2563_groups_0, pad = var_2563_pad_0, pad_type = var_2563_pad_type_0, strides = var_2563_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = tensor("op_2563_cast_fp16")]; + tensor var_2569_pad_type_0 = const()[name = tensor("op_2569_pad_type_0"), val = tensor("valid")]; + tensor var_2569_strides_0 = const()[name = tensor("op_2569_strides_0"), val = tensor([1, 1])]; + tensor var_2569_pad_0 = const()[name = tensor("op_2569_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2569_dilations_0 = const()[name = tensor("op_2569_dilations_0"), val = tensor([1, 1])]; + tensor var_2569_groups_0 = const()[name = tensor("op_2569_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129109568))), name = tensor("layers_7_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129085184))), shape = tensor([768, 3072, 1, 1])]; + tensor var_2569_cast_fp16 = conv(dilations = var_2569_dilations_0, groups = var_2569_groups_0, pad = var_2569_pad_0, pad_type = var_2569_pad_type_0, strides = var_2569_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = tensor("op_2569_cast_fp16")]; + tensor hidden_states_17_cast_fp16 = add(x = var_2563_cast_fp16, y = var_2569_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; + tensor inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_49_cast_fp16")]; + tensor var_2582 = const()[name = tensor("op_2582"), val = tensor(3)]; + tensor out_49_axes_0 = const()[name = tensor("out_49_axes_0"), val = tensor([1])]; + tensor var_2607_to_fp16 = const()[name = tensor("op_2607_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_2607_to_fp16, x = inputs_49_cast_fp16)[name = tensor("out_49_cast_fp16")]; + tensor obj_113_gamma_0_to_fp16 = const()[name = tensor("obj_113_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129404544)))]; + tensor obj_113_beta_0_to_fp16 = const()[name = tensor("obj_113_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129406144)))]; + tensor obj_113_epsilon_0_to_fp16 = const()[name = tensor("obj_113_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_113_cast_fp16 = batch_norm(beta = obj_113_beta_0_to_fp16, epsilon = obj_113_epsilon_0_to_fp16, gamma = obj_113_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_49_cast_fp16)[name = tensor("obj_113_cast_fp16")]; + tensor var_2629_pad_type_0 = const()[name = tensor("op_2629_pad_type_0"), val = tensor("valid")]; + tensor var_2629_strides_0 = const()[name = tensor("op_2629_strides_0"), val = tensor([1, 1])]; + tensor var_2629_pad_0 = const()[name = tensor("op_2629_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2629_dilations_0 = const()[name = tensor("op_2629_dilations_0"), val = tensor([1, 1])]; + tensor var_2629_groups_0 = const()[name = tensor("op_2629_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129407744))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129702720))), name = tensor("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129702848)))]; + tensor var_2629_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2629_dilations_0, groups = var_2629_groups_0, pad = var_2629_pad_0, pad_type = var_2629_pad_type_0, strides = var_2629_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_2629_cast_fp16")]; + tensor var_2635_pad_type_0 = const()[name = tensor("op_2635_pad_type_0"), val = tensor("valid")]; + tensor var_2635_strides_0 = const()[name = tensor("op_2635_strides_0"), val = tensor([1, 1])]; + tensor var_2635_pad_0 = const()[name = tensor("op_2635_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2635_dilations_0 = const()[name = tensor("op_2635_dilations_0"), val = tensor([1, 1])]; + tensor var_2635_groups_0 = const()[name = tensor("op_2635_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129710400))), name = tensor("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129704448))), shape = tensor([768, 768, 1, 1])]; + tensor var_2635_cast_fp16 = conv(dilations = var_2635_dilations_0, groups = var_2635_groups_0, pad = var_2635_pad_0, pad_type = var_2635_pad_type_0, strides = var_2635_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_2635_cast_fp16")]; + tensor query_33_cast_fp16 = add(x = var_2629_cast_fp16, y = var_2635_cast_fp16)[name = tensor("query_33_cast_fp16")]; + tensor var_2644_pad_type_0 = const()[name = tensor("op_2644_pad_type_0"), val = tensor("valid")]; + tensor var_2644_strides_0 = const()[name = tensor("op_2644_strides_0"), val = tensor([1, 1])]; + tensor var_2644_pad_0 = const()[name = tensor("op_2644_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2644_dilations_0 = const()[name = tensor("op_2644_dilations_0"), val = tensor([1, 1])]; + tensor var_2644_groups_0 = const()[name = tensor("op_2644_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129784192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130079168))), name = tensor("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2644_cast_fp16 = conv(dilations = var_2644_dilations_0, groups = var_2644_groups_0, pad = var_2644_pad_0, pad_type = var_2644_pad_type_0, strides = var_2644_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_2644_cast_fp16")]; + tensor var_2650_pad_type_0 = const()[name = tensor("op_2650_pad_type_0"), val = tensor("valid")]; + tensor var_2650_strides_0 = const()[name = tensor("op_2650_strides_0"), val = tensor([1, 1])]; + tensor var_2650_pad_0 = const()[name = tensor("op_2650_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2650_dilations_0 = const()[name = tensor("op_2650_dilations_0"), val = tensor([1, 1])]; + tensor var_2650_groups_0 = const()[name = tensor("op_2650_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130086208))), name = tensor("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130079296))), shape = tensor([768, 768, 1, 1])]; + tensor var_2650_cast_fp16 = conv(dilations = var_2650_dilations_0, groups = var_2650_groups_0, pad = var_2650_pad_0, pad_type = var_2650_pad_type_0, strides = var_2650_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_2650_cast_fp16")]; + tensor current_key_17_cast_fp16 = add(x = var_2644_cast_fp16, y = var_2650_cast_fp16)[name = tensor("current_key_17_cast_fp16")]; + tensor var_2660_pad_type_0 = const()[name = tensor("op_2660_pad_type_0"), val = tensor("valid")]; + tensor var_2660_strides_0 = const()[name = tensor("op_2660_strides_0"), val = tensor([1, 1])]; + tensor var_2660_pad_0 = const()[name = tensor("op_2660_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2660_dilations_0 = const()[name = tensor("op_2660_dilations_0"), val = tensor([1, 1])]; + tensor var_2660_groups_0 = const()[name = tensor("op_2660_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130160000))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130454976))), name = tensor("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130455104)))]; + tensor var_2660_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2660_dilations_0, groups = var_2660_groups_0, pad = var_2660_pad_0, pad_type = var_2660_pad_type_0, strides = var_2660_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_2660_cast_fp16")]; + tensor var_2666_pad_type_0 = const()[name = tensor("op_2666_pad_type_0"), val = tensor("valid")]; + tensor var_2666_strides_0 = const()[name = tensor("op_2666_strides_0"), val = tensor([1, 1])]; + tensor var_2666_pad_0 = const()[name = tensor("op_2666_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2666_dilations_0 = const()[name = tensor("op_2666_dilations_0"), val = tensor([1, 1])]; + tensor var_2666_groups_0 = const()[name = tensor("op_2666_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130463040))), name = tensor("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130456704))), shape = tensor([768, 768, 1, 1])]; + tensor var_2666_cast_fp16 = conv(dilations = var_2666_dilations_0, groups = var_2666_groups_0, pad = var_2666_pad_0, pad_type = var_2666_pad_type_0, strides = var_2666_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_2666_cast_fp16")]; + tensor current_value_17_cast_fp16 = add(x = var_2660_cast_fp16, y = var_2666_cast_fp16)[name = tensor("current_value_17_cast_fp16")]; + tensor var_2673_cast_fp16 = mul(x = var_69_cast_fp16_8, y = var_192_cast_fp16)[name = tensor("op_2673_cast_fp16")]; + tensor var_2674_cast_fp16 = mul(x = current_key_17_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2674_cast_fp16")]; + tensor key_33_cast_fp16 = add(x = var_2673_cast_fp16, y = var_2674_cast_fp16)[name = tensor("key_33_cast_fp16")]; + tensor var_2677_cast_fp16 = mul(x = var_84_cast_fp16_8, y = var_192_cast_fp16)[name = tensor("op_2677_cast_fp16")]; + tensor var_2678_cast_fp16 = mul(x = current_value_17_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2678_cast_fp16")]; + tensor value_33_cast_fp16 = add(x = var_2677_cast_fp16, y = var_2678_cast_fp16)[name = tensor("value_33_cast_fp16")]; + tensor var_2682 = const()[name = tensor("op_2682"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_33_cast_fp16 = reshape(shape = var_2682, x = query_33_cast_fp16)[name = tensor("mh_q_33_cast_fp16")]; + tensor var_2684_to_fp16 = const()[name = tensor("op_2684_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2685_cast_fp16 = mul(x = mh_q_33_cast_fp16, y = var_2684_to_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor var_2688 = const()[name = tensor("op_2688"), val = tensor([1, 12, 64, 448])]; + tensor var_2689_cast_fp16 = reshape(shape = var_2688, x = key_33_cast_fp16)[name = tensor("op_2689_cast_fp16")]; + tensor mh_w_49_transpose_x_0 = const()[name = tensor("mh_w_49_transpose_x_0"), val = tensor(true)]; + tensor mh_w_49_transpose_y_0 = const()[name = tensor("mh_w_49_transpose_y_0"), val = tensor(false)]; + tensor mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_2685_cast_fp16, y = var_2689_cast_fp16)[name = tensor("mh_w_49_cast_fp16")]; + tensor mh_w_51_cast_fp16 = add(x = mh_w_49_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_51_cast_fp16")]; + tensor var_2697_cast_fp16 = softmax(axis = var_2582, x = mh_w_51_cast_fp16)[name = tensor("op_2697_cast_fp16")]; + tensor var_2698 = const()[name = tensor("op_2698"), val = tensor([1, 12, 64, 448])]; + tensor var_2699_cast_fp16 = reshape(shape = var_2698, x = value_33_cast_fp16)[name = tensor("op_2699_cast_fp16")]; + tensor attn_33_transpose_x_0 = const()[name = tensor("attn_33_transpose_x_0"), val = tensor(false)]; + tensor attn_33_transpose_y_0 = const()[name = tensor("attn_33_transpose_y_0"), val = tensor(true)]; + tensor attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_2699_cast_fp16, y = var_2697_cast_fp16)[name = tensor("attn_33_cast_fp16")]; + tensor var_2702 = const()[name = tensor("op_2702"), val = tensor([1, 768, 1, 1])]; + tensor input_81_cast_fp16 = reshape(shape = var_2702, x = attn_33_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor var_2712_pad_type_0 = const()[name = tensor("op_2712_pad_type_0"), val = tensor("valid")]; + tensor var_2712_strides_0 = const()[name = tensor("op_2712_strides_0"), val = tensor([1, 1])]; + tensor var_2712_pad_0 = const()[name = tensor("op_2712_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2712_dilations_0 = const()[name = tensor("op_2712_dilations_0"), val = tensor([1, 1])]; + tensor var_2712_groups_0 = const()[name = tensor("op_2712_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130536832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130831808))), name = tensor("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130831936)))]; + tensor var_2712_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2712_dilations_0, groups = var_2712_groups_0, pad = var_2712_pad_0, pad_type = var_2712_pad_type_0, strides = var_2712_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = tensor("op_2712_cast_fp16")]; + tensor var_2718_pad_type_0 = const()[name = tensor("op_2718_pad_type_0"), val = tensor("valid")]; + tensor var_2718_strides_0 = const()[name = tensor("op_2718_strides_0"), val = tensor([1, 1])]; + tensor var_2718_pad_0 = const()[name = tensor("op_2718_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2718_dilations_0 = const()[name = tensor("op_2718_dilations_0"), val = tensor([1, 1])]; + tensor var_2718_groups_0 = const()[name = tensor("op_2718_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130839936))), name = tensor("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130833536))), shape = tensor([768, 768, 1, 1])]; + tensor var_2718_cast_fp16 = conv(dilations = var_2718_dilations_0, groups = var_2718_groups_0, pad = var_2718_pad_0, pad_type = var_2718_pad_type_0, strides = var_2718_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = tensor("op_2718_cast_fp16")]; + tensor obj_119_cast_fp16 = add(x = var_2712_cast_fp16, y = var_2718_cast_fp16)[name = tensor("obj_119_cast_fp16")]; + tensor inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_119_cast_fp16)[name = tensor("inputs_51_cast_fp16")]; + tensor out_51_axes_0 = const()[name = tensor("out_51_axes_0"), val = tensor([1])]; + tensor var_2733_to_fp16 = const()[name = tensor("op_2733_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_51_cast_fp16 = layer_norm(axes = out_51_axes_0, epsilon = var_2733_to_fp16, x = inputs_51_cast_fp16)[name = tensor("out_51_cast_fp16")]; + tensor obj_121_gamma_0_to_fp16 = const()[name = tensor("obj_121_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130913728)))]; + tensor obj_121_beta_0_to_fp16 = const()[name = tensor("obj_121_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130915328)))]; + tensor obj_121_epsilon_0_to_fp16 = const()[name = tensor("obj_121_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_121_cast_fp16 = batch_norm(beta = obj_121_beta_0_to_fp16, epsilon = obj_121_epsilon_0_to_fp16, gamma = obj_121_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_51_cast_fp16)[name = tensor("obj_121_cast_fp16")]; + tensor var_2755_pad_type_0 = const()[name = tensor("op_2755_pad_type_0"), val = tensor("valid")]; + tensor var_2755_strides_0 = const()[name = tensor("op_2755_strides_0"), val = tensor([1, 1])]; + tensor var_2755_pad_0 = const()[name = tensor("op_2755_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2755_dilations_0 = const()[name = tensor("op_2755_dilations_0"), val = tensor([1, 1])]; + tensor var_2755_groups_0 = const()[name = tensor("op_2755_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130916928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131211904))), name = tensor("layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131212032)))]; + tensor var_2755_cast_fp16 = conv(bias = layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2755_dilations_0, groups = var_2755_groups_0, pad = var_2755_pad_0, pad_type = var_2755_pad_type_0, strides = var_2755_strides_0, weight = layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_121_cast_fp16)[name = tensor("op_2755_cast_fp16")]; + tensor var_2761_pad_type_0 = const()[name = tensor("op_2761_pad_type_0"), val = tensor("valid")]; + tensor var_2761_strides_0 = const()[name = tensor("op_2761_strides_0"), val = tensor([1, 1])]; + tensor var_2761_pad_0 = const()[name = tensor("op_2761_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2761_dilations_0 = const()[name = tensor("op_2761_dilations_0"), val = tensor([1, 1])]; + tensor var_2761_groups_0 = const()[name = tensor("op_2761_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131219264))), name = tensor("layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131213632))), shape = tensor([768, 768, 1, 1])]; + tensor var_2761_cast_fp16 = conv(dilations = var_2761_dilations_0, groups = var_2761_groups_0, pad = var_2761_pad_0, pad_type = var_2761_pad_type_0, strides = var_2761_strides_0, weight = layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_121_cast_fp16)[name = tensor("op_2761_cast_fp16")]; + tensor query_35_cast_fp16 = add(x = var_2755_cast_fp16, y = var_2761_cast_fp16)[name = tensor("query_35_cast_fp16")]; + tensor var_2770_pad_type_0 = const()[name = tensor("op_2770_pad_type_0"), val = tensor("valid")]; + tensor var_2770_strides_0 = const()[name = tensor("op_2770_strides_0"), val = tensor([1, 1])]; + tensor var_2770_pad_0 = const()[name = tensor("op_2770_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2770_dilations_0 = const()[name = tensor("op_2770_dilations_0"), val = tensor([1, 1])]; + tensor var_2770_groups_0 = const()[name = tensor("op_2770_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131293056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131588032))), name = tensor("layers_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2770_cast_fp16 = conv(dilations = var_2770_dilations_0, groups = var_2770_groups_0, pad = var_2770_pad_0, pad_type = var_2770_pad_type_0, strides = var_2770_strides_0, weight = layers_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2770_cast_fp16")]; + tensor var_2776_pad_type_0 = const()[name = tensor("op_2776_pad_type_0"), val = tensor("valid")]; + tensor var_2776_strides_0 = const()[name = tensor("op_2776_strides_0"), val = tensor([1, 1])]; + tensor var_2776_pad_0 = const()[name = tensor("op_2776_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2776_dilations_0 = const()[name = tensor("op_2776_dilations_0"), val = tensor([1, 1])]; + tensor var_2776_groups_0 = const()[name = tensor("op_2776_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131595968))), name = tensor("layers_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131588160))), shape = tensor([768, 768, 1, 1])]; + tensor var_2776_cast_fp16 = conv(dilations = var_2776_dilations_0, groups = var_2776_groups_0, pad = var_2776_pad_0, pad_type = var_2776_pad_type_0, strides = var_2776_strides_0, weight = layers_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2776_cast_fp16")]; + tensor key_35_cast_fp16 = add(x = var_2770_cast_fp16, y = var_2776_cast_fp16)[name = tensor("key_35_cast_fp16")]; + tensor var_2786_pad_type_0 = const()[name = tensor("op_2786_pad_type_0"), val = tensor("valid")]; + tensor var_2786_strides_0 = const()[name = tensor("op_2786_strides_0"), val = tensor([1, 1])]; + tensor var_2786_pad_0 = const()[name = tensor("op_2786_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2786_dilations_0 = const()[name = tensor("op_2786_dilations_0"), val = tensor([1, 1])]; + tensor var_2786_groups_0 = const()[name = tensor("op_2786_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131669760))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131964736))), name = tensor("layers_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131964864)))]; + tensor var_2786_cast_fp16 = conv(bias = layers_8_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2786_dilations_0, groups = var_2786_groups_0, pad = var_2786_pad_0, pad_type = var_2786_pad_type_0, strides = var_2786_strides_0, weight = layers_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2786_cast_fp16")]; + tensor var_2792_pad_type_0 = const()[name = tensor("op_2792_pad_type_0"), val = tensor("valid")]; + tensor var_2792_strides_0 = const()[name = tensor("op_2792_strides_0"), val = tensor([1, 1])]; + tensor var_2792_pad_0 = const()[name = tensor("op_2792_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2792_dilations_0 = const()[name = tensor("op_2792_dilations_0"), val = tensor([1, 1])]; + tensor var_2792_groups_0 = const()[name = tensor("op_2792_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131972160))), name = tensor("layers_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131966464))), shape = tensor([768, 768, 1, 1])]; + tensor var_2792_cast_fp16 = conv(dilations = var_2792_dilations_0, groups = var_2792_groups_0, pad = var_2792_pad_0, pad_type = var_2792_pad_type_0, strides = var_2792_strides_0, weight = layers_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2792_cast_fp16")]; + tensor value_35_cast_fp16 = add(x = var_2786_cast_fp16, y = var_2792_cast_fp16)[name = tensor("value_35_cast_fp16")]; + tensor var_2796 = const()[name = tensor("op_2796"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_35_cast_fp16 = reshape(shape = var_2796, x = query_35_cast_fp16)[name = tensor("mh_q_35_cast_fp16")]; + tensor var_2798_to_fp16 = const()[name = tensor("op_2798_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2799_cast_fp16 = mul(x = mh_q_35_cast_fp16, y = var_2798_to_fp16)[name = tensor("op_2799_cast_fp16")]; + tensor var_2802 = const()[name = tensor("op_2802"), val = tensor([1, 12, 64, 1500])]; + tensor var_2803_cast_fp16 = reshape(shape = var_2802, x = key_35_cast_fp16)[name = tensor("op_2803_cast_fp16")]; + tensor mh_w_53_transpose_x_0 = const()[name = tensor("mh_w_53_transpose_x_0"), val = tensor(true)]; + tensor mh_w_53_transpose_y_0 = const()[name = tensor("mh_w_53_transpose_y_0"), val = tensor(false)]; + tensor mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_2799_cast_fp16, y = var_2803_cast_fp16)[name = tensor("mh_w_53_cast_fp16")]; + tensor obj_125_cast_fp16 = softmax(axis = var_2582, x = mh_w_53_cast_fp16)[name = tensor("obj_125_cast_fp16")]; + tensor var_2807 = const()[name = tensor("op_2807"), val = tensor([1, 12, 64, 1500])]; + tensor var_2808_cast_fp16 = reshape(shape = var_2807, x = value_35_cast_fp16)[name = tensor("op_2808_cast_fp16")]; + tensor attn_35_transpose_x_0 = const()[name = tensor("attn_35_transpose_x_0"), val = tensor(false)]; + tensor attn_35_transpose_y_0 = const()[name = tensor("attn_35_transpose_y_0"), val = tensor(true)]; + tensor attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_2808_cast_fp16, y = obj_125_cast_fp16)[name = tensor("attn_35_cast_fp16")]; + tensor var_2811 = const()[name = tensor("op_2811"), val = tensor([1, 768, 1, 1])]; + tensor input_83_cast_fp16 = reshape(shape = var_2811, x = attn_35_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_2821_pad_type_0 = const()[name = tensor("op_2821_pad_type_0"), val = tensor("valid")]; + tensor var_2821_strides_0 = const()[name = tensor("op_2821_strides_0"), val = tensor([1, 1])]; + tensor var_2821_pad_0 = const()[name = tensor("op_2821_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2821_dilations_0 = const()[name = tensor("op_2821_dilations_0"), val = tensor([1, 1])]; + tensor var_2821_groups_0 = const()[name = tensor("op_2821_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132045952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132340928))), name = tensor("layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132341056)))]; + tensor var_2821_cast_fp16 = conv(bias = layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2821_dilations_0, groups = var_2821_groups_0, pad = var_2821_pad_0, pad_type = var_2821_pad_type_0, strides = var_2821_strides_0, weight = layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = tensor("op_2821_cast_fp16")]; + tensor var_2827_pad_type_0 = const()[name = tensor("op_2827_pad_type_0"), val = tensor("valid")]; + tensor var_2827_strides_0 = const()[name = tensor("op_2827_strides_0"), val = tensor([1, 1])]; + tensor var_2827_pad_0 = const()[name = tensor("op_2827_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2827_dilations_0 = const()[name = tensor("op_2827_dilations_0"), val = tensor([1, 1])]; + tensor var_2827_groups_0 = const()[name = tensor("op_2827_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132348096))), name = tensor("layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132342656))), shape = tensor([768, 768, 1, 1])]; + tensor var_2827_cast_fp16 = conv(dilations = var_2827_dilations_0, groups = var_2827_groups_0, pad = var_2827_pad_0, pad_type = var_2827_pad_type_0, strides = var_2827_strides_0, weight = layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = tensor("op_2827_cast_fp16")]; + tensor obj_123_cast_fp16 = add(x = var_2821_cast_fp16, y = var_2827_cast_fp16)[name = tensor("obj_123_cast_fp16")]; + tensor inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = obj_123_cast_fp16)[name = tensor("inputs_53_cast_fp16")]; + tensor out_53_axes_0 = const()[name = tensor("out_53_axes_0"), val = tensor([1])]; + tensor var_2841_to_fp16 = const()[name = tensor("op_2841_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_53_cast_fp16 = layer_norm(axes = out_53_axes_0, epsilon = var_2841_to_fp16, x = inputs_53_cast_fp16)[name = tensor("out_53_cast_fp16")]; + tensor input_85_gamma_0_to_fp16 = const()[name = tensor("input_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132421888)))]; + tensor input_85_beta_0_to_fp16 = const()[name = tensor("input_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132423488)))]; + tensor input_85_epsilon_0_to_fp16 = const()[name = tensor("input_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_85_cast_fp16 = batch_norm(beta = input_85_beta_0_to_fp16, epsilon = input_85_epsilon_0_to_fp16, gamma = input_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_53_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor var_2859_pad_type_0 = const()[name = tensor("op_2859_pad_type_0"), val = tensor("valid")]; + tensor var_2859_strides_0 = const()[name = tensor("op_2859_strides_0"), val = tensor([1, 1])]; + tensor var_2859_pad_0 = const()[name = tensor("op_2859_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2859_dilations_0 = const()[name = tensor("op_2859_dilations_0"), val = tensor([1, 1])]; + tensor var_2859_groups_0 = const()[name = tensor("op_2859_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132425088))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133604800))), name = tensor("layers_8_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133604928)))]; + tensor var_2859_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_2859_dilations_0, groups = var_2859_groups_0, pad = var_2859_pad_0, pad_type = var_2859_pad_type_0, strides = var_2859_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = tensor("op_2859_cast_fp16")]; + tensor var_2865_pad_type_0 = const()[name = tensor("op_2865_pad_type_0"), val = tensor("valid")]; + tensor var_2865_strides_0 = const()[name = tensor("op_2865_strides_0"), val = tensor([1, 1])]; + tensor var_2865_pad_0 = const()[name = tensor("op_2865_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2865_dilations_0 = const()[name = tensor("op_2865_dilations_0"), val = tensor([1, 1])]; + tensor var_2865_groups_0 = const()[name = tensor("op_2865_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133629056))), name = tensor("layers_8_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133611136))), shape = tensor([3072, 768, 1, 1])]; + tensor var_2865_cast_fp16 = conv(dilations = var_2865_dilations_0, groups = var_2865_groups_0, pad = var_2865_pad_0, pad_type = var_2865_pad_type_0, strides = var_2865_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_85_cast_fp16)[name = tensor("op_2865_cast_fp16")]; + tensor input_87_cast_fp16 = add(x = var_2859_cast_fp16, y = var_2865_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor input_89_mode_0 = const()[name = tensor("input_89_mode_0"), val = tensor("EXACT")]; + tensor input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = input_87_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor var_2876_pad_type_0 = const()[name = tensor("op_2876_pad_type_0"), val = tensor("valid")]; + tensor var_2876_strides_0 = const()[name = tensor("op_2876_strides_0"), val = tensor([1, 1])]; + tensor var_2876_pad_0 = const()[name = tensor("op_2876_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2876_dilations_0 = const()[name = tensor("op_2876_dilations_0"), val = tensor([1, 1])]; + tensor var_2876_groups_0 = const()[name = tensor("op_2876_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133924032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135103744))), name = tensor("layers_8_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135103872)))]; + tensor var_2876_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_2876_dilations_0, groups = var_2876_groups_0, pad = var_2876_pad_0, pad_type = var_2876_pad_type_0, strides = var_2876_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = tensor("op_2876_cast_fp16")]; + tensor var_2882_pad_type_0 = const()[name = tensor("op_2882_pad_type_0"), val = tensor("valid")]; + tensor var_2882_strides_0 = const()[name = tensor("op_2882_strides_0"), val = tensor([1, 1])]; + tensor var_2882_pad_0 = const()[name = tensor("op_2882_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2882_dilations_0 = const()[name = tensor("op_2882_dilations_0"), val = tensor([1, 1])]; + tensor var_2882_groups_0 = const()[name = tensor("op_2882_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135129600))), name = tensor("layers_8_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135105472))), shape = tensor([768, 3072, 1, 1])]; + tensor var_2882_cast_fp16 = conv(dilations = var_2882_dilations_0, groups = var_2882_groups_0, pad = var_2882_pad_0, pad_type = var_2882_pad_type_0, strides = var_2882_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = tensor("op_2882_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = add(x = var_2876_cast_fp16, y = var_2882_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; + tensor inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_55_cast_fp16")]; + tensor var_2895 = const()[name = tensor("op_2895"), val = tensor(3)]; + tensor out_55_axes_0 = const()[name = tensor("out_55_axes_0"), val = tensor([1])]; + tensor var_2920_to_fp16 = const()[name = tensor("op_2920_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_2920_to_fp16, x = inputs_55_cast_fp16)[name = tensor("out_55_cast_fp16")]; + tensor obj_127_gamma_0_to_fp16 = const()[name = tensor("obj_127_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135424576)))]; + tensor obj_127_beta_0_to_fp16 = const()[name = tensor("obj_127_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135426176)))]; + tensor obj_127_epsilon_0_to_fp16 = const()[name = tensor("obj_127_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_127_cast_fp16 = batch_norm(beta = obj_127_beta_0_to_fp16, epsilon = obj_127_epsilon_0_to_fp16, gamma = obj_127_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_55_cast_fp16)[name = tensor("obj_127_cast_fp16")]; + tensor var_2942_pad_type_0 = const()[name = tensor("op_2942_pad_type_0"), val = tensor("valid")]; + tensor var_2942_strides_0 = const()[name = tensor("op_2942_strides_0"), val = tensor([1, 1])]; + tensor var_2942_pad_0 = const()[name = tensor("op_2942_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2942_dilations_0 = const()[name = tensor("op_2942_dilations_0"), val = tensor([1, 1])]; + tensor var_2942_groups_0 = const()[name = tensor("op_2942_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135427776))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135722752))), name = tensor("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135722880)))]; + tensor var_2942_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2942_dilations_0, groups = var_2942_groups_0, pad = var_2942_pad_0, pad_type = var_2942_pad_type_0, strides = var_2942_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_127_cast_fp16)[name = tensor("op_2942_cast_fp16")]; + tensor var_2948_pad_type_0 = const()[name = tensor("op_2948_pad_type_0"), val = tensor("valid")]; + tensor var_2948_strides_0 = const()[name = tensor("op_2948_strides_0"), val = tensor([1, 1])]; + tensor var_2948_pad_0 = const()[name = tensor("op_2948_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2948_dilations_0 = const()[name = tensor("op_2948_dilations_0"), val = tensor([1, 1])]; + tensor var_2948_groups_0 = const()[name = tensor("op_2948_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135729792))), name = tensor("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135724480))), shape = tensor([768, 768, 1, 1])]; + tensor var_2948_cast_fp16 = conv(dilations = var_2948_dilations_0, groups = var_2948_groups_0, pad = var_2948_pad_0, pad_type = var_2948_pad_type_0, strides = var_2948_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_127_cast_fp16)[name = tensor("op_2948_cast_fp16")]; + tensor query_37_cast_fp16 = add(x = var_2942_cast_fp16, y = var_2948_cast_fp16)[name = tensor("query_37_cast_fp16")]; + tensor var_2957_pad_type_0 = const()[name = tensor("op_2957_pad_type_0"), val = tensor("valid")]; + tensor var_2957_strides_0 = const()[name = tensor("op_2957_strides_0"), val = tensor([1, 1])]; + tensor var_2957_pad_0 = const()[name = tensor("op_2957_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2957_dilations_0 = const()[name = tensor("op_2957_dilations_0"), val = tensor([1, 1])]; + tensor var_2957_groups_0 = const()[name = tensor("op_2957_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135803584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136098560))), name = tensor("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2957_cast_fp16 = conv(dilations = var_2957_dilations_0, groups = var_2957_groups_0, pad = var_2957_pad_0, pad_type = var_2957_pad_type_0, strides = var_2957_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_127_cast_fp16)[name = tensor("op_2957_cast_fp16")]; + tensor var_2963_pad_type_0 = const()[name = tensor("op_2963_pad_type_0"), val = tensor("valid")]; + tensor var_2963_strides_0 = const()[name = tensor("op_2963_strides_0"), val = tensor([1, 1])]; + tensor var_2963_pad_0 = const()[name = tensor("op_2963_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2963_dilations_0 = const()[name = tensor("op_2963_dilations_0"), val = tensor([1, 1])]; + tensor var_2963_groups_0 = const()[name = tensor("op_2963_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136104128))), name = tensor("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136098688))), shape = tensor([768, 768, 1, 1])]; + tensor var_2963_cast_fp16 = conv(dilations = var_2963_dilations_0, groups = var_2963_groups_0, pad = var_2963_pad_0, pad_type = var_2963_pad_type_0, strides = var_2963_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_127_cast_fp16)[name = tensor("op_2963_cast_fp16")]; + tensor current_key_19_cast_fp16 = add(x = var_2957_cast_fp16, y = var_2963_cast_fp16)[name = tensor("current_key_19_cast_fp16")]; + tensor var_2973_pad_type_0 = const()[name = tensor("op_2973_pad_type_0"), val = tensor("valid")]; + tensor var_2973_strides_0 = const()[name = tensor("op_2973_strides_0"), val = tensor([1, 1])]; + tensor var_2973_pad_0 = const()[name = tensor("op_2973_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2973_dilations_0 = const()[name = tensor("op_2973_dilations_0"), val = tensor([1, 1])]; + tensor var_2973_groups_0 = const()[name = tensor("op_2973_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136177920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136472896))), name = tensor("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136473024)))]; + tensor var_2973_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2973_dilations_0, groups = var_2973_groups_0, pad = var_2973_pad_0, pad_type = var_2973_pad_type_0, strides = var_2973_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_127_cast_fp16)[name = tensor("op_2973_cast_fp16")]; + tensor var_2979_pad_type_0 = const()[name = tensor("op_2979_pad_type_0"), val = tensor("valid")]; + tensor var_2979_strides_0 = const()[name = tensor("op_2979_strides_0"), val = tensor([1, 1])]; + tensor var_2979_pad_0 = const()[name = tensor("op_2979_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2979_dilations_0 = const()[name = tensor("op_2979_dilations_0"), val = tensor([1, 1])]; + tensor var_2979_groups_0 = const()[name = tensor("op_2979_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136481344))), name = tensor("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136474624))), shape = tensor([768, 768, 1, 1])]; + tensor var_2979_cast_fp16 = conv(dilations = var_2979_dilations_0, groups = var_2979_groups_0, pad = var_2979_pad_0, pad_type = var_2979_pad_type_0, strides = var_2979_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_127_cast_fp16)[name = tensor("op_2979_cast_fp16")]; + tensor current_value_19_cast_fp16 = add(x = var_2973_cast_fp16, y = var_2979_cast_fp16)[name = tensor("current_value_19_cast_fp16")]; + tensor var_2986_cast_fp16 = mul(x = var_69_cast_fp16_9, y = var_192_cast_fp16)[name = tensor("op_2986_cast_fp16")]; + tensor var_2987_cast_fp16 = mul(x = current_key_19_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2987_cast_fp16")]; + tensor key_37_cast_fp16 = add(x = var_2986_cast_fp16, y = var_2987_cast_fp16)[name = tensor("key_37_cast_fp16")]; + tensor var_2990_cast_fp16 = mul(x = var_84_cast_fp16_9, y = var_192_cast_fp16)[name = tensor("op_2990_cast_fp16")]; + tensor var_2991_cast_fp16 = mul(x = current_value_19_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2991_cast_fp16")]; + tensor value_37_cast_fp16 = add(x = var_2990_cast_fp16, y = var_2991_cast_fp16)[name = tensor("value_37_cast_fp16")]; + tensor var_2995 = const()[name = tensor("op_2995"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_37_cast_fp16 = reshape(shape = var_2995, x = query_37_cast_fp16)[name = tensor("mh_q_37_cast_fp16")]; + tensor var_2997_to_fp16 = const()[name = tensor("op_2997_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2998_cast_fp16 = mul(x = mh_q_37_cast_fp16, y = var_2997_to_fp16)[name = tensor("op_2998_cast_fp16")]; + tensor var_3001 = const()[name = tensor("op_3001"), val = tensor([1, 12, 64, 448])]; + tensor var_3002_cast_fp16 = reshape(shape = var_3001, x = key_37_cast_fp16)[name = tensor("op_3002_cast_fp16")]; + tensor mh_w_55_transpose_x_0 = const()[name = tensor("mh_w_55_transpose_x_0"), val = tensor(true)]; + tensor mh_w_55_transpose_y_0 = const()[name = tensor("mh_w_55_transpose_y_0"), val = tensor(false)]; + tensor mh_w_55_cast_fp16 = matmul(transpose_x = mh_w_55_transpose_x_0, transpose_y = mh_w_55_transpose_y_0, x = var_2998_cast_fp16, y = var_3002_cast_fp16)[name = tensor("mh_w_55_cast_fp16")]; + tensor mh_w_57_cast_fp16 = add(x = mh_w_55_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_57_cast_fp16")]; + tensor var_3010_cast_fp16 = softmax(axis = var_2895, x = mh_w_57_cast_fp16)[name = tensor("op_3010_cast_fp16")]; + tensor var_3011 = const()[name = tensor("op_3011"), val = tensor([1, 12, 64, 448])]; + tensor var_3012_cast_fp16 = reshape(shape = var_3011, x = value_37_cast_fp16)[name = tensor("op_3012_cast_fp16")]; + tensor attn_37_transpose_x_0 = const()[name = tensor("attn_37_transpose_x_0"), val = tensor(false)]; + tensor attn_37_transpose_y_0 = const()[name = tensor("attn_37_transpose_y_0"), val = tensor(true)]; + tensor attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_3012_cast_fp16, y = var_3010_cast_fp16)[name = tensor("attn_37_cast_fp16")]; + tensor var_3015 = const()[name = tensor("op_3015"), val = tensor([1, 768, 1, 1])]; + tensor input_91_cast_fp16 = reshape(shape = var_3015, x = attn_37_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor var_3025_pad_type_0 = const()[name = tensor("op_3025_pad_type_0"), val = tensor("valid")]; + tensor var_3025_strides_0 = const()[name = tensor("op_3025_strides_0"), val = tensor([1, 1])]; + tensor var_3025_pad_0 = const()[name = tensor("op_3025_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3025_dilations_0 = const()[name = tensor("op_3025_dilations_0"), val = tensor([1, 1])]; + tensor var_3025_groups_0 = const()[name = tensor("op_3025_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136555136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136850112))), name = tensor("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136850240)))]; + tensor var_3025_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3025_dilations_0, groups = var_3025_groups_0, pad = var_3025_pad_0, pad_type = var_3025_pad_type_0, strides = var_3025_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = tensor("op_3025_cast_fp16")]; + tensor var_3031_pad_type_0 = const()[name = tensor("op_3031_pad_type_0"), val = tensor("valid")]; + tensor var_3031_strides_0 = const()[name = tensor("op_3031_strides_0"), val = tensor([1, 1])]; + tensor var_3031_pad_0 = const()[name = tensor("op_3031_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3031_dilations_0 = const()[name = tensor("op_3031_dilations_0"), val = tensor([1, 1])]; + tensor var_3031_groups_0 = const()[name = tensor("op_3031_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136858432))), name = tensor("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136851840))), shape = tensor([768, 768, 1, 1])]; + tensor var_3031_cast_fp16 = conv(dilations = var_3031_dilations_0, groups = var_3031_groups_0, pad = var_3031_pad_0, pad_type = var_3031_pad_type_0, strides = var_3031_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = tensor("op_3031_cast_fp16")]; + tensor obj_133_cast_fp16 = add(x = var_3025_cast_fp16, y = var_3031_cast_fp16)[name = tensor("obj_133_cast_fp16")]; + tensor inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = obj_133_cast_fp16)[name = tensor("inputs_57_cast_fp16")]; + tensor out_57_axes_0 = const()[name = tensor("out_57_axes_0"), val = tensor([1])]; + tensor var_3046_to_fp16 = const()[name = tensor("op_3046_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_57_cast_fp16 = layer_norm(axes = out_57_axes_0, epsilon = var_3046_to_fp16, x = inputs_57_cast_fp16)[name = tensor("out_57_cast_fp16")]; + tensor obj_135_gamma_0_to_fp16 = const()[name = tensor("obj_135_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136932224)))]; + tensor obj_135_beta_0_to_fp16 = const()[name = tensor("obj_135_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136933824)))]; + tensor obj_135_epsilon_0_to_fp16 = const()[name = tensor("obj_135_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_135_cast_fp16 = batch_norm(beta = obj_135_beta_0_to_fp16, epsilon = obj_135_epsilon_0_to_fp16, gamma = obj_135_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_57_cast_fp16)[name = tensor("obj_135_cast_fp16")]; + tensor var_3068_pad_type_0 = const()[name = tensor("op_3068_pad_type_0"), val = tensor("valid")]; + tensor var_3068_strides_0 = const()[name = tensor("op_3068_strides_0"), val = tensor([1, 1])]; + tensor var_3068_pad_0 = const()[name = tensor("op_3068_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3068_dilations_0 = const()[name = tensor("op_3068_dilations_0"), val = tensor([1, 1])]; + tensor var_3068_groups_0 = const()[name = tensor("op_3068_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136935424))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137230400))), name = tensor("layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137230528)))]; + tensor var_3068_cast_fp16 = conv(bias = layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3068_dilations_0, groups = var_3068_groups_0, pad = var_3068_pad_0, pad_type = var_3068_pad_type_0, strides = var_3068_strides_0, weight = layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_135_cast_fp16)[name = tensor("op_3068_cast_fp16")]; + tensor var_3074_pad_type_0 = const()[name = tensor("op_3074_pad_type_0"), val = tensor("valid")]; + tensor var_3074_strides_0 = const()[name = tensor("op_3074_strides_0"), val = tensor([1, 1])]; + tensor var_3074_pad_0 = const()[name = tensor("op_3074_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3074_dilations_0 = const()[name = tensor("op_3074_dilations_0"), val = tensor([1, 1])]; + tensor var_3074_groups_0 = const()[name = tensor("op_3074_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137237248))), name = tensor("layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137232128))), shape = tensor([768, 768, 1, 1])]; + tensor var_3074_cast_fp16 = conv(dilations = var_3074_dilations_0, groups = var_3074_groups_0, pad = var_3074_pad_0, pad_type = var_3074_pad_type_0, strides = var_3074_strides_0, weight = layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_135_cast_fp16)[name = tensor("op_3074_cast_fp16")]; + tensor query_39_cast_fp16 = add(x = var_3068_cast_fp16, y = var_3074_cast_fp16)[name = tensor("query_39_cast_fp16")]; + tensor var_3083_pad_type_0 = const()[name = tensor("op_3083_pad_type_0"), val = tensor("valid")]; + tensor var_3083_strides_0 = const()[name = tensor("op_3083_strides_0"), val = tensor([1, 1])]; + tensor var_3083_pad_0 = const()[name = tensor("op_3083_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3083_dilations_0 = const()[name = tensor("op_3083_dilations_0"), val = tensor([1, 1])]; + tensor var_3083_groups_0 = const()[name = tensor("op_3083_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137311040))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137606016))), name = tensor("layers_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_3083_cast_fp16 = conv(dilations = var_3083_dilations_0, groups = var_3083_groups_0, pad = var_3083_pad_0, pad_type = var_3083_pad_type_0, strides = var_3083_strides_0, weight = layers_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3083_cast_fp16")]; + tensor var_3089_pad_type_0 = const()[name = tensor("op_3089_pad_type_0"), val = tensor("valid")]; + tensor var_3089_strides_0 = const()[name = tensor("op_3089_strides_0"), val = tensor([1, 1])]; + tensor var_3089_pad_0 = const()[name = tensor("op_3089_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3089_dilations_0 = const()[name = tensor("op_3089_dilations_0"), val = tensor([1, 1])]; + tensor var_3089_groups_0 = const()[name = tensor("op_3089_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137612608))), name = tensor("layers_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137606144))), shape = tensor([768, 768, 1, 1])]; + tensor var_3089_cast_fp16 = conv(dilations = var_3089_dilations_0, groups = var_3089_groups_0, pad = var_3089_pad_0, pad_type = var_3089_pad_type_0, strides = var_3089_strides_0, weight = layers_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3089_cast_fp16")]; + tensor key_39_cast_fp16 = add(x = var_3083_cast_fp16, y = var_3089_cast_fp16)[name = tensor("key_39_cast_fp16")]; + tensor var_3099_pad_type_0 = const()[name = tensor("op_3099_pad_type_0"), val = tensor("valid")]; + tensor var_3099_strides_0 = const()[name = tensor("op_3099_strides_0"), val = tensor([1, 1])]; + tensor var_3099_pad_0 = const()[name = tensor("op_3099_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3099_dilations_0 = const()[name = tensor("op_3099_dilations_0"), val = tensor([1, 1])]; + tensor var_3099_groups_0 = const()[name = tensor("op_3099_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137686400))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137981376))), name = tensor("layers_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137981504)))]; + tensor var_3099_cast_fp16 = conv(bias = layers_9_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3099_dilations_0, groups = var_3099_groups_0, pad = var_3099_pad_0, pad_type = var_3099_pad_type_0, strides = var_3099_strides_0, weight = layers_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3099_cast_fp16")]; + tensor var_3105_pad_type_0 = const()[name = tensor("op_3105_pad_type_0"), val = tensor("valid")]; + tensor var_3105_strides_0 = const()[name = tensor("op_3105_strides_0"), val = tensor([1, 1])]; + tensor var_3105_pad_0 = const()[name = tensor("op_3105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3105_dilations_0 = const()[name = tensor("op_3105_dilations_0"), val = tensor([1, 1])]; + tensor var_3105_groups_0 = const()[name = tensor("op_3105_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137988352))), name = tensor("layers_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137983104))), shape = tensor([768, 768, 1, 1])]; + tensor var_3105_cast_fp16 = conv(dilations = var_3105_dilations_0, groups = var_3105_groups_0, pad = var_3105_pad_0, pad_type = var_3105_pad_type_0, strides = var_3105_strides_0, weight = layers_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3105_cast_fp16")]; + tensor value_39_cast_fp16 = add(x = var_3099_cast_fp16, y = var_3105_cast_fp16)[name = tensor("value_39_cast_fp16")]; + tensor var_3109 = const()[name = tensor("op_3109"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_39_cast_fp16 = reshape(shape = var_3109, x = query_39_cast_fp16)[name = tensor("mh_q_39_cast_fp16")]; + tensor var_3111_to_fp16 = const()[name = tensor("op_3111_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3112_cast_fp16 = mul(x = mh_q_39_cast_fp16, y = var_3111_to_fp16)[name = tensor("op_3112_cast_fp16")]; + tensor var_3115 = const()[name = tensor("op_3115"), val = tensor([1, 12, 64, 1500])]; + tensor var_3116_cast_fp16 = reshape(shape = var_3115, x = key_39_cast_fp16)[name = tensor("op_3116_cast_fp16")]; + tensor mh_w_59_transpose_x_0 = const()[name = tensor("mh_w_59_transpose_x_0"), val = tensor(true)]; + tensor mh_w_59_transpose_y_0 = const()[name = tensor("mh_w_59_transpose_y_0"), val = tensor(false)]; + tensor mh_w_59_cast_fp16 = matmul(transpose_x = mh_w_59_transpose_x_0, transpose_y = mh_w_59_transpose_y_0, x = var_3112_cast_fp16, y = var_3116_cast_fp16)[name = tensor("mh_w_59_cast_fp16")]; + tensor obj_139_cast_fp16 = softmax(axis = var_2895, x = mh_w_59_cast_fp16)[name = tensor("obj_139_cast_fp16")]; + tensor var_3120 = const()[name = tensor("op_3120"), val = tensor([1, 12, 64, 1500])]; + tensor var_3121_cast_fp16 = reshape(shape = var_3120, x = value_39_cast_fp16)[name = tensor("op_3121_cast_fp16")]; + tensor attn_39_transpose_x_0 = const()[name = tensor("attn_39_transpose_x_0"), val = tensor(false)]; + tensor attn_39_transpose_y_0 = const()[name = tensor("attn_39_transpose_y_0"), val = tensor(true)]; + tensor attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_3121_cast_fp16, y = obj_139_cast_fp16)[name = tensor("attn_39_cast_fp16")]; + tensor var_3124 = const()[name = tensor("op_3124"), val = tensor([1, 768, 1, 1])]; + tensor input_93_cast_fp16 = reshape(shape = var_3124, x = attn_39_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor var_3134_pad_type_0 = const()[name = tensor("op_3134_pad_type_0"), val = tensor("valid")]; + tensor var_3134_strides_0 = const()[name = tensor("op_3134_strides_0"), val = tensor([1, 1])]; + tensor var_3134_pad_0 = const()[name = tensor("op_3134_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3134_dilations_0 = const()[name = tensor("op_3134_dilations_0"), val = tensor([1, 1])]; + tensor var_3134_groups_0 = const()[name = tensor("op_3134_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138062144))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138357120))), name = tensor("layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138357248)))]; + tensor var_3134_cast_fp16 = conv(bias = layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3134_dilations_0, groups = var_3134_groups_0, pad = var_3134_pad_0, pad_type = var_3134_pad_type_0, strides = var_3134_strides_0, weight = layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_93_cast_fp16)[name = tensor("op_3134_cast_fp16")]; + tensor var_3140_pad_type_0 = const()[name = tensor("op_3140_pad_type_0"), val = tensor("valid")]; + tensor var_3140_strides_0 = const()[name = tensor("op_3140_strides_0"), val = tensor([1, 1])]; + tensor var_3140_pad_0 = const()[name = tensor("op_3140_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3140_dilations_0 = const()[name = tensor("op_3140_dilations_0"), val = tensor([1, 1])]; + tensor var_3140_groups_0 = const()[name = tensor("op_3140_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138364288))), name = tensor("layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138358848))), shape = tensor([768, 768, 1, 1])]; + tensor var_3140_cast_fp16 = conv(dilations = var_3140_dilations_0, groups = var_3140_groups_0, pad = var_3140_pad_0, pad_type = var_3140_pad_type_0, strides = var_3140_strides_0, weight = layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_93_cast_fp16)[name = tensor("op_3140_cast_fp16")]; + tensor obj_137_cast_fp16 = add(x = var_3134_cast_fp16, y = var_3140_cast_fp16)[name = tensor("obj_137_cast_fp16")]; + tensor inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_137_cast_fp16)[name = tensor("inputs_59_cast_fp16")]; + tensor out_59_axes_0 = const()[name = tensor("out_59_axes_0"), val = tensor([1])]; + tensor var_3154_to_fp16 = const()[name = tensor("op_3154_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_59_cast_fp16 = layer_norm(axes = out_59_axes_0, epsilon = var_3154_to_fp16, x = inputs_59_cast_fp16)[name = tensor("out_59_cast_fp16")]; + tensor input_95_gamma_0_to_fp16 = const()[name = tensor("input_95_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138438080)))]; + tensor input_95_beta_0_to_fp16 = const()[name = tensor("input_95_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138439680)))]; + tensor input_95_epsilon_0_to_fp16 = const()[name = tensor("input_95_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_95_cast_fp16 = batch_norm(beta = input_95_beta_0_to_fp16, epsilon = input_95_epsilon_0_to_fp16, gamma = input_95_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_59_cast_fp16)[name = tensor("input_95_cast_fp16")]; + tensor var_3172_pad_type_0 = const()[name = tensor("op_3172_pad_type_0"), val = tensor("valid")]; + tensor var_3172_strides_0 = const()[name = tensor("op_3172_strides_0"), val = tensor([1, 1])]; + tensor var_3172_pad_0 = const()[name = tensor("op_3172_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3172_dilations_0 = const()[name = tensor("op_3172_dilations_0"), val = tensor([1, 1])]; + tensor var_3172_groups_0 = const()[name = tensor("op_3172_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138441280))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139620992))), name = tensor("layers_9_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139621120)))]; + tensor var_3172_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_3172_dilations_0, groups = var_3172_groups_0, pad = var_3172_pad_0, pad_type = var_3172_pad_type_0, strides = var_3172_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = tensor("op_3172_cast_fp16")]; + tensor var_3178_pad_type_0 = const()[name = tensor("op_3178_pad_type_0"), val = tensor("valid")]; + tensor var_3178_strides_0 = const()[name = tensor("op_3178_strides_0"), val = tensor([1, 1])]; + tensor var_3178_pad_0 = const()[name = tensor("op_3178_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3178_dilations_0 = const()[name = tensor("op_3178_dilations_0"), val = tensor([1, 1])]; + tensor var_3178_groups_0 = const()[name = tensor("op_3178_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139645056))), name = tensor("layers_9_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139627328))), shape = tensor([3072, 768, 1, 1])]; + tensor var_3178_cast_fp16 = conv(dilations = var_3178_dilations_0, groups = var_3178_groups_0, pad = var_3178_pad_0, pad_type = var_3178_pad_type_0, strides = var_3178_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = tensor("op_3178_cast_fp16")]; + tensor input_97_cast_fp16 = add(x = var_3172_cast_fp16, y = var_3178_cast_fp16)[name = tensor("input_97_cast_fp16")]; + tensor input_99_mode_0 = const()[name = tensor("input_99_mode_0"), val = tensor("EXACT")]; + tensor input_99_cast_fp16 = gelu(mode = input_99_mode_0, x = input_97_cast_fp16)[name = tensor("input_99_cast_fp16")]; + tensor var_3189_pad_type_0 = const()[name = tensor("op_3189_pad_type_0"), val = tensor("valid")]; + tensor var_3189_strides_0 = const()[name = tensor("op_3189_strides_0"), val = tensor([1, 1])]; + tensor var_3189_pad_0 = const()[name = tensor("op_3189_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3189_dilations_0 = const()[name = tensor("op_3189_dilations_0"), val = tensor([1, 1])]; + tensor var_3189_groups_0 = const()[name = tensor("op_3189_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139940032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141119744))), name = tensor("layers_9_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141119872)))]; + tensor var_3189_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_3189_dilations_0, groups = var_3189_groups_0, pad = var_3189_pad_0, pad_type = var_3189_pad_type_0, strides = var_3189_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = tensor("op_3189_cast_fp16")]; + tensor var_3195_pad_type_0 = const()[name = tensor("op_3195_pad_type_0"), val = tensor("valid")]; + tensor var_3195_strides_0 = const()[name = tensor("op_3195_strides_0"), val = tensor([1, 1])]; + tensor var_3195_pad_0 = const()[name = tensor("op_3195_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3195_dilations_0 = const()[name = tensor("op_3195_dilations_0"), val = tensor([1, 1])]; + tensor var_3195_groups_0 = const()[name = tensor("op_3195_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141147328))), name = tensor("layers_9_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141121472))), shape = tensor([768, 3072, 1, 1])]; + tensor var_3195_cast_fp16 = conv(dilations = var_3195_dilations_0, groups = var_3195_groups_0, pad = var_3195_pad_0, pad_type = var_3195_pad_type_0, strides = var_3195_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_99_cast_fp16)[name = tensor("op_3195_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = var_3189_cast_fp16, y = var_3195_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; + tensor inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_61_cast_fp16")]; + tensor var_3208 = const()[name = tensor("op_3208"), val = tensor(3)]; + tensor out_61_axes_0 = const()[name = tensor("out_61_axes_0"), val = tensor([1])]; + tensor var_3233_to_fp16 = const()[name = tensor("op_3233_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_3233_to_fp16, x = inputs_61_cast_fp16)[name = tensor("out_61_cast_fp16")]; + tensor obj_141_gamma_0_to_fp16 = const()[name = tensor("obj_141_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141442304)))]; + tensor obj_141_beta_0_to_fp16 = const()[name = tensor("obj_141_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141443904)))]; + tensor obj_141_epsilon_0_to_fp16 = const()[name = tensor("obj_141_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_141_cast_fp16 = batch_norm(beta = obj_141_beta_0_to_fp16, epsilon = obj_141_epsilon_0_to_fp16, gamma = obj_141_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_61_cast_fp16)[name = tensor("obj_141_cast_fp16")]; + tensor var_3255_pad_type_0 = const()[name = tensor("op_3255_pad_type_0"), val = tensor("valid")]; + tensor var_3255_strides_0 = const()[name = tensor("op_3255_strides_0"), val = tensor([1, 1])]; + tensor var_3255_pad_0 = const()[name = tensor("op_3255_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3255_dilations_0 = const()[name = tensor("op_3255_dilations_0"), val = tensor([1, 1])]; + tensor var_3255_groups_0 = const()[name = tensor("op_3255_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141445504))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141740480))), name = tensor("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141740608)))]; + tensor var_3255_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3255_dilations_0, groups = var_3255_groups_0, pad = var_3255_pad_0, pad_type = var_3255_pad_type_0, strides = var_3255_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = tensor("op_3255_cast_fp16")]; + tensor var_3261_pad_type_0 = const()[name = tensor("op_3261_pad_type_0"), val = tensor("valid")]; + tensor var_3261_strides_0 = const()[name = tensor("op_3261_strides_0"), val = tensor([1, 1])]; + tensor var_3261_pad_0 = const()[name = tensor("op_3261_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3261_dilations_0 = const()[name = tensor("op_3261_dilations_0"), val = tensor([1, 1])]; + tensor var_3261_groups_0 = const()[name = tensor("op_3261_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141747776))), name = tensor("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141742208))), shape = tensor([768, 768, 1, 1])]; + tensor var_3261_cast_fp16 = conv(dilations = var_3261_dilations_0, groups = var_3261_groups_0, pad = var_3261_pad_0, pad_type = var_3261_pad_type_0, strides = var_3261_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_141_cast_fp16)[name = tensor("op_3261_cast_fp16")]; + tensor query_41_cast_fp16 = add(x = var_3255_cast_fp16, y = var_3261_cast_fp16)[name = tensor("query_41_cast_fp16")]; + tensor var_3270_pad_type_0 = const()[name = tensor("op_3270_pad_type_0"), val = tensor("valid")]; + tensor var_3270_strides_0 = const()[name = tensor("op_3270_strides_0"), val = tensor([1, 1])]; + tensor var_3270_pad_0 = const()[name = tensor("op_3270_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3270_dilations_0 = const()[name = tensor("op_3270_dilations_0"), val = tensor([1, 1])]; + tensor var_3270_groups_0 = const()[name = tensor("op_3270_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141821568))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142116544))), name = tensor("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_3270_cast_fp16 = conv(dilations = var_3270_dilations_0, groups = var_3270_groups_0, pad = var_3270_pad_0, pad_type = var_3270_pad_type_0, strides = var_3270_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = tensor("op_3270_cast_fp16")]; + tensor var_3276_pad_type_0 = const()[name = tensor("op_3276_pad_type_0"), val = tensor("valid")]; + tensor var_3276_strides_0 = const()[name = tensor("op_3276_strides_0"), val = tensor([1, 1])]; + tensor var_3276_pad_0 = const()[name = tensor("op_3276_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3276_dilations_0 = const()[name = tensor("op_3276_dilations_0"), val = tensor([1, 1])]; + tensor var_3276_groups_0 = const()[name = tensor("op_3276_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142122496))), name = tensor("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142116672))), shape = tensor([768, 768, 1, 1])]; + tensor var_3276_cast_fp16 = conv(dilations = var_3276_dilations_0, groups = var_3276_groups_0, pad = var_3276_pad_0, pad_type = var_3276_pad_type_0, strides = var_3276_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_141_cast_fp16)[name = tensor("op_3276_cast_fp16")]; + tensor current_key_21_cast_fp16 = add(x = var_3270_cast_fp16, y = var_3276_cast_fp16)[name = tensor("current_key_21_cast_fp16")]; + tensor var_3286_pad_type_0 = const()[name = tensor("op_3286_pad_type_0"), val = tensor("valid")]; + tensor var_3286_strides_0 = const()[name = tensor("op_3286_strides_0"), val = tensor([1, 1])]; + tensor var_3286_pad_0 = const()[name = tensor("op_3286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3286_dilations_0 = const()[name = tensor("op_3286_dilations_0"), val = tensor([1, 1])]; + tensor var_3286_groups_0 = const()[name = tensor("op_3286_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142196288))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142491264))), name = tensor("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142491392)))]; + tensor var_3286_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3286_dilations_0, groups = var_3286_groups_0, pad = var_3286_pad_0, pad_type = var_3286_pad_type_0, strides = var_3286_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = tensor("op_3286_cast_fp16")]; + tensor var_3292_pad_type_0 = const()[name = tensor("op_3292_pad_type_0"), val = tensor("valid")]; + tensor var_3292_strides_0 = const()[name = tensor("op_3292_strides_0"), val = tensor([1, 1])]; + tensor var_3292_pad_0 = const()[name = tensor("op_3292_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3292_dilations_0 = const()[name = tensor("op_3292_dilations_0"), val = tensor([1, 1])]; + tensor var_3292_groups_0 = const()[name = tensor("op_3292_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142501056))), name = tensor("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142492992))), shape = tensor([768, 768, 1, 1])]; + tensor var_3292_cast_fp16 = conv(dilations = var_3292_dilations_0, groups = var_3292_groups_0, pad = var_3292_pad_0, pad_type = var_3292_pad_type_0, strides = var_3292_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_141_cast_fp16)[name = tensor("op_3292_cast_fp16")]; + tensor current_value_21_cast_fp16 = add(x = var_3286_cast_fp16, y = var_3292_cast_fp16)[name = tensor("current_value_21_cast_fp16")]; + tensor var_3299_cast_fp16 = mul(x = var_69_cast_fp16_10, y = var_192_cast_fp16)[name = tensor("op_3299_cast_fp16")]; + tensor var_3300_cast_fp16 = mul(x = current_key_21_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_3300_cast_fp16")]; + tensor key_41_cast_fp16 = add(x = var_3299_cast_fp16, y = var_3300_cast_fp16)[name = tensor("key_41_cast_fp16")]; + tensor var_3303_cast_fp16 = mul(x = var_84_cast_fp16_10, y = var_192_cast_fp16)[name = tensor("op_3303_cast_fp16")]; + tensor var_3304_cast_fp16 = mul(x = current_value_21_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_3304_cast_fp16")]; + tensor value_41_cast_fp16 = add(x = var_3303_cast_fp16, y = var_3304_cast_fp16)[name = tensor("value_41_cast_fp16")]; + tensor var_3308 = const()[name = tensor("op_3308"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_41_cast_fp16 = reshape(shape = var_3308, x = query_41_cast_fp16)[name = tensor("mh_q_41_cast_fp16")]; + tensor var_3310_to_fp16 = const()[name = tensor("op_3310_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3311_cast_fp16 = mul(x = mh_q_41_cast_fp16, y = var_3310_to_fp16)[name = tensor("op_3311_cast_fp16")]; + tensor var_3314 = const()[name = tensor("op_3314"), val = tensor([1, 12, 64, 448])]; + tensor var_3315_cast_fp16 = reshape(shape = var_3314, x = key_41_cast_fp16)[name = tensor("op_3315_cast_fp16")]; + tensor mh_w_61_transpose_x_0 = const()[name = tensor("mh_w_61_transpose_x_0"), val = tensor(true)]; + tensor mh_w_61_transpose_y_0 = const()[name = tensor("mh_w_61_transpose_y_0"), val = tensor(false)]; + tensor mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_3311_cast_fp16, y = var_3315_cast_fp16)[name = tensor("mh_w_61_cast_fp16")]; + tensor mh_w_63_cast_fp16 = add(x = mh_w_61_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_63_cast_fp16")]; + tensor var_3323_cast_fp16 = softmax(axis = var_3208, x = mh_w_63_cast_fp16)[name = tensor("op_3323_cast_fp16")]; + tensor var_3324 = const()[name = tensor("op_3324"), val = tensor([1, 12, 64, 448])]; + tensor var_3325_cast_fp16 = reshape(shape = var_3324, x = value_41_cast_fp16)[name = tensor("op_3325_cast_fp16")]; + tensor attn_41_transpose_x_0 = const()[name = tensor("attn_41_transpose_x_0"), val = tensor(false)]; + tensor attn_41_transpose_y_0 = const()[name = tensor("attn_41_transpose_y_0"), val = tensor(true)]; + tensor attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_3325_cast_fp16, y = var_3323_cast_fp16)[name = tensor("attn_41_cast_fp16")]; + tensor var_3328 = const()[name = tensor("op_3328"), val = tensor([1, 768, 1, 1])]; + tensor input_101_cast_fp16 = reshape(shape = var_3328, x = attn_41_cast_fp16)[name = tensor("input_101_cast_fp16")]; + tensor var_3338_pad_type_0 = const()[name = tensor("op_3338_pad_type_0"), val = tensor("valid")]; + tensor var_3338_strides_0 = const()[name = tensor("op_3338_strides_0"), val = tensor([1, 1])]; + tensor var_3338_pad_0 = const()[name = tensor("op_3338_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3338_dilations_0 = const()[name = tensor("op_3338_dilations_0"), val = tensor([1, 1])]; + tensor var_3338_groups_0 = const()[name = tensor("op_3338_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142574848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142869824))), name = tensor("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142869952)))]; + tensor var_3338_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3338_dilations_0, groups = var_3338_groups_0, pad = var_3338_pad_0, pad_type = var_3338_pad_type_0, strides = var_3338_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = tensor("op_3338_cast_fp16")]; + tensor var_3344_pad_type_0 = const()[name = tensor("op_3344_pad_type_0"), val = tensor("valid")]; + tensor var_3344_strides_0 = const()[name = tensor("op_3344_strides_0"), val = tensor([1, 1])]; + tensor var_3344_pad_0 = const()[name = tensor("op_3344_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3344_dilations_0 = const()[name = tensor("op_3344_dilations_0"), val = tensor([1, 1])]; + tensor var_3344_groups_0 = const()[name = tensor("op_3344_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142879552))), name = tensor("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142871552))), shape = tensor([768, 768, 1, 1])]; + tensor var_3344_cast_fp16 = conv(dilations = var_3344_dilations_0, groups = var_3344_groups_0, pad = var_3344_pad_0, pad_type = var_3344_pad_type_0, strides = var_3344_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_101_cast_fp16)[name = tensor("op_3344_cast_fp16")]; + tensor obj_147_cast_fp16 = add(x = var_3338_cast_fp16, y = var_3344_cast_fp16)[name = tensor("obj_147_cast_fp16")]; + tensor inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_147_cast_fp16)[name = tensor("inputs_63_cast_fp16")]; + tensor out_63_axes_0 = const()[name = tensor("out_63_axes_0"), val = tensor([1])]; + tensor var_3359_to_fp16 = const()[name = tensor("op_3359_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_63_cast_fp16 = layer_norm(axes = out_63_axes_0, epsilon = var_3359_to_fp16, x = inputs_63_cast_fp16)[name = tensor("out_63_cast_fp16")]; + tensor obj_149_gamma_0_to_fp16 = const()[name = tensor("obj_149_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142953344)))]; + tensor obj_149_beta_0_to_fp16 = const()[name = tensor("obj_149_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142954944)))]; + tensor obj_149_epsilon_0_to_fp16 = const()[name = tensor("obj_149_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_149_cast_fp16 = batch_norm(beta = obj_149_beta_0_to_fp16, epsilon = obj_149_epsilon_0_to_fp16, gamma = obj_149_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_63_cast_fp16)[name = tensor("obj_149_cast_fp16")]; + tensor var_3381_pad_type_0 = const()[name = tensor("op_3381_pad_type_0"), val = tensor("valid")]; + tensor var_3381_strides_0 = const()[name = tensor("op_3381_strides_0"), val = tensor([1, 1])]; + tensor var_3381_pad_0 = const()[name = tensor("op_3381_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3381_dilations_0 = const()[name = tensor("op_3381_dilations_0"), val = tensor([1, 1])]; + tensor var_3381_groups_0 = const()[name = tensor("op_3381_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142956544))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143251520))), name = tensor("layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143251648)))]; + tensor var_3381_cast_fp16 = conv(bias = layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3381_dilations_0, groups = var_3381_groups_0, pad = var_3381_pad_0, pad_type = var_3381_pad_type_0, strides = var_3381_strides_0, weight = layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_149_cast_fp16)[name = tensor("op_3381_cast_fp16")]; + tensor var_3387_pad_type_0 = const()[name = tensor("op_3387_pad_type_0"), val = tensor("valid")]; + tensor var_3387_strides_0 = const()[name = tensor("op_3387_strides_0"), val = tensor([1, 1])]; + tensor var_3387_pad_0 = const()[name = tensor("op_3387_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3387_dilations_0 = const()[name = tensor("op_3387_dilations_0"), val = tensor([1, 1])]; + tensor var_3387_groups_0 = const()[name = tensor("op_3387_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143259072))), name = tensor("layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143253248))), shape = tensor([768, 768, 1, 1])]; + tensor var_3387_cast_fp16 = conv(dilations = var_3387_dilations_0, groups = var_3387_groups_0, pad = var_3387_pad_0, pad_type = var_3387_pad_type_0, strides = var_3387_strides_0, weight = layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_149_cast_fp16)[name = tensor("op_3387_cast_fp16")]; + tensor query_43_cast_fp16 = add(x = var_3381_cast_fp16, y = var_3387_cast_fp16)[name = tensor("query_43_cast_fp16")]; + tensor var_3396_pad_type_0 = const()[name = tensor("op_3396_pad_type_0"), val = tensor("valid")]; + tensor var_3396_strides_0 = const()[name = tensor("op_3396_strides_0"), val = tensor([1, 1])]; + tensor var_3396_pad_0 = const()[name = tensor("op_3396_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3396_dilations_0 = const()[name = tensor("op_3396_dilations_0"), val = tensor([1, 1])]; + tensor var_3396_groups_0 = const()[name = tensor("op_3396_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143332864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143627840))), name = tensor("layers_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_3396_cast_fp16 = conv(dilations = var_3396_dilations_0, groups = var_3396_groups_0, pad = var_3396_pad_0, pad_type = var_3396_pad_type_0, strides = var_3396_strides_0, weight = layers_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3396_cast_fp16")]; + tensor var_3402_pad_type_0 = const()[name = tensor("op_3402_pad_type_0"), val = tensor("valid")]; + tensor var_3402_strides_0 = const()[name = tensor("op_3402_strides_0"), val = tensor([1, 1])]; + tensor var_3402_pad_0 = const()[name = tensor("op_3402_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3402_dilations_0 = const()[name = tensor("op_3402_dilations_0"), val = tensor([1, 1])]; + tensor var_3402_groups_0 = const()[name = tensor("op_3402_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143635328))), name = tensor("layers_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143627968))), shape = tensor([768, 768, 1, 1])]; + tensor var_3402_cast_fp16 = conv(dilations = var_3402_dilations_0, groups = var_3402_groups_0, pad = var_3402_pad_0, pad_type = var_3402_pad_type_0, strides = var_3402_strides_0, weight = layers_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3402_cast_fp16")]; + tensor key_43_cast_fp16 = add(x = var_3396_cast_fp16, y = var_3402_cast_fp16)[name = tensor("key_43_cast_fp16")]; + tensor var_3412_pad_type_0 = const()[name = tensor("op_3412_pad_type_0"), val = tensor("valid")]; + tensor var_3412_strides_0 = const()[name = tensor("op_3412_strides_0"), val = tensor([1, 1])]; + tensor var_3412_pad_0 = const()[name = tensor("op_3412_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3412_dilations_0 = const()[name = tensor("op_3412_dilations_0"), val = tensor([1, 1])]; + tensor var_3412_groups_0 = const()[name = tensor("op_3412_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143709120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144004096))), name = tensor("layers_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144004224)))]; + tensor var_3412_cast_fp16 = conv(bias = layers_10_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3412_dilations_0, groups = var_3412_groups_0, pad = var_3412_pad_0, pad_type = var_3412_pad_type_0, strides = var_3412_strides_0, weight = layers_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3412_cast_fp16")]; + tensor var_3418_pad_type_0 = const()[name = tensor("op_3418_pad_type_0"), val = tensor("valid")]; + tensor var_3418_strides_0 = const()[name = tensor("op_3418_strides_0"), val = tensor([1, 1])]; + tensor var_3418_pad_0 = const()[name = tensor("op_3418_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3418_dilations_0 = const()[name = tensor("op_3418_dilations_0"), val = tensor([1, 1])]; + tensor var_3418_groups_0 = const()[name = tensor("op_3418_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144015168))), name = tensor("layers_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144005824))), shape = tensor([768, 768, 1, 1])]; + tensor var_3418_cast_fp16 = conv(dilations = var_3418_dilations_0, groups = var_3418_groups_0, pad = var_3418_pad_0, pad_type = var_3418_pad_type_0, strides = var_3418_strides_0, weight = layers_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3418_cast_fp16")]; + tensor value_43_cast_fp16 = add(x = var_3412_cast_fp16, y = var_3418_cast_fp16)[name = tensor("value_43_cast_fp16")]; + tensor var_3422 = const()[name = tensor("op_3422"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_43_cast_fp16 = reshape(shape = var_3422, x = query_43_cast_fp16)[name = tensor("mh_q_43_cast_fp16")]; + tensor var_3424_to_fp16 = const()[name = tensor("op_3424_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3425_cast_fp16 = mul(x = mh_q_43_cast_fp16, y = var_3424_to_fp16)[name = tensor("op_3425_cast_fp16")]; + tensor var_3428 = const()[name = tensor("op_3428"), val = tensor([1, 12, 64, 1500])]; + tensor var_3429_cast_fp16 = reshape(shape = var_3428, x = key_43_cast_fp16)[name = tensor("op_3429_cast_fp16")]; + tensor mh_w_65_transpose_x_0 = const()[name = tensor("mh_w_65_transpose_x_0"), val = tensor(true)]; + tensor mh_w_65_transpose_y_0 = const()[name = tensor("mh_w_65_transpose_y_0"), val = tensor(false)]; + tensor mh_w_65_cast_fp16 = matmul(transpose_x = mh_w_65_transpose_x_0, transpose_y = mh_w_65_transpose_y_0, x = var_3425_cast_fp16, y = var_3429_cast_fp16)[name = tensor("mh_w_65_cast_fp16")]; + tensor obj_153_cast_fp16 = softmax(axis = var_3208, x = mh_w_65_cast_fp16)[name = tensor("obj_153_cast_fp16")]; + tensor var_3433 = const()[name = tensor("op_3433"), val = tensor([1, 12, 64, 1500])]; + tensor var_3434_cast_fp16 = reshape(shape = var_3433, x = value_43_cast_fp16)[name = tensor("op_3434_cast_fp16")]; + tensor attn_43_transpose_x_0 = const()[name = tensor("attn_43_transpose_x_0"), val = tensor(false)]; + tensor attn_43_transpose_y_0 = const()[name = tensor("attn_43_transpose_y_0"), val = tensor(true)]; + tensor attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_3434_cast_fp16, y = obj_153_cast_fp16)[name = tensor("attn_43_cast_fp16")]; + tensor var_3437 = const()[name = tensor("op_3437"), val = tensor([1, 768, 1, 1])]; + tensor input_103_cast_fp16 = reshape(shape = var_3437, x = attn_43_cast_fp16)[name = tensor("input_103_cast_fp16")]; + tensor var_3447_pad_type_0 = const()[name = tensor("op_3447_pad_type_0"), val = tensor("valid")]; + tensor var_3447_strides_0 = const()[name = tensor("op_3447_strides_0"), val = tensor([1, 1])]; + tensor var_3447_pad_0 = const()[name = tensor("op_3447_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3447_dilations_0 = const()[name = tensor("op_3447_dilations_0"), val = tensor([1, 1])]; + tensor var_3447_groups_0 = const()[name = tensor("op_3447_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144088960))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144383936))), name = tensor("layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144384064)))]; + tensor var_3447_cast_fp16 = conv(bias = layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3447_dilations_0, groups = var_3447_groups_0, pad = var_3447_pad_0, pad_type = var_3447_pad_type_0, strides = var_3447_strides_0, weight = layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = tensor("op_3447_cast_fp16")]; + tensor var_3453_pad_type_0 = const()[name = tensor("op_3453_pad_type_0"), val = tensor("valid")]; + tensor var_3453_strides_0 = const()[name = tensor("op_3453_strides_0"), val = tensor([1, 1])]; + tensor var_3453_pad_0 = const()[name = tensor("op_3453_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3453_dilations_0 = const()[name = tensor("op_3453_dilations_0"), val = tensor([1, 1])]; + tensor var_3453_groups_0 = const()[name = tensor("op_3453_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144395072))), name = tensor("layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144385664))), shape = tensor([768, 768, 1, 1])]; + tensor var_3453_cast_fp16 = conv(dilations = var_3453_dilations_0, groups = var_3453_groups_0, pad = var_3453_pad_0, pad_type = var_3453_pad_type_0, strides = var_3453_strides_0, weight = layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_103_cast_fp16)[name = tensor("op_3453_cast_fp16")]; + tensor obj_151_cast_fp16 = add(x = var_3447_cast_fp16, y = var_3453_cast_fp16)[name = tensor("obj_151_cast_fp16")]; + tensor inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = obj_151_cast_fp16)[name = tensor("inputs_65_cast_fp16")]; + tensor out_65_axes_0 = const()[name = tensor("out_65_axes_0"), val = tensor([1])]; + tensor var_3467_to_fp16 = const()[name = tensor("op_3467_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_65_cast_fp16 = layer_norm(axes = out_65_axes_0, epsilon = var_3467_to_fp16, x = inputs_65_cast_fp16)[name = tensor("out_65_cast_fp16")]; + tensor input_105_gamma_0_to_fp16 = const()[name = tensor("input_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144468864)))]; + tensor input_105_beta_0_to_fp16 = const()[name = tensor("input_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144470464)))]; + tensor input_105_epsilon_0_to_fp16 = const()[name = tensor("input_105_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_105_cast_fp16 = batch_norm(beta = input_105_beta_0_to_fp16, epsilon = input_105_epsilon_0_to_fp16, gamma = input_105_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_65_cast_fp16)[name = tensor("input_105_cast_fp16")]; + tensor var_3485_pad_type_0 = const()[name = tensor("op_3485_pad_type_0"), val = tensor("valid")]; + tensor var_3485_strides_0 = const()[name = tensor("op_3485_strides_0"), val = tensor([1, 1])]; + tensor var_3485_pad_0 = const()[name = tensor("op_3485_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3485_dilations_0 = const()[name = tensor("op_3485_dilations_0"), val = tensor([1, 1])]; + tensor var_3485_groups_0 = const()[name = tensor("op_3485_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144472064))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145651776))), name = tensor("layers_10_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145651904)))]; + tensor var_3485_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_3485_dilations_0, groups = var_3485_groups_0, pad = var_3485_pad_0, pad_type = var_3485_pad_type_0, strides = var_3485_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = tensor("op_3485_cast_fp16")]; + tensor var_3491_pad_type_0 = const()[name = tensor("op_3491_pad_type_0"), val = tensor("valid")]; + tensor var_3491_strides_0 = const()[name = tensor("op_3491_strides_0"), val = tensor([1, 1])]; + tensor var_3491_pad_0 = const()[name = tensor("op_3491_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3491_dilations_0 = const()[name = tensor("op_3491_dilations_0"), val = tensor([1, 1])]; + tensor var_3491_groups_0 = const()[name = tensor("op_3491_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145679616))), name = tensor("layers_10_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145658112))), shape = tensor([3072, 768, 1, 1])]; + tensor var_3491_cast_fp16 = conv(dilations = var_3491_dilations_0, groups = var_3491_groups_0, pad = var_3491_pad_0, pad_type = var_3491_pad_type_0, strides = var_3491_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_105_cast_fp16)[name = tensor("op_3491_cast_fp16")]; + tensor input_107_cast_fp16 = add(x = var_3485_cast_fp16, y = var_3491_cast_fp16)[name = tensor("input_107_cast_fp16")]; + tensor input_109_mode_0 = const()[name = tensor("input_109_mode_0"), val = tensor("EXACT")]; + tensor input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = input_107_cast_fp16)[name = tensor("input_109_cast_fp16")]; + tensor var_3502_pad_type_0 = const()[name = tensor("op_3502_pad_type_0"), val = tensor("valid")]; + tensor var_3502_strides_0 = const()[name = tensor("op_3502_strides_0"), val = tensor([1, 1])]; + tensor var_3502_pad_0 = const()[name = tensor("op_3502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3502_dilations_0 = const()[name = tensor("op_3502_dilations_0"), val = tensor([1, 1])]; + tensor var_3502_groups_0 = const()[name = tensor("op_3502_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145974592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147154304))), name = tensor("layers_10_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147154432)))]; + tensor var_3502_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_3502_dilations_0, groups = var_3502_groups_0, pad = var_3502_pad_0, pad_type = var_3502_pad_type_0, strides = var_3502_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_109_cast_fp16)[name = tensor("op_3502_cast_fp16")]; + tensor var_3508_pad_type_0 = const()[name = tensor("op_3508_pad_type_0"), val = tensor("valid")]; + tensor var_3508_strides_0 = const()[name = tensor("op_3508_strides_0"), val = tensor([1, 1])]; + tensor var_3508_pad_0 = const()[name = tensor("op_3508_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3508_dilations_0 = const()[name = tensor("op_3508_dilations_0"), val = tensor([1, 1])]; + tensor var_3508_groups_0 = const()[name = tensor("op_3508_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147186048))), name = tensor("layers_10_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147156032))), shape = tensor([768, 3072, 1, 1])]; + tensor var_3508_cast_fp16 = conv(dilations = var_3508_dilations_0, groups = var_3508_groups_0, pad = var_3508_pad_0, pad_type = var_3508_pad_type_0, strides = var_3508_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_109_cast_fp16)[name = tensor("op_3508_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = add(x = var_3502_cast_fp16, y = var_3508_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; + tensor inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_67_cast_fp16")]; + tensor var_3521 = const()[name = tensor("op_3521"), val = tensor(3)]; + tensor out_67_axes_0 = const()[name = tensor("out_67_axes_0"), val = tensor([1])]; + tensor var_3546_to_fp16 = const()[name = tensor("op_3546_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_3546_to_fp16, x = inputs_67_cast_fp16)[name = tensor("out_67_cast_fp16")]; + tensor obj_155_gamma_0_to_fp16 = const()[name = tensor("obj_155_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147481024)))]; + tensor obj_155_beta_0_to_fp16 = const()[name = tensor("obj_155_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147482624)))]; + tensor obj_155_epsilon_0_to_fp16 = const()[name = tensor("obj_155_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_155_cast_fp16 = batch_norm(beta = obj_155_beta_0_to_fp16, epsilon = obj_155_epsilon_0_to_fp16, gamma = obj_155_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_67_cast_fp16)[name = tensor("obj_155_cast_fp16")]; + tensor var_3568_pad_type_0 = const()[name = tensor("op_3568_pad_type_0"), val = tensor("valid")]; + tensor var_3568_strides_0 = const()[name = tensor("op_3568_strides_0"), val = tensor([1, 1])]; + tensor var_3568_pad_0 = const()[name = tensor("op_3568_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3568_dilations_0 = const()[name = tensor("op_3568_dilations_0"), val = tensor([1, 1])]; + tensor var_3568_groups_0 = const()[name = tensor("op_3568_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147484224))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147779200))), name = tensor("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147779328)))]; + tensor var_3568_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3568_dilations_0, groups = var_3568_groups_0, pad = var_3568_pad_0, pad_type = var_3568_pad_type_0, strides = var_3568_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_155_cast_fp16)[name = tensor("op_3568_cast_fp16")]; + tensor var_3574_pad_type_0 = const()[name = tensor("op_3574_pad_type_0"), val = tensor("valid")]; + tensor var_3574_strides_0 = const()[name = tensor("op_3574_strides_0"), val = tensor([1, 1])]; + tensor var_3574_pad_0 = const()[name = tensor("op_3574_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3574_dilations_0 = const()[name = tensor("op_3574_dilations_0"), val = tensor([1, 1])]; + tensor var_3574_groups_0 = const()[name = tensor("op_3574_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147786112))), name = tensor("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147780928))), shape = tensor([768, 768, 1, 1])]; + tensor var_3574_cast_fp16 = conv(dilations = var_3574_dilations_0, groups = var_3574_groups_0, pad = var_3574_pad_0, pad_type = var_3574_pad_type_0, strides = var_3574_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_155_cast_fp16)[name = tensor("op_3574_cast_fp16")]; + tensor query_45_cast_fp16 = add(x = var_3568_cast_fp16, y = var_3574_cast_fp16)[name = tensor("query_45_cast_fp16")]; + tensor var_3583_pad_type_0 = const()[name = tensor("op_3583_pad_type_0"), val = tensor("valid")]; + tensor var_3583_strides_0 = const()[name = tensor("op_3583_strides_0"), val = tensor([1, 1])]; + tensor var_3583_pad_0 = const()[name = tensor("op_3583_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3583_dilations_0 = const()[name = tensor("op_3583_dilations_0"), val = tensor([1, 1])]; + tensor var_3583_groups_0 = const()[name = tensor("op_3583_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147859904))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148154880))), name = tensor("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_3583_cast_fp16 = conv(dilations = var_3583_dilations_0, groups = var_3583_groups_0, pad = var_3583_pad_0, pad_type = var_3583_pad_type_0, strides = var_3583_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_155_cast_fp16)[name = tensor("op_3583_cast_fp16")]; + tensor var_3589_pad_type_0 = const()[name = tensor("op_3589_pad_type_0"), val = tensor("valid")]; + tensor var_3589_strides_0 = const()[name = tensor("op_3589_strides_0"), val = tensor([1, 1])]; + tensor var_3589_pad_0 = const()[name = tensor("op_3589_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3589_dilations_0 = const()[name = tensor("op_3589_dilations_0"), val = tensor([1, 1])]; + tensor var_3589_groups_0 = const()[name = tensor("op_3589_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148160192))), name = tensor("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148155008))), shape = tensor([768, 768, 1, 1])]; + tensor var_3589_cast_fp16 = conv(dilations = var_3589_dilations_0, groups = var_3589_groups_0, pad = var_3589_pad_0, pad_type = var_3589_pad_type_0, strides = var_3589_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_155_cast_fp16)[name = tensor("op_3589_cast_fp16")]; + tensor current_key_cast_fp16 = add(x = var_3583_cast_fp16, y = var_3589_cast_fp16)[name = tensor("current_key_cast_fp16")]; + tensor var_3599_pad_type_0 = const()[name = tensor("op_3599_pad_type_0"), val = tensor("valid")]; + tensor var_3599_strides_0 = const()[name = tensor("op_3599_strides_0"), val = tensor([1, 1])]; + tensor var_3599_pad_0 = const()[name = tensor("op_3599_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3599_dilations_0 = const()[name = tensor("op_3599_dilations_0"), val = tensor([1, 1])]; + tensor var_3599_groups_0 = const()[name = tensor("op_3599_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148233984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148528960))), name = tensor("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148529088)))]; + tensor var_3599_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3599_dilations_0, groups = var_3599_groups_0, pad = var_3599_pad_0, pad_type = var_3599_pad_type_0, strides = var_3599_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_155_cast_fp16)[name = tensor("op_3599_cast_fp16")]; + tensor var_3605_pad_type_0 = const()[name = tensor("op_3605_pad_type_0"), val = tensor("valid")]; + tensor var_3605_strides_0 = const()[name = tensor("op_3605_strides_0"), val = tensor([1, 1])]; + tensor var_3605_pad_0 = const()[name = tensor("op_3605_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3605_dilations_0 = const()[name = tensor("op_3605_dilations_0"), val = tensor([1, 1])]; + tensor var_3605_groups_0 = const()[name = tensor("op_3605_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148536448))), name = tensor("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148530688))), shape = tensor([768, 768, 1, 1])]; + tensor var_3605_cast_fp16 = conv(dilations = var_3605_dilations_0, groups = var_3605_groups_0, pad = var_3605_pad_0, pad_type = var_3605_pad_type_0, strides = var_3605_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_155_cast_fp16)[name = tensor("op_3605_cast_fp16")]; + tensor current_value_cast_fp16 = add(x = var_3599_cast_fp16, y = var_3605_cast_fp16)[name = tensor("current_value_cast_fp16")]; + tensor var_3612_cast_fp16 = mul(x = var_69_cast_fp16_11, y = var_192_cast_fp16)[name = tensor("op_3612_cast_fp16")]; + tensor var_3613_cast_fp16 = mul(x = current_key_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_3613_cast_fp16")]; + tensor key_45_cast_fp16 = add(x = var_3612_cast_fp16, y = var_3613_cast_fp16)[name = tensor("key_45_cast_fp16")]; + tensor var_3616_cast_fp16 = mul(x = var_84_cast_fp16_11, y = var_192_cast_fp16)[name = tensor("op_3616_cast_fp16")]; + tensor var_3617_cast_fp16 = mul(x = current_value_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_3617_cast_fp16")]; + tensor value_45_cast_fp16 = add(x = var_3616_cast_fp16, y = var_3617_cast_fp16)[name = tensor("value_45_cast_fp16")]; + tensor var_3621 = const()[name = tensor("op_3621"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_45_cast_fp16 = reshape(shape = var_3621, x = query_45_cast_fp16)[name = tensor("mh_q_45_cast_fp16")]; + tensor var_3623_to_fp16 = const()[name = tensor("op_3623_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3624_cast_fp16 = mul(x = mh_q_45_cast_fp16, y = var_3623_to_fp16)[name = tensor("op_3624_cast_fp16")]; + tensor var_3627 = const()[name = tensor("op_3627"), val = tensor([1, 12, 64, 448])]; + tensor var_3628_cast_fp16 = reshape(shape = var_3627, x = key_45_cast_fp16)[name = tensor("op_3628_cast_fp16")]; + tensor mh_w_67_transpose_x_0 = const()[name = tensor("mh_w_67_transpose_x_0"), val = tensor(true)]; + tensor mh_w_67_transpose_y_0 = const()[name = tensor("mh_w_67_transpose_y_0"), val = tensor(false)]; + tensor mh_w_67_cast_fp16 = matmul(transpose_x = mh_w_67_transpose_x_0, transpose_y = mh_w_67_transpose_y_0, x = var_3624_cast_fp16, y = var_3628_cast_fp16)[name = tensor("mh_w_67_cast_fp16")]; + tensor mh_w_69_cast_fp16 = add(x = mh_w_67_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_69_cast_fp16")]; + tensor var_3636_cast_fp16 = softmax(axis = var_3521, x = mh_w_69_cast_fp16)[name = tensor("op_3636_cast_fp16")]; + tensor var_3637 = const()[name = tensor("op_3637"), val = tensor([1, 12, 64, 448])]; + tensor var_3638_cast_fp16 = reshape(shape = var_3637, x = value_45_cast_fp16)[name = tensor("op_3638_cast_fp16")]; + tensor attn_45_transpose_x_0 = const()[name = tensor("attn_45_transpose_x_0"), val = tensor(false)]; + tensor attn_45_transpose_y_0 = const()[name = tensor("attn_45_transpose_y_0"), val = tensor(true)]; + tensor attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_3638_cast_fp16, y = var_3636_cast_fp16)[name = tensor("attn_45_cast_fp16")]; + tensor var_3641 = const()[name = tensor("op_3641"), val = tensor([1, 768, 1, 1])]; + tensor input_111_cast_fp16 = reshape(shape = var_3641, x = attn_45_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor var_3651_pad_type_0 = const()[name = tensor("op_3651_pad_type_0"), val = tensor("valid")]; + tensor var_3651_strides_0 = const()[name = tensor("op_3651_strides_0"), val = tensor([1, 1])]; + tensor var_3651_pad_0 = const()[name = tensor("op_3651_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3651_dilations_0 = const()[name = tensor("op_3651_dilations_0"), val = tensor([1, 1])]; + tensor var_3651_groups_0 = const()[name = tensor("op_3651_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148610240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148905216))), name = tensor("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148905344)))]; + tensor var_3651_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3651_dilations_0, groups = var_3651_groups_0, pad = var_3651_pad_0, pad_type = var_3651_pad_type_0, strides = var_3651_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = tensor("op_3651_cast_fp16")]; + tensor var_3657_pad_type_0 = const()[name = tensor("op_3657_pad_type_0"), val = tensor("valid")]; + tensor var_3657_strides_0 = const()[name = tensor("op_3657_strides_0"), val = tensor([1, 1])]; + tensor var_3657_pad_0 = const()[name = tensor("op_3657_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3657_dilations_0 = const()[name = tensor("op_3657_dilations_0"), val = tensor([1, 1])]; + tensor var_3657_groups_0 = const()[name = tensor("op_3657_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148916096))), name = tensor("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148906944))), shape = tensor([768, 768, 1, 1])]; + tensor var_3657_cast_fp16 = conv(dilations = var_3657_dilations_0, groups = var_3657_groups_0, pad = var_3657_pad_0, pad_type = var_3657_pad_type_0, strides = var_3657_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_111_cast_fp16)[name = tensor("op_3657_cast_fp16")]; + tensor obj_161_cast_fp16 = add(x = var_3651_cast_fp16, y = var_3657_cast_fp16)[name = tensor("obj_161_cast_fp16")]; + tensor inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = obj_161_cast_fp16)[name = tensor("inputs_69_cast_fp16")]; + tensor out_69_axes_0 = const()[name = tensor("out_69_axes_0"), val = tensor([1])]; + tensor var_3672_to_fp16 = const()[name = tensor("op_3672_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_69_cast_fp16 = layer_norm(axes = out_69_axes_0, epsilon = var_3672_to_fp16, x = inputs_69_cast_fp16)[name = tensor("out_69_cast_fp16")]; + tensor obj_163_gamma_0_to_fp16 = const()[name = tensor("obj_163_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148989888)))]; + tensor obj_163_beta_0_to_fp16 = const()[name = tensor("obj_163_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148991488)))]; + tensor obj_163_epsilon_0_to_fp16 = const()[name = tensor("obj_163_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_163_cast_fp16 = batch_norm(beta = obj_163_beta_0_to_fp16, epsilon = obj_163_epsilon_0_to_fp16, gamma = obj_163_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_69_cast_fp16)[name = tensor("obj_163_cast_fp16")]; + tensor var_3694_pad_type_0 = const()[name = tensor("op_3694_pad_type_0"), val = tensor("valid")]; + tensor var_3694_strides_0 = const()[name = tensor("op_3694_strides_0"), val = tensor([1, 1])]; + tensor var_3694_pad_0 = const()[name = tensor("op_3694_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3694_dilations_0 = const()[name = tensor("op_3694_dilations_0"), val = tensor([1, 1])]; + tensor var_3694_groups_0 = const()[name = tensor("op_3694_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148993088))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149288064))), name = tensor("layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149288192)))]; + tensor var_3694_cast_fp16 = conv(bias = layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3694_dilations_0, groups = var_3694_groups_0, pad = var_3694_pad_0, pad_type = var_3694_pad_type_0, strides = var_3694_strides_0, weight = layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_163_cast_fp16)[name = tensor("op_3694_cast_fp16")]; + tensor var_3700_pad_type_0 = const()[name = tensor("op_3700_pad_type_0"), val = tensor("valid")]; + tensor var_3700_strides_0 = const()[name = tensor("op_3700_strides_0"), val = tensor([1, 1])]; + tensor var_3700_pad_0 = const()[name = tensor("op_3700_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3700_dilations_0 = const()[name = tensor("op_3700_dilations_0"), val = tensor([1, 1])]; + tensor var_3700_groups_0 = const()[name = tensor("op_3700_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149296768))), name = tensor("layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149289792))), shape = tensor([768, 768, 1, 1])]; + tensor var_3700_cast_fp16 = conv(dilations = var_3700_dilations_0, groups = var_3700_groups_0, pad = var_3700_pad_0, pad_type = var_3700_pad_type_0, strides = var_3700_strides_0, weight = layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_163_cast_fp16)[name = tensor("op_3700_cast_fp16")]; + tensor query_cast_fp16 = add(x = var_3694_cast_fp16, y = var_3700_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_3709_pad_type_0 = const()[name = tensor("op_3709_pad_type_0"), val = tensor("valid")]; + tensor var_3709_strides_0 = const()[name = tensor("op_3709_strides_0"), val = tensor([1, 1])]; + tensor var_3709_pad_0 = const()[name = tensor("op_3709_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3709_dilations_0 = const()[name = tensor("op_3709_dilations_0"), val = tensor([1, 1])]; + tensor var_3709_groups_0 = const()[name = tensor("op_3709_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149370560))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149665536))), name = tensor("layers_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_3709_cast_fp16 = conv(dilations = var_3709_dilations_0, groups = var_3709_groups_0, pad = var_3709_pad_0, pad_type = var_3709_pad_type_0, strides = var_3709_strides_0, weight = layers_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3709_cast_fp16")]; + tensor var_3715_pad_type_0 = const()[name = tensor("op_3715_pad_type_0"), val = tensor("valid")]; + tensor var_3715_strides_0 = const()[name = tensor("op_3715_strides_0"), val = tensor([1, 1])]; + tensor var_3715_pad_0 = const()[name = tensor("op_3715_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3715_dilations_0 = const()[name = tensor("op_3715_dilations_0"), val = tensor([1, 1])]; + tensor var_3715_groups_0 = const()[name = tensor("op_3715_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149673920))), name = tensor("layers_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149665664))), shape = tensor([768, 768, 1, 1])]; + tensor var_3715_cast_fp16 = conv(dilations = var_3715_dilations_0, groups = var_3715_groups_0, pad = var_3715_pad_0, pad_type = var_3715_pad_type_0, strides = var_3715_strides_0, weight = layers_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3715_cast_fp16")]; + tensor key_cast_fp16 = add(x = var_3709_cast_fp16, y = var_3715_cast_fp16)[name = tensor("key_cast_fp16")]; + tensor var_3725_pad_type_0 = const()[name = tensor("op_3725_pad_type_0"), val = tensor("valid")]; + tensor var_3725_strides_0 = const()[name = tensor("op_3725_strides_0"), val = tensor([1, 1])]; + tensor var_3725_pad_0 = const()[name = tensor("op_3725_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3725_dilations_0 = const()[name = tensor("op_3725_dilations_0"), val = tensor([1, 1])]; + tensor var_3725_groups_0 = const()[name = tensor("op_3725_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149747712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150042688))), name = tensor("layers_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150042816)))]; + tensor var_3725_cast_fp16 = conv(bias = layers_11_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3725_dilations_0, groups = var_3725_groups_0, pad = var_3725_pad_0, pad_type = var_3725_pad_type_0, strides = var_3725_strides_0, weight = layers_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3725_cast_fp16")]; + tensor var_3731_pad_type_0 = const()[name = tensor("op_3731_pad_type_0"), val = tensor("valid")]; + tensor var_3731_strides_0 = const()[name = tensor("op_3731_strides_0"), val = tensor([1, 1])]; + tensor var_3731_pad_0 = const()[name = tensor("op_3731_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3731_dilations_0 = const()[name = tensor("op_3731_dilations_0"), val = tensor([1, 1])]; + tensor var_3731_groups_0 = const()[name = tensor("op_3731_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150053248))), name = tensor("layers_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150044416))), shape = tensor([768, 768, 1, 1])]; + tensor var_3731_cast_fp16 = conv(dilations = var_3731_dilations_0, groups = var_3731_groups_0, pad = var_3731_pad_0, pad_type = var_3731_pad_type_0, strides = var_3731_strides_0, weight = layers_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3731_cast_fp16")]; + tensor value_cast_fp16 = add(x = var_3725_cast_fp16, y = var_3731_cast_fp16)[name = tensor("value_cast_fp16")]; + tensor var_3735 = const()[name = tensor("op_3735"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_cast_fp16 = reshape(shape = var_3735, x = query_cast_fp16)[name = tensor("mh_q_cast_fp16")]; + tensor var_3737_to_fp16 = const()[name = tensor("op_3737_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3738_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_3737_to_fp16)[name = tensor("op_3738_cast_fp16")]; + tensor var_3741 = const()[name = tensor("op_3741"), val = tensor([1, 12, 64, 1500])]; + tensor var_3742_cast_fp16 = reshape(shape = var_3741, x = key_cast_fp16)[name = tensor("op_3742_cast_fp16")]; + tensor mh_w_transpose_x_0 = const()[name = tensor("mh_w_transpose_x_0"), val = tensor(true)]; + tensor mh_w_transpose_y_0 = const()[name = tensor("mh_w_transpose_y_0"), val = tensor(false)]; + tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_3738_cast_fp16, y = var_3742_cast_fp16)[name = tensor("mh_w_cast_fp16")]; + tensor obj_167_cast_fp16 = softmax(axis = var_3521, x = mh_w_cast_fp16)[name = tensor("obj_167_cast_fp16")]; + tensor var_3746 = const()[name = tensor("op_3746"), val = tensor([1, 12, 64, 1500])]; + tensor var_3747_cast_fp16 = reshape(shape = var_3746, x = value_cast_fp16)[name = tensor("op_3747_cast_fp16")]; + tensor attn_transpose_x_0 = const()[name = tensor("attn_transpose_x_0"), val = tensor(false)]; + tensor attn_transpose_y_0 = const()[name = tensor("attn_transpose_y_0"), val = tensor(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_3747_cast_fp16, y = obj_167_cast_fp16)[name = tensor("attn_cast_fp16")]; + tensor var_3750 = const()[name = tensor("op_3750"), val = tensor([1, 768, 1, 1])]; + tensor input_113_cast_fp16 = reshape(shape = var_3750, x = attn_cast_fp16)[name = tensor("input_113_cast_fp16")]; + tensor var_3760_pad_type_0 = const()[name = tensor("op_3760_pad_type_0"), val = tensor("valid")]; + tensor var_3760_strides_0 = const()[name = tensor("op_3760_strides_0"), val = tensor([1, 1])]; + tensor var_3760_pad_0 = const()[name = tensor("op_3760_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3760_dilations_0 = const()[name = tensor("op_3760_dilations_0"), val = tensor([1, 1])]; + tensor var_3760_groups_0 = const()[name = tensor("op_3760_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150127040))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150422016))), name = tensor("layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150422144)))]; + tensor var_3760_cast_fp16 = conv(bias = layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3760_dilations_0, groups = var_3760_groups_0, pad = var_3760_pad_0, pad_type = var_3760_pad_type_0, strides = var_3760_strides_0, weight = layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = tensor("op_3760_cast_fp16")]; + tensor var_3766_pad_type_0 = const()[name = tensor("op_3766_pad_type_0"), val = tensor("valid")]; + tensor var_3766_strides_0 = const()[name = tensor("op_3766_strides_0"), val = tensor([1, 1])]; + tensor var_3766_pad_0 = const()[name = tensor("op_3766_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3766_dilations_0 = const()[name = tensor("op_3766_dilations_0"), val = tensor([1, 1])]; + tensor var_3766_groups_0 = const()[name = tensor("op_3766_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150437184))), name = tensor("layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150423744))), shape = tensor([768, 768, 1, 1])]; + tensor var_3766_cast_fp16 = conv(dilations = var_3766_dilations_0, groups = var_3766_groups_0, pad = var_3766_pad_0, pad_type = var_3766_pad_type_0, strides = var_3766_strides_0, weight = layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_113_cast_fp16)[name = tensor("op_3766_cast_fp16")]; + tensor obj_165_cast_fp16 = add(x = var_3760_cast_fp16, y = var_3766_cast_fp16)[name = tensor("obj_165_cast_fp16")]; + tensor inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_165_cast_fp16)[name = tensor("inputs_71_cast_fp16")]; + tensor out_71_axes_0 = const()[name = tensor("out_71_axes_0"), val = tensor([1])]; + tensor var_3780_to_fp16 = const()[name = tensor("op_3780_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_71_cast_fp16 = layer_norm(axes = out_71_axes_0, epsilon = var_3780_to_fp16, x = inputs_71_cast_fp16)[name = tensor("out_71_cast_fp16")]; + tensor input_115_gamma_0_to_fp16 = const()[name = tensor("input_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150510976)))]; + tensor input_115_beta_0_to_fp16 = const()[name = tensor("input_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150512576)))]; + tensor input_115_epsilon_0_to_fp16 = const()[name = tensor("input_115_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_71_cast_fp16)[name = tensor("input_115_cast_fp16")]; + tensor var_3798_pad_type_0 = const()[name = tensor("op_3798_pad_type_0"), val = tensor("valid")]; + tensor var_3798_strides_0 = const()[name = tensor("op_3798_strides_0"), val = tensor([1, 1])]; + tensor var_3798_pad_0 = const()[name = tensor("op_3798_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3798_dilations_0 = const()[name = tensor("op_3798_dilations_0"), val = tensor([1, 1])]; + tensor var_3798_groups_0 = const()[name = tensor("op_3798_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150514176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151693888))), name = tensor("layers_11_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151694016)))]; + tensor var_3798_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_3798_dilations_0, groups = var_3798_groups_0, pad = var_3798_pad_0, pad_type = var_3798_pad_type_0, strides = var_3798_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = tensor("op_3798_cast_fp16")]; + tensor var_3804_pad_type_0 = const()[name = tensor("op_3804_pad_type_0"), val = tensor("valid")]; + tensor var_3804_strides_0 = const()[name = tensor("op_3804_strides_0"), val = tensor([1, 1])]; + tensor var_3804_pad_0 = const()[name = tensor("op_3804_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3804_dilations_0 = const()[name = tensor("op_3804_dilations_0"), val = tensor([1, 1])]; + tensor var_3804_groups_0 = const()[name = tensor("op_3804_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151725952))), name = tensor("layers_11_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151700224))), shape = tensor([3072, 768, 1, 1])]; + tensor var_3804_cast_fp16 = conv(dilations = var_3804_dilations_0, groups = var_3804_groups_0, pad = var_3804_pad_0, pad_type = var_3804_pad_type_0, strides = var_3804_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_115_cast_fp16)[name = tensor("op_3804_cast_fp16")]; + tensor input_117_cast_fp16 = add(x = var_3798_cast_fp16, y = var_3804_cast_fp16)[name = tensor("input_117_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_117_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_3815_pad_type_0 = const()[name = tensor("op_3815_pad_type_0"), val = tensor("valid")]; + tensor var_3815_strides_0 = const()[name = tensor("op_3815_strides_0"), val = tensor([1, 1])]; + tensor var_3815_pad_0 = const()[name = tensor("op_3815_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3815_dilations_0 = const()[name = tensor("op_3815_dilations_0"), val = tensor([1, 1])]; + tensor var_3815_groups_0 = const()[name = tensor("op_3815_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152020928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153200640))), name = tensor("layers_11_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153200768)))]; + tensor var_3815_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_3815_dilations_0, groups = var_3815_groups_0, pad = var_3815_pad_0, pad_type = var_3815_pad_type_0, strides = var_3815_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = tensor("op_3815_cast_fp16")]; + tensor var_3821_pad_type_0 = const()[name = tensor("op_3821_pad_type_0"), val = tensor("valid")]; + tensor var_3821_strides_0 = const()[name = tensor("op_3821_strides_0"), val = tensor([1, 1])]; + tensor var_3821_pad_0 = const()[name = tensor("op_3821_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3821_dilations_0 = const()[name = tensor("op_3821_dilations_0"), val = tensor([1, 1])]; + tensor var_3821_groups_0 = const()[name = tensor("op_3821_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153236288))), name = tensor("layers_11_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153202368))), shape = tensor([768, 3072, 1, 1])]; + tensor var_3821_cast_fp16 = conv(dilations = var_3821_dilations_0, groups = var_3821_groups_0, pad = var_3821_pad_0, pad_type = var_3821_pad_type_0, strides = var_3821_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = tensor("op_3821_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = add(x = var_3815_cast_fp16, y = var_3821_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; + tensor var_3841_to_fp16 = const()[name = tensor("op_3841_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_3841_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor hidden_states_gamma_0_to_fp16 = const()[name = tensor("hidden_states_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153531264)))]; + tensor hidden_states_beta_0_to_fp16 = const()[name = tensor("hidden_states_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153532864)))]; + tensor hidden_states_epsilon_0_to_fp16 = const()[name = tensor("hidden_states_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor var_3852_axes_0 = const()[name = tensor("op_3852_axes_0"), val = tensor([2])]; + tensor var_3852_cast_fp16 = squeeze(axes = var_3852_axes_0, x = hidden_states_cast_fp16)[name = tensor("op_3852_cast_fp16")]; + tensor var_3855_perm_0 = const()[name = tensor("op_3855_perm_0"), val = tensor([0, 2, 1])]; + tensor linear_0_bias_0_to_fp16 = const()[name = tensor("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153534464)))]; + tensor var_3855_cast_fp16 = transpose(perm = var_3855_perm_0, x = var_3852_cast_fp16)[name = tensor("transpose_0")]; + tensor logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_3855_cast_fp16)[name = tensor("linear_0_cast_fp16")]; + tensor var_3859 = const()[name = tensor("op_3859"), val = tensor(1)]; + tensor obj_171_interleave_0 = const()[name = tensor("obj_171_interleave_0"), val = tensor(false)]; + tensor key_cache_updates = concat(axis = var_3859, interleave = obj_171_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_7_cast_fp16, current_key_9_cast_fp16, current_key_11_cast_fp16, current_key_13_cast_fp16, current_key_15_cast_fp16, current_key_17_cast_fp16, current_key_19_cast_fp16, current_key_21_cast_fp16, current_key_cast_fp16))[name = tensor("obj_171_cast_fp16")]; + tensor var_3862 = const()[name = tensor("op_3862"), val = tensor(1)]; + tensor obj_173_interleave_0 = const()[name = tensor("obj_173_interleave_0"), val = tensor(false)]; + tensor value_cache_updates = concat(axis = var_3862, interleave = obj_173_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_9_cast_fp16, current_value_11_cast_fp16, current_value_13_cast_fp16, current_value_15_cast_fp16, current_value_17_cast_fp16, current_value_19_cast_fp16, current_value_21_cast_fp16, current_value_cast_fp16))[name = tensor("obj_173_cast_fp16")]; + tensor var_3873_begin_0 = const()[name = tensor("op_3873_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_3873_end_0 = const()[name = tensor("op_3873_end_0"), val = tensor([1, 7, 1, 1500])]; + tensor var_3873_end_mask_0 = const()[name = tensor("op_3873_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3873_cast_fp16 = slice_by_index(begin = var_3873_begin_0, end = var_3873_end_0, end_mask = var_3873_end_mask_0, x = obj_97_cast_fp16)[name = tensor("op_3873_cast_fp16")]; + tensor var_3876_begin_0 = const()[name = tensor("op_3876_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3876_end_0 = const()[name = tensor("op_3876_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3876_end_mask_0 = const()[name = tensor("op_3876_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3876_squeeze_mask_0 = const()[name = tensor("op_3876_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3876_cast_fp16 = slice_by_index(begin = var_3876_begin_0, end = var_3876_end_0, end_mask = var_3876_end_mask_0, squeeze_mask = var_3876_squeeze_mask_0, x = var_3873_cast_fp16)[name = tensor("op_3876_cast_fp16")]; + tensor var_3891_begin_0 = const()[name = tensor("op_3891_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3891_end_0 = const()[name = tensor("op_3891_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3891_end_mask_0 = const()[name = tensor("op_3891_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3891_cast_fp16 = slice_by_index(begin = var_3891_begin_0, end = var_3891_end_0, end_mask = var_3891_end_mask_0, x = obj_111_cast_fp16)[name = tensor("op_3891_cast_fp16")]; + tensor var_3894_begin_0 = const()[name = tensor("op_3894_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3894_end_0 = const()[name = tensor("op_3894_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3894_end_mask_0 = const()[name = tensor("op_3894_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3894_squeeze_mask_0 = const()[name = tensor("op_3894_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3894_cast_fp16 = slice_by_index(begin = var_3894_begin_0, end = var_3894_end_0, end_mask = var_3894_end_mask_0, squeeze_mask = var_3894_squeeze_mask_0, x = var_3891_cast_fp16)[name = tensor("op_3894_cast_fp16")]; + tensor var_3909_begin_0 = const()[name = tensor("op_3909_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_3909_end_0 = const()[name = tensor("op_3909_end_0"), val = tensor([1, 4, 1, 1500])]; + tensor var_3909_end_mask_0 = const()[name = tensor("op_3909_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3909_cast_fp16 = slice_by_index(begin = var_3909_begin_0, end = var_3909_end_0, end_mask = var_3909_end_mask_0, x = obj_111_cast_fp16)[name = tensor("op_3909_cast_fp16")]; + tensor var_3912_begin_0 = const()[name = tensor("op_3912_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3912_end_0 = const()[name = tensor("op_3912_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3912_end_mask_0 = const()[name = tensor("op_3912_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3912_squeeze_mask_0 = const()[name = tensor("op_3912_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3912_cast_fp16 = slice_by_index(begin = var_3912_begin_0, end = var_3912_end_0, end_mask = var_3912_end_mask_0, squeeze_mask = var_3912_squeeze_mask_0, x = var_3909_cast_fp16)[name = tensor("op_3912_cast_fp16")]; + tensor var_3927_begin_0 = const()[name = tensor("op_3927_begin_0"), val = tensor([0, 8, 0, 0])]; + tensor var_3927_end_0 = const()[name = tensor("op_3927_end_0"), val = tensor([1, 9, 1, 1500])]; + tensor var_3927_end_mask_0 = const()[name = tensor("op_3927_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3927_cast_fp16 = slice_by_index(begin = var_3927_begin_0, end = var_3927_end_0, end_mask = var_3927_end_mask_0, x = obj_111_cast_fp16)[name = tensor("op_3927_cast_fp16")]; + tensor var_3930_begin_0 = const()[name = tensor("op_3930_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3930_end_0 = const()[name = tensor("op_3930_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3930_end_mask_0 = const()[name = tensor("op_3930_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3930_squeeze_mask_0 = const()[name = tensor("op_3930_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3930_cast_fp16 = slice_by_index(begin = var_3930_begin_0, end = var_3930_end_0, end_mask = var_3930_end_mask_0, squeeze_mask = var_3930_squeeze_mask_0, x = var_3927_cast_fp16)[name = tensor("op_3930_cast_fp16")]; + tensor var_3945_begin_0 = const()[name = tensor("op_3945_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_3945_end_0 = const()[name = tensor("op_3945_end_0"), val = tensor([1, 3, 1, 1500])]; + tensor var_3945_end_mask_0 = const()[name = tensor("op_3945_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3945_cast_fp16 = slice_by_index(begin = var_3945_begin_0, end = var_3945_end_0, end_mask = var_3945_end_mask_0, x = obj_125_cast_fp16)[name = tensor("op_3945_cast_fp16")]; + tensor var_3948_begin_0 = const()[name = tensor("op_3948_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3948_end_0 = const()[name = tensor("op_3948_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3948_end_mask_0 = const()[name = tensor("op_3948_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3948_squeeze_mask_0 = const()[name = tensor("op_3948_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3948_cast_fp16 = slice_by_index(begin = var_3948_begin_0, end = var_3948_end_0, end_mask = var_3948_end_mask_0, squeeze_mask = var_3948_squeeze_mask_0, x = var_3945_cast_fp16)[name = tensor("op_3948_cast_fp16")]; + tensor var_3963_begin_0 = const()[name = tensor("op_3963_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_3963_end_0 = const()[name = tensor("op_3963_end_0"), val = tensor([1, 6, 1, 1500])]; + tensor var_3963_end_mask_0 = const()[name = tensor("op_3963_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3963_cast_fp16 = slice_by_index(begin = var_3963_begin_0, end = var_3963_end_0, end_mask = var_3963_end_mask_0, x = obj_125_cast_fp16)[name = tensor("op_3963_cast_fp16")]; + tensor var_3966_begin_0 = const()[name = tensor("op_3966_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3966_end_0 = const()[name = tensor("op_3966_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3966_end_mask_0 = const()[name = tensor("op_3966_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3966_squeeze_mask_0 = const()[name = tensor("op_3966_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3966_cast_fp16 = slice_by_index(begin = var_3966_begin_0, end = var_3966_end_0, end_mask = var_3966_end_mask_0, squeeze_mask = var_3966_squeeze_mask_0, x = var_3963_cast_fp16)[name = tensor("op_3966_cast_fp16")]; + tensor var_3981_begin_0 = const()[name = tensor("op_3981_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_3981_end_0 = const()[name = tensor("op_3981_end_0"), val = tensor([1, 8, 1, 1500])]; + tensor var_3981_end_mask_0 = const()[name = tensor("op_3981_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3981_cast_fp16 = slice_by_index(begin = var_3981_begin_0, end = var_3981_end_0, end_mask = var_3981_end_mask_0, x = obj_125_cast_fp16)[name = tensor("op_3981_cast_fp16")]; + tensor var_3984_begin_0 = const()[name = tensor("op_3984_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3984_end_0 = const()[name = tensor("op_3984_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3984_end_mask_0 = const()[name = tensor("op_3984_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3984_squeeze_mask_0 = const()[name = tensor("op_3984_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3984_cast_fp16 = slice_by_index(begin = var_3984_begin_0, end = var_3984_end_0, end_mask = var_3984_end_mask_0, squeeze_mask = var_3984_squeeze_mask_0, x = var_3981_cast_fp16)[name = tensor("op_3984_cast_fp16")]; + tensor var_3999_begin_0 = const()[name = tensor("op_3999_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3999_end_0 = const()[name = tensor("op_3999_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3999_end_mask_0 = const()[name = tensor("op_3999_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3999_cast_fp16 = slice_by_index(begin = var_3999_begin_0, end = var_3999_end_0, end_mask = var_3999_end_mask_0, x = obj_139_cast_fp16)[name = tensor("op_3999_cast_fp16")]; + tensor var_4002_begin_0 = const()[name = tensor("op_4002_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4002_end_0 = const()[name = tensor("op_4002_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4002_end_mask_0 = const()[name = tensor("op_4002_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4002_squeeze_mask_0 = const()[name = tensor("op_4002_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4002_cast_fp16 = slice_by_index(begin = var_4002_begin_0, end = var_4002_end_0, end_mask = var_4002_end_mask_0, squeeze_mask = var_4002_squeeze_mask_0, x = var_3999_cast_fp16)[name = tensor("op_4002_cast_fp16")]; + tensor var_4017_begin_0 = const()[name = tensor("op_4017_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_4017_end_0 = const()[name = tensor("op_4017_end_0"), val = tensor([1, 5, 1, 1500])]; + tensor var_4017_end_mask_0 = const()[name = tensor("op_4017_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4017_cast_fp16 = slice_by_index(begin = var_4017_begin_0, end = var_4017_end_0, end_mask = var_4017_end_mask_0, x = obj_139_cast_fp16)[name = tensor("op_4017_cast_fp16")]; + tensor var_4020_begin_0 = const()[name = tensor("op_4020_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4020_end_0 = const()[name = tensor("op_4020_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4020_end_mask_0 = const()[name = tensor("op_4020_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4020_squeeze_mask_0 = const()[name = tensor("op_4020_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4020_cast_fp16 = slice_by_index(begin = var_4020_begin_0, end = var_4020_end_0, end_mask = var_4020_end_mask_0, squeeze_mask = var_4020_squeeze_mask_0, x = var_4017_cast_fp16)[name = tensor("op_4020_cast_fp16")]; + tensor var_4035_begin_0 = const()[name = tensor("op_4035_begin_0"), val = tensor([0, 8, 0, 0])]; + tensor var_4035_end_0 = const()[name = tensor("op_4035_end_0"), val = tensor([1, 9, 1, 1500])]; + tensor var_4035_end_mask_0 = const()[name = tensor("op_4035_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4035_cast_fp16 = slice_by_index(begin = var_4035_begin_0, end = var_4035_end_0, end_mask = var_4035_end_mask_0, x = obj_139_cast_fp16)[name = tensor("op_4035_cast_fp16")]; + tensor var_4038_begin_0 = const()[name = tensor("op_4038_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4038_end_0 = const()[name = tensor("op_4038_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4038_end_mask_0 = const()[name = tensor("op_4038_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4038_squeeze_mask_0 = const()[name = tensor("op_4038_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4038_cast_fp16 = slice_by_index(begin = var_4038_begin_0, end = var_4038_end_0, end_mask = var_4038_end_mask_0, squeeze_mask = var_4038_squeeze_mask_0, x = var_4035_cast_fp16)[name = tensor("op_4038_cast_fp16")]; + tensor var_4053_begin_0 = const()[name = tensor("op_4053_begin_0"), val = tensor([0, 10, 0, 0])]; + tensor var_4053_end_0 = const()[name = tensor("op_4053_end_0"), val = tensor([1, 11, 1, 1500])]; + tensor var_4053_end_mask_0 = const()[name = tensor("op_4053_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4053_cast_fp16 = slice_by_index(begin = var_4053_begin_0, end = var_4053_end_0, end_mask = var_4053_end_mask_0, x = obj_139_cast_fp16)[name = tensor("op_4053_cast_fp16")]; + tensor var_4056_begin_0 = const()[name = tensor("op_4056_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4056_end_0 = const()[name = tensor("op_4056_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4056_end_mask_0 = const()[name = tensor("op_4056_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4056_squeeze_mask_0 = const()[name = tensor("op_4056_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4056_cast_fp16 = slice_by_index(begin = var_4056_begin_0, end = var_4056_end_0, end_mask = var_4056_end_mask_0, squeeze_mask = var_4056_squeeze_mask_0, x = var_4053_cast_fp16)[name = tensor("op_4056_cast_fp16")]; + tensor var_4071_begin_0 = const()[name = tensor("op_4071_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4071_end_0 = const()[name = tensor("op_4071_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4071_end_mask_0 = const()[name = tensor("op_4071_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4071_cast_fp16 = slice_by_index(begin = var_4071_begin_0, end = var_4071_end_0, end_mask = var_4071_end_mask_0, x = obj_153_cast_fp16)[name = tensor("op_4071_cast_fp16")]; + tensor var_4074_begin_0 = const()[name = tensor("op_4074_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4074_end_0 = const()[name = tensor("op_4074_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4074_end_mask_0 = const()[name = tensor("op_4074_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4074_squeeze_mask_0 = const()[name = tensor("op_4074_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4074_cast_fp16 = slice_by_index(begin = var_4074_begin_0, end = var_4074_end_0, end_mask = var_4074_end_mask_0, squeeze_mask = var_4074_squeeze_mask_0, x = var_4071_cast_fp16)[name = tensor("op_4074_cast_fp16")]; + tensor var_4089_begin_0 = const()[name = tensor("op_4089_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_4089_end_0 = const()[name = tensor("op_4089_end_0"), val = tensor([1, 2, 1, 1500])]; + tensor var_4089_end_mask_0 = const()[name = tensor("op_4089_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4089_cast_fp16 = slice_by_index(begin = var_4089_begin_0, end = var_4089_end_0, end_mask = var_4089_end_mask_0, x = obj_153_cast_fp16)[name = tensor("op_4089_cast_fp16")]; + tensor var_4092_begin_0 = const()[name = tensor("op_4092_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4092_end_0 = const()[name = tensor("op_4092_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4092_end_mask_0 = const()[name = tensor("op_4092_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4092_squeeze_mask_0 = const()[name = tensor("op_4092_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4092_cast_fp16 = slice_by_index(begin = var_4092_begin_0, end = var_4092_end_0, end_mask = var_4092_end_mask_0, squeeze_mask = var_4092_squeeze_mask_0, x = var_4089_cast_fp16)[name = tensor("op_4092_cast_fp16")]; + tensor var_4107_begin_0 = const()[name = tensor("op_4107_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_4107_end_0 = const()[name = tensor("op_4107_end_0"), val = tensor([1, 3, 1, 1500])]; + tensor var_4107_end_mask_0 = const()[name = tensor("op_4107_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4107_cast_fp16 = slice_by_index(begin = var_4107_begin_0, end = var_4107_end_0, end_mask = var_4107_end_mask_0, x = obj_153_cast_fp16)[name = tensor("op_4107_cast_fp16")]; + tensor var_4110_begin_0 = const()[name = tensor("op_4110_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4110_end_0 = const()[name = tensor("op_4110_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4110_end_mask_0 = const()[name = tensor("op_4110_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4110_squeeze_mask_0 = const()[name = tensor("op_4110_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4110_cast_fp16 = slice_by_index(begin = var_4110_begin_0, end = var_4110_end_0, end_mask = var_4110_end_mask_0, squeeze_mask = var_4110_squeeze_mask_0, x = var_4107_cast_fp16)[name = tensor("op_4110_cast_fp16")]; + tensor var_4125_begin_0 = const()[name = tensor("op_4125_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_4125_end_0 = const()[name = tensor("op_4125_end_0"), val = tensor([1, 4, 1, 1500])]; + tensor var_4125_end_mask_0 = const()[name = tensor("op_4125_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4125_cast_fp16 = slice_by_index(begin = var_4125_begin_0, end = var_4125_end_0, end_mask = var_4125_end_mask_0, x = obj_153_cast_fp16)[name = tensor("op_4125_cast_fp16")]; + tensor var_4128_begin_0 = const()[name = tensor("op_4128_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4128_end_0 = const()[name = tensor("op_4128_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4128_end_mask_0 = const()[name = tensor("op_4128_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4128_squeeze_mask_0 = const()[name = tensor("op_4128_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4128_cast_fp16 = slice_by_index(begin = var_4128_begin_0, end = var_4128_end_0, end_mask = var_4128_end_mask_0, squeeze_mask = var_4128_squeeze_mask_0, x = var_4125_cast_fp16)[name = tensor("op_4128_cast_fp16")]; + tensor var_4143_begin_0 = const()[name = tensor("op_4143_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_4143_end_0 = const()[name = tensor("op_4143_end_0"), val = tensor([1, 7, 1, 1500])]; + tensor var_4143_end_mask_0 = const()[name = tensor("op_4143_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4143_cast_fp16 = slice_by_index(begin = var_4143_begin_0, end = var_4143_end_0, end_mask = var_4143_end_mask_0, x = obj_153_cast_fp16)[name = tensor("op_4143_cast_fp16")]; + tensor var_4146_begin_0 = const()[name = tensor("op_4146_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4146_end_0 = const()[name = tensor("op_4146_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4146_end_mask_0 = const()[name = tensor("op_4146_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4146_squeeze_mask_0 = const()[name = tensor("op_4146_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4146_cast_fp16 = slice_by_index(begin = var_4146_begin_0, end = var_4146_end_0, end_mask = var_4146_end_mask_0, squeeze_mask = var_4146_squeeze_mask_0, x = var_4143_cast_fp16)[name = tensor("op_4146_cast_fp16")]; + tensor var_4161_begin_0 = const()[name = tensor("op_4161_begin_0"), val = tensor([0, 11, 0, 0])]; + tensor var_4161_end_0 = const()[name = tensor("op_4161_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4161_end_mask_0 = const()[name = tensor("op_4161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4161_cast_fp16 = slice_by_index(begin = var_4161_begin_0, end = var_4161_end_0, end_mask = var_4161_end_mask_0, x = obj_153_cast_fp16)[name = tensor("op_4161_cast_fp16")]; + tensor var_4164_begin_0 = const()[name = tensor("op_4164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4164_end_0 = const()[name = tensor("op_4164_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4164_end_mask_0 = const()[name = tensor("op_4164_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4164_squeeze_mask_0 = const()[name = tensor("op_4164_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4164_cast_fp16 = slice_by_index(begin = var_4164_begin_0, end = var_4164_end_0, end_mask = var_4164_end_mask_0, squeeze_mask = var_4164_squeeze_mask_0, x = var_4161_cast_fp16)[name = tensor("op_4164_cast_fp16")]; + tensor var_4179_begin_0 = const()[name = tensor("op_4179_begin_0"), val = tensor([0, 2, 0, 0])]; + tensor var_4179_end_0 = const()[name = tensor("op_4179_end_0"), val = tensor([1, 3, 1, 1500])]; + tensor var_4179_end_mask_0 = const()[name = tensor("op_4179_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4179_cast_fp16 = slice_by_index(begin = var_4179_begin_0, end = var_4179_end_0, end_mask = var_4179_end_mask_0, x = obj_167_cast_fp16)[name = tensor("op_4179_cast_fp16")]; + tensor var_4182_begin_0 = const()[name = tensor("op_4182_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4182_end_0 = const()[name = tensor("op_4182_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4182_end_mask_0 = const()[name = tensor("op_4182_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4182_squeeze_mask_0 = const()[name = tensor("op_4182_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4182_cast_fp16 = slice_by_index(begin = var_4182_begin_0, end = var_4182_end_0, end_mask = var_4182_end_mask_0, squeeze_mask = var_4182_squeeze_mask_0, x = var_4179_cast_fp16)[name = tensor("op_4182_cast_fp16")]; + tensor var_4197_begin_0 = const()[name = tensor("op_4197_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_4197_end_0 = const()[name = tensor("op_4197_end_0"), val = tensor([1, 5, 1, 1500])]; + tensor var_4197_end_mask_0 = const()[name = tensor("op_4197_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4197_cast_fp16 = slice_by_index(begin = var_4197_begin_0, end = var_4197_end_0, end_mask = var_4197_end_mask_0, x = obj_167_cast_fp16)[name = tensor("op_4197_cast_fp16")]; + tensor var_4200_begin_0 = const()[name = tensor("op_4200_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4200_end_0 = const()[name = tensor("op_4200_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4200_end_mask_0 = const()[name = tensor("op_4200_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4200_squeeze_mask_0 = const()[name = tensor("op_4200_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4200_cast_fp16 = slice_by_index(begin = var_4200_begin_0, end = var_4200_end_0, end_mask = var_4200_end_mask_0, squeeze_mask = var_4200_squeeze_mask_0, x = var_4197_cast_fp16)[name = tensor("op_4200_cast_fp16")]; + tensor var_4207 = const()[name = tensor("op_4207"), val = tensor(1)]; + tensor var_4208_interleave_0 = const()[name = tensor("op_4208_interleave_0"), val = tensor(false)]; + tensor var_4208_cast_fp16 = concat(axis = var_4207, interleave = var_4208_interleave_0, values = (var_3876_cast_fp16, var_3894_cast_fp16, var_3912_cast_fp16, var_3930_cast_fp16, var_3948_cast_fp16, var_3966_cast_fp16, var_3984_cast_fp16, var_4002_cast_fp16, var_4020_cast_fp16, var_4038_cast_fp16, var_4056_cast_fp16, var_4074_cast_fp16, var_4092_cast_fp16, var_4110_cast_fp16, var_4128_cast_fp16, var_4146_cast_fp16, var_4164_cast_fp16, var_4182_cast_fp16, var_4200_cast_fp16))[name = tensor("op_4208_cast_fp16")]; + tensor obj_axes_0 = const()[name = tensor("obj_axes_0"), val = tensor([1])]; + tensor obj_keep_dims_0 = const()[name = tensor("obj_keep_dims_0"), val = tensor(false)]; + tensor alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = obj_keep_dims_0, x = var_4208_cast_fp16)[name = tensor("obj_cast_fp16")]; + } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights); +} \ No newline at end of file