diff --git "a/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mil" "b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mil" @@ -0,0 +1,1625 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})] +{ + func main(tensor melspectrogram_features) { + tensor var_76_pad_type_0 = const()[name = tensor("op_76_pad_type_0"), val = tensor("custom")]; + tensor var_76_pad_0 = const()[name = tensor("op_76_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_76_strides_0 = const()[name = tensor("op_76_strides_0"), val = tensor([1, 1])]; + tensor var_76_dilations_0 = const()[name = tensor("op_76_dilations_0"), val = tensor([1, 1])]; + tensor var_76_groups_0 = const()[name = tensor("op_76_groups_0"), val = tensor(1)]; + tensor var_45_to_fp16 = const()[name = tensor("op_45_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_57_to_fp16 = const()[name = tensor("op_57_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368768)))]; + tensor var_76_cast_fp16 = conv(bias = var_57_to_fp16, dilations = var_76_dilations_0, groups = var_76_groups_0, pad = var_76_pad_0, pad_type = var_76_pad_type_0, strides = var_76_strides_0, weight = var_45_to_fp16, x = melspectrogram_features)[name = tensor("op_76_cast_fp16")]; + tensor var_114_pad_type_0 = const()[name = tensor("op_114_pad_type_0"), val = tensor("custom")]; + tensor var_114_pad_0 = const()[name = tensor("op_114_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_114_strides_0 = const()[name = tensor("op_114_strides_0"), val = tensor([1, 1])]; + tensor var_114_dilations_0 = const()[name = tensor("op_114_dilations_0"), val = tensor([1, 1])]; + tensor var_114_groups_0 = const()[name = tensor("op_114_groups_0"), val = tensor(1)]; + tensor op_89_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(370368))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462592))), name = tensor("op_89_to_fp16_palettized"), shape = tensor([768, 80, 1, 3])]; + tensor var_95_to_fp16 = const()[name = tensor("op_95_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462720)))]; + tensor var_114_cast_fp16 = conv(bias = var_95_to_fp16, dilations = var_114_dilations_0, groups = var_114_groups_0, pad = var_114_pad_0, pad_type = var_114_pad_type_0, strides = var_114_strides_0, weight = op_89_to_fp16_palettized, x = melspectrogram_features)[name = tensor("op_114_cast_fp16")]; + tensor var_116_cast_fp16 = add(x = var_76_cast_fp16, y = var_114_cast_fp16)[name = tensor("op_116_cast_fp16")]; + tensor hidden_states_1_mode_0 = const()[name = tensor("hidden_states_1_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_116_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_162_pad_type_0 = const()[name = tensor("op_162_pad_type_0"), val = tensor("custom")]; + tensor var_162_pad_0 = const()[name = tensor("op_162_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_162_strides_0 = const()[name = tensor("op_162_strides_0"), val = tensor([2, 2])]; + tensor var_162_dilations_0 = const()[name = tensor("op_162_dilations_0"), val = tensor([1, 1])]; + tensor var_162_groups_0 = const()[name = tensor("op_162_groups_0"), val = tensor(1)]; + tensor var_131_to_fp16 = const()[name = tensor("op_131_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(464320)))]; + tensor var_162_cast_fp16 = conv(bias = var_57_to_fp16, dilations = var_162_dilations_0, groups = var_162_groups_0, pad = var_162_pad_0, pad_type = var_162_pad_type_0, strides = var_162_strides_0, weight = var_131_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor("op_162_cast_fp16")]; + tensor var_200_pad_type_0 = const()[name = tensor("op_200_pad_type_0"), val = tensor("custom")]; + tensor var_200_pad_0 = const()[name = tensor("op_200_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_200_strides_0 = const()[name = tensor("op_200_strides_0"), val = tensor([2, 2])]; + tensor var_200_dilations_0 = const()[name = tensor("op_200_dilations_0"), val = tensor([1, 1])]; + tensor var_200_groups_0 = const()[name = tensor("op_200_groups_0"), val = tensor(1)]; + tensor op_175_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4003328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4888128))), name = tensor("op_175_to_fp16_palettized"), shape = tensor([768, 768, 1, 3])]; + tensor var_181_to_fp16 = const()[name = tensor("op_181_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4888256)))]; + tensor var_200_cast_fp16 = conv(bias = var_181_to_fp16, dilations = var_200_dilations_0, groups = var_200_groups_0, pad = var_200_pad_0, pad_type = var_200_pad_type_0, strides = var_200_strides_0, weight = op_175_to_fp16_palettized, x = hidden_states_1_cast_fp16)[name = tensor("op_200_cast_fp16")]; + tensor var_202_cast_fp16 = add(x = var_162_cast_fp16, y = var_200_cast_fp16)[name = tensor("op_202_cast_fp16")]; + tensor hidden_states_3_mode_0 = const()[name = tensor("hidden_states_3_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_202_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor var_222_to_fp16 = const()[name = tensor("op_222_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4889856)))]; + tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_222_to_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; + tensor var_254_to_fp16 = const()[name = tensor("op_254_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_254_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7193920)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7195520)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7197120)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_276_pad_type_0 = const()[name = tensor("op_276_pad_type_0"), val = tensor("valid")]; + tensor var_276_strides_0 = const()[name = tensor("op_276_strides_0"), val = tensor([1, 1])]; + tensor var_276_pad_0 = const()[name = tensor("op_276_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_276_dilations_0 = const()[name = tensor("op_276_dilations_0"), val = tensor([1, 1])]; + tensor var_276_groups_0 = const()[name = tensor("op_276_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7198720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7493696))), name = tensor("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7493824)))]; + tensor var_276_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_276_dilations_0, groups = var_276_groups_0, pad = var_276_pad_0, pad_type = var_276_pad_type_0, strides = var_276_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_276_cast_fp16")]; + tensor var_282_pad_type_0 = const()[name = tensor("op_282_pad_type_0"), val = tensor("valid")]; + tensor var_282_strides_0 = const()[name = tensor("op_282_strides_0"), val = tensor([1, 1])]; + tensor var_282_pad_0 = const()[name = tensor("op_282_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_282_dilations_0 = const()[name = tensor("op_282_dilations_0"), val = tensor([1, 1])]; + tensor var_282_groups_0 = const()[name = tensor("op_282_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7518208))), name = tensor("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7495424))), shape = tensor([768, 768, 1, 1])]; + tensor var_282_cast_fp16 = conv(dilations = var_282_dilations_0, groups = var_282_groups_0, pad = var_282_pad_0, pad_type = var_282_pad_type_0, strides = var_282_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor query_1_cast_fp16 = add(x = var_276_cast_fp16, y = var_282_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_291_pad_type_0 = const()[name = tensor("op_291_pad_type_0"), val = tensor("valid")]; + tensor var_291_strides_0 = const()[name = tensor("op_291_strides_0"), val = tensor([1, 1])]; + tensor var_291_pad_0 = const()[name = tensor("op_291_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_291_dilations_0 = const()[name = tensor("op_291_dilations_0"), val = tensor([1, 1])]; + tensor var_291_groups_0 = const()[name = tensor("op_291_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7592000))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7886976))), name = tensor("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_291_cast_fp16 = conv(dilations = var_291_dilations_0, groups = var_291_groups_0, pad = var_291_pad_0, pad_type = var_291_pad_type_0, strides = var_291_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_291_cast_fp16")]; + tensor var_297_pad_type_0 = const()[name = tensor("op_297_pad_type_0"), val = tensor("valid")]; + tensor var_297_strides_0 = const()[name = tensor("op_297_strides_0"), val = tensor([1, 1])]; + tensor var_297_pad_0 = const()[name = tensor("op_297_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_297_dilations_0 = const()[name = tensor("op_297_dilations_0"), val = tensor([1, 1])]; + tensor var_297_groups_0 = const()[name = tensor("op_297_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7908352))), name = tensor("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7887104))), shape = tensor([768, 768, 1, 1])]; + tensor var_297_cast_fp16 = conv(dilations = var_297_dilations_0, groups = var_297_groups_0, pad = var_297_pad_0, pad_type = var_297_pad_type_0, strides = var_297_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_297_cast_fp16")]; + tensor key_1_cast_fp16 = add(x = var_291_cast_fp16, y = var_297_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_307_pad_type_0 = const()[name = tensor("op_307_pad_type_0"), val = tensor("valid")]; + tensor var_307_strides_0 = const()[name = tensor("op_307_strides_0"), val = tensor([1, 1])]; + tensor var_307_pad_0 = const()[name = tensor("op_307_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_307_dilations_0 = const()[name = tensor("op_307_dilations_0"), val = tensor([1, 1])]; + tensor var_307_groups_0 = const()[name = tensor("op_307_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7982144))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8277120))), name = tensor("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8277248)))]; + tensor var_307_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_307_dilations_0, groups = var_307_groups_0, pad = var_307_pad_0, pad_type = var_307_pad_type_0, strides = var_307_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_307_cast_fp16")]; + tensor var_313_pad_type_0 = const()[name = tensor("op_313_pad_type_0"), val = tensor("valid")]; + tensor var_313_strides_0 = const()[name = tensor("op_313_strides_0"), val = tensor([1, 1])]; + tensor var_313_pad_0 = const()[name = tensor("op_313_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_313_dilations_0 = const()[name = tensor("op_313_dilations_0"), val = tensor([1, 1])]; + tensor var_313_groups_0 = const()[name = tensor("op_313_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8302400))), name = tensor("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8278848))), shape = tensor([768, 768, 1, 1])]; + tensor var_313_cast_fp16 = conv(dilations = var_313_dilations_0, groups = var_313_groups_0, pad = var_313_pad_0, pad_type = var_313_pad_type_0, strides = var_313_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_307_cast_fp16, y = var_313_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_317 = const()[name = tensor("op_317"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_317, x = query_1_cast_fp16)[name = tensor("mh_q_1_cast_fp16")]; + tensor var_319_to_fp16 = const()[name = tensor("op_319_to_fp16"), val = tensor(0x1p-3)]; + tensor var_320_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_319_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor var_323 = const()[name = tensor("op_323"), val = tensor([1, 12, 64, 1500])]; + tensor var_324_cast_fp16 = reshape(shape = var_323, x = key_1_cast_fp16)[name = tensor("op_324_cast_fp16")]; + tensor mh_w_1_transpose_x_0 = const()[name = tensor("mh_w_1_transpose_x_0"), val = tensor(true)]; + tensor mh_w_1_transpose_y_0 = const()[name = tensor("mh_w_1_transpose_y_0"), val = tensor(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_320_cast_fp16, y = var_324_cast_fp16)[name = tensor("mh_w_1_cast_fp16")]; + tensor var_327_cast_fp16 = softmax(axis = var_232, x = mh_w_1_cast_fp16)[name = tensor("op_327_cast_fp16")]; + tensor var_328 = const()[name = tensor("op_328"), val = tensor([1, 12, 64, 1500])]; + tensor var_329_cast_fp16 = reshape(shape = var_328, x = value_1_cast_fp16)[name = tensor("op_329_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_329_cast_fp16, y = var_327_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_332 = const()[name = tensor("op_332"), val = tensor([1, 768, 1, 1500])]; + tensor input_1_cast_fp16 = reshape(shape = var_332, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_342_pad_type_0 = const()[name = tensor("op_342_pad_type_0"), val = tensor("valid")]; + tensor var_342_strides_0 = const()[name = tensor("op_342_strides_0"), val = tensor([1, 1])]; + tensor var_342_pad_0 = const()[name = tensor("op_342_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_342_dilations_0 = const()[name = tensor("op_342_dilations_0"), val = tensor([1, 1])]; + tensor var_342_groups_0 = const()[name = tensor("op_342_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8376192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8671168))), name = tensor("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8671296)))]; + tensor var_342_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_342_dilations_0, groups = var_342_groups_0, pad = var_342_pad_0, pad_type = var_342_pad_type_0, strides = var_342_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = tensor("op_342_cast_fp16")]; + tensor var_348_pad_type_0 = const()[name = tensor("op_348_pad_type_0"), val = tensor("valid")]; + tensor var_348_strides_0 = const()[name = tensor("op_348_strides_0"), val = tensor([1, 1])]; + tensor var_348_pad_0 = const()[name = tensor("op_348_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_348_dilations_0 = const()[name = tensor("op_348_dilations_0"), val = tensor([1, 1])]; + tensor var_348_groups_0 = const()[name = tensor("op_348_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8692480))), name = tensor("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8672896))), shape = tensor([768, 768, 1, 1])]; + tensor var_348_cast_fp16 = conv(dilations = var_348_dilations_0, groups = var_348_groups_0, pad = var_348_pad_0, pad_type = var_348_pad_type_0, strides = var_348_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor obj_3_cast_fp16 = add(x = var_342_cast_fp16, y = var_348_cast_fp16)[name = tensor("obj_3_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; + tensor var_359_to_fp16 = const()[name = tensor("op_359_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_359_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor input_3_gamma_0_to_fp16 = const()[name = tensor("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8766272)))]; + tensor input_3_beta_0_to_fp16 = const()[name = tensor("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8767872)))]; + tensor input_3_epsilon_0_to_fp16 = const()[name = tensor("input_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_377_pad_type_0 = const()[name = tensor("op_377_pad_type_0"), val = tensor("valid")]; + tensor var_377_strides_0 = const()[name = tensor("op_377_strides_0"), val = tensor([1, 1])]; + tensor var_377_pad_0 = const()[name = tensor("op_377_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_377_dilations_0 = const()[name = tensor("op_377_dilations_0"), val = tensor([1, 1])]; + tensor var_377_groups_0 = const()[name = tensor("op_377_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8769472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9949184))), name = tensor("layers_0_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9949312)))]; + tensor var_377_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_377_dilations_0, groups = var_377_groups_0, pad = var_377_pad_0, pad_type = var_377_pad_type_0, strides = var_377_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = tensor("op_377_cast_fp16")]; + tensor var_383_pad_type_0 = const()[name = tensor("op_383_pad_type_0"), val = tensor("valid")]; + tensor var_383_strides_0 = const()[name = tensor("op_383_strides_0"), val = tensor([1, 1])]; + tensor var_383_pad_0 = const()[name = tensor("op_383_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_dilations_0 = const()[name = tensor("op_383_dilations_0"), val = tensor([1, 1])]; + tensor var_383_groups_0 = const()[name = tensor("op_383_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10044736))), name = tensor("layers_0_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9955520))), shape = tensor([3072, 768, 1, 1])]; + tensor var_383_cast_fp16 = conv(dilations = var_383_dilations_0, groups = var_383_groups_0, pad = var_383_pad_0, pad_type = var_383_pad_type_0, strides = var_383_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor input_5_cast_fp16 = add(x = var_377_cast_fp16, y = var_383_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; + tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_394_pad_type_0 = const()[name = tensor("op_394_pad_type_0"), val = tensor("valid")]; + tensor var_394_strides_0 = const()[name = tensor("op_394_strides_0"), val = tensor([1, 1])]; + tensor var_394_pad_0 = const()[name = tensor("op_394_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_394_dilations_0 = const()[name = tensor("op_394_dilations_0"), val = tensor([1, 1])]; + tensor var_394_groups_0 = const()[name = tensor("op_394_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10339712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11519424))), name = tensor("layers_0_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11519552)))]; + tensor var_394_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_394_dilations_0, groups = var_394_groups_0, pad = var_394_pad_0, pad_type = var_394_pad_type_0, strides = var_394_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = tensor("op_394_cast_fp16")]; + tensor var_400_pad_type_0 = const()[name = tensor("op_400_pad_type_0"), val = tensor("valid")]; + tensor var_400_strides_0 = const()[name = tensor("op_400_strides_0"), val = tensor([1, 1])]; + tensor var_400_pad_0 = const()[name = tensor("op_400_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_400_dilations_0 = const()[name = tensor("op_400_dilations_0"), val = tensor([1, 1])]; + tensor var_400_groups_0 = const()[name = tensor("op_400_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11594560))), name = tensor("layers_0_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11521152))), shape = tensor([768, 3072, 1, 1])]; + tensor var_400_cast_fp16 = conv(dilations = var_400_dilations_0, groups = var_400_groups_0, pad = var_400_pad_0, pad_type = var_400_pad_type_0, strides = var_400_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_7_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = var_394_cast_fp16, y = var_400_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor var_406 = const()[name = tensor("op_406"), val = tensor(3)]; + tensor out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; + tensor var_428_to_fp16 = const()[name = tensor("op_428_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_428_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor obj_5_gamma_0_to_fp16 = const()[name = tensor("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11889536)))]; + tensor obj_5_beta_0_to_fp16 = const()[name = tensor("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11891136)))]; + tensor obj_5_epsilon_0_to_fp16 = const()[name = tensor("obj_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("obj_5_cast_fp16")]; + tensor var_450_pad_type_0 = const()[name = tensor("op_450_pad_type_0"), val = tensor("valid")]; + tensor var_450_strides_0 = const()[name = tensor("op_450_strides_0"), val = tensor([1, 1])]; + tensor var_450_pad_0 = const()[name = tensor("op_450_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_450_dilations_0 = const()[name = tensor("op_450_dilations_0"), val = tensor([1, 1])]; + tensor var_450_groups_0 = const()[name = tensor("op_450_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11892736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12187712))), name = tensor("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12187840)))]; + tensor var_450_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_450_dilations_0, groups = var_450_groups_0, pad = var_450_pad_0, pad_type = var_450_pad_type_0, strides = var_450_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_450_cast_fp16")]; + tensor var_456_pad_type_0 = const()[name = tensor("op_456_pad_type_0"), val = tensor("valid")]; + tensor var_456_strides_0 = const()[name = tensor("op_456_strides_0"), val = tensor([1, 1])]; + tensor var_456_pad_0 = const()[name = tensor("op_456_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_456_dilations_0 = const()[name = tensor("op_456_dilations_0"), val = tensor([1, 1])]; + tensor var_456_groups_0 = const()[name = tensor("op_456_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12206656))), name = tensor("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12189440))), shape = tensor([768, 768, 1, 1])]; + tensor var_456_cast_fp16 = conv(dilations = var_456_dilations_0, groups = var_456_groups_0, pad = var_456_pad_0, pad_type = var_456_pad_type_0, strides = var_456_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor query_3_cast_fp16 = add(x = var_450_cast_fp16, y = var_456_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_465_pad_type_0 = const()[name = tensor("op_465_pad_type_0"), val = tensor("valid")]; + tensor var_465_strides_0 = const()[name = tensor("op_465_strides_0"), val = tensor([1, 1])]; + tensor var_465_pad_0 = const()[name = tensor("op_465_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_465_dilations_0 = const()[name = tensor("op_465_dilations_0"), val = tensor([1, 1])]; + tensor var_465_groups_0 = const()[name = tensor("op_465_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12280448))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12575424))), name = tensor("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_465_cast_fp16 = conv(dilations = var_465_dilations_0, groups = var_465_groups_0, pad = var_465_pad_0, pad_type = var_465_pad_type_0, strides = var_465_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_465_cast_fp16")]; + tensor var_471_pad_type_0 = const()[name = tensor("op_471_pad_type_0"), val = tensor("valid")]; + tensor var_471_strides_0 = const()[name = tensor("op_471_strides_0"), val = tensor([1, 1])]; + tensor var_471_pad_0 = const()[name = tensor("op_471_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_471_dilations_0 = const()[name = tensor("op_471_dilations_0"), val = tensor([1, 1])]; + tensor var_471_groups_0 = const()[name = tensor("op_471_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12591360))), name = tensor("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12575552))), shape = tensor([768, 768, 1, 1])]; + tensor var_471_cast_fp16 = conv(dilations = var_471_dilations_0, groups = var_471_groups_0, pad = var_471_pad_0, pad_type = var_471_pad_type_0, strides = var_471_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_471_cast_fp16")]; + tensor key_3_cast_fp16 = add(x = var_465_cast_fp16, y = var_471_cast_fp16)[name = tensor("key_3_cast_fp16")]; + tensor var_481_pad_type_0 = const()[name = tensor("op_481_pad_type_0"), val = tensor("valid")]; + tensor var_481_strides_0 = const()[name = tensor("op_481_strides_0"), val = tensor([1, 1])]; + tensor var_481_pad_0 = const()[name = tensor("op_481_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_481_dilations_0 = const()[name = tensor("op_481_dilations_0"), val = tensor([1, 1])]; + tensor var_481_groups_0 = const()[name = tensor("op_481_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12665152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12960128))), name = tensor("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12960256)))]; + tensor var_481_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_481_dilations_0, groups = var_481_groups_0, pad = var_481_pad_0, pad_type = var_481_pad_type_0, strides = var_481_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_487_pad_type_0 = const()[name = tensor("op_487_pad_type_0"), val = tensor("valid")]; + tensor var_487_strides_0 = const()[name = tensor("op_487_strides_0"), val = tensor([1, 1])]; + tensor var_487_pad_0 = const()[name = tensor("op_487_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_487_dilations_0 = const()[name = tensor("op_487_dilations_0"), val = tensor([1, 1])]; + tensor var_487_groups_0 = const()[name = tensor("op_487_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12975872))), name = tensor("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12961856))), shape = tensor([768, 768, 1, 1])]; + tensor var_487_cast_fp16 = conv(dilations = var_487_dilations_0, groups = var_487_groups_0, pad = var_487_pad_0, pad_type = var_487_pad_type_0, strides = var_487_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_487_cast_fp16")]; + tensor value_3_cast_fp16 = add(x = var_481_cast_fp16, y = var_487_cast_fp16)[name = tensor("value_3_cast_fp16")]; + tensor var_491 = const()[name = tensor("op_491"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_3_cast_fp16 = reshape(shape = var_491, x = query_3_cast_fp16)[name = tensor("mh_q_3_cast_fp16")]; + tensor var_493_to_fp16 = const()[name = tensor("op_493_to_fp16"), val = tensor(0x1p-3)]; + tensor var_494_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_493_to_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_497 = const()[name = tensor("op_497"), val = tensor([1, 12, 64, 1500])]; + tensor var_498_cast_fp16 = reshape(shape = var_497, x = key_3_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor mh_w_3_transpose_x_0 = const()[name = tensor("mh_w_3_transpose_x_0"), val = tensor(true)]; + tensor mh_w_3_transpose_y_0 = const()[name = tensor("mh_w_3_transpose_y_0"), val = tensor(false)]; + tensor mh_w_3_cast_fp16 = matmul(transpose_x = mh_w_3_transpose_x_0, transpose_y = mh_w_3_transpose_y_0, x = var_494_cast_fp16, y = var_498_cast_fp16)[name = tensor("mh_w_3_cast_fp16")]; + tensor var_501_cast_fp16 = softmax(axis = var_406, x = mh_w_3_cast_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_502 = const()[name = tensor("op_502"), val = tensor([1, 12, 64, 1500])]; + tensor var_503_cast_fp16 = reshape(shape = var_502, x = value_3_cast_fp16)[name = tensor("op_503_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_503_cast_fp16, y = var_501_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 768, 1, 1500])]; + tensor input_9_cast_fp16 = reshape(shape = var_506, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_516_pad_type_0 = const()[name = tensor("op_516_pad_type_0"), val = tensor("valid")]; + tensor var_516_strides_0 = const()[name = tensor("op_516_strides_0"), val = tensor([1, 1])]; + tensor var_516_pad_0 = const()[name = tensor("op_516_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_516_dilations_0 = const()[name = tensor("op_516_dilations_0"), val = tensor([1, 1])]; + tensor var_516_groups_0 = const()[name = tensor("op_516_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13049664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13344640))), name = tensor("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13344768)))]; + tensor var_516_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_516_dilations_0, groups = var_516_groups_0, pad = var_516_pad_0, pad_type = var_516_pad_type_0, strides = var_516_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = tensor("op_516_cast_fp16")]; + tensor var_522_pad_type_0 = const()[name = tensor("op_522_pad_type_0"), val = tensor("valid")]; + tensor var_522_strides_0 = const()[name = tensor("op_522_strides_0"), val = tensor([1, 1])]; + tensor var_522_pad_0 = const()[name = tensor("op_522_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_522_dilations_0 = const()[name = tensor("op_522_dilations_0"), val = tensor([1, 1])]; + tensor var_522_groups_0 = const()[name = tensor("op_522_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13357056))), name = tensor("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13346368))), shape = tensor([768, 768, 1, 1])]; + tensor var_522_cast_fp16 = conv(dilations = var_522_dilations_0, groups = var_522_groups_0, pad = var_522_pad_0, pad_type = var_522_pad_type_0, strides = var_522_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor obj_7_cast_fp16 = add(x = var_516_cast_fp16, y = var_522_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor out_7_axes_0 = const()[name = tensor("out_7_axes_0"), val = tensor([1])]; + tensor var_533_to_fp16 = const()[name = tensor("op_533_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_533_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor input_11_gamma_0_to_fp16 = const()[name = tensor("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13430848)))]; + tensor input_11_beta_0_to_fp16 = const()[name = tensor("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13432448)))]; + tensor input_11_epsilon_0_to_fp16 = const()[name = tensor("input_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_551_pad_type_0 = const()[name = tensor("op_551_pad_type_0"), val = tensor("valid")]; + tensor var_551_strides_0 = const()[name = tensor("op_551_strides_0"), val = tensor([1, 1])]; + tensor var_551_pad_0 = const()[name = tensor("op_551_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_551_dilations_0 = const()[name = tensor("op_551_dilations_0"), val = tensor([1, 1])]; + tensor var_551_groups_0 = const()[name = tensor("op_551_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13434048))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14613760))), name = tensor("layers_1_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14613888)))]; + tensor var_551_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_551_dilations_0, groups = var_551_groups_0, pad = var_551_pad_0, pad_type = var_551_pad_type_0, strides = var_551_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = tensor("op_551_cast_fp16")]; + tensor var_557_pad_type_0 = const()[name = tensor("op_557_pad_type_0"), val = tensor("valid")]; + tensor var_557_strides_0 = const()[name = tensor("op_557_strides_0"), val = tensor([1, 1])]; + tensor var_557_pad_0 = const()[name = tensor("op_557_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_557_dilations_0 = const()[name = tensor("op_557_dilations_0"), val = tensor([1, 1])]; + tensor var_557_groups_0 = const()[name = tensor("op_557_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14688448))), name = tensor("layers_1_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14620096))), shape = tensor([3072, 768, 1, 1])]; + tensor var_557_cast_fp16 = conv(dilations = var_557_dilations_0, groups = var_557_groups_0, pad = var_557_pad_0, pad_type = var_557_pad_type_0, strides = var_557_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = tensor("op_557_cast_fp16")]; + tensor input_13_cast_fp16 = add(x = var_551_cast_fp16, y = var_557_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; + tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_568_pad_type_0 = const()[name = tensor("op_568_pad_type_0"), val = tensor("valid")]; + tensor var_568_strides_0 = const()[name = tensor("op_568_strides_0"), val = tensor([1, 1])]; + tensor var_568_pad_0 = const()[name = tensor("op_568_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_568_dilations_0 = const()[name = tensor("op_568_dilations_0"), val = tensor([1, 1])]; + tensor var_568_groups_0 = const()[name = tensor("op_568_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14983424))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16163136))), name = tensor("layers_1_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16163264)))]; + tensor var_568_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_568_dilations_0, groups = var_568_groups_0, pad = var_568_pad_0, pad_type = var_568_pad_type_0, strides = var_568_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = tensor("op_568_cast_fp16")]; + tensor var_574_pad_type_0 = const()[name = tensor("op_574_pad_type_0"), val = tensor("valid")]; + tensor var_574_strides_0 = const()[name = tensor("op_574_strides_0"), val = tensor([1, 1])]; + tensor var_574_pad_0 = const()[name = tensor("op_574_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_574_dilations_0 = const()[name = tensor("op_574_dilations_0"), val = tensor([1, 1])]; + tensor var_574_groups_0 = const()[name = tensor("op_574_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16225152))), name = tensor("layers_1_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16164864))), shape = tensor([768, 3072, 1, 1])]; + tensor var_574_cast_fp16 = conv(dilations = var_574_dilations_0, groups = var_574_groups_0, pad = var_574_pad_0, pad_type = var_574_pad_type_0, strides = var_574_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = add(x = var_568_cast_fp16, y = var_574_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor var_580 = const()[name = tensor("op_580"), val = tensor(3)]; + tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; + tensor var_602_to_fp16 = const()[name = tensor("op_602_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_602_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16520128)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16521728)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor var_624_pad_type_0 = const()[name = tensor("op_624_pad_type_0"), val = tensor("valid")]; + tensor var_624_strides_0 = const()[name = tensor("op_624_strides_0"), val = tensor([1, 1])]; + tensor var_624_pad_0 = const()[name = tensor("op_624_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_624_dilations_0 = const()[name = tensor("op_624_dilations_0"), val = tensor([1, 1])]; + tensor var_624_groups_0 = const()[name = tensor("op_624_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16523328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16818304))), name = tensor("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16818432)))]; + tensor var_624_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_624_dilations_0, groups = var_624_groups_0, pad = var_624_pad_0, pad_type = var_624_pad_type_0, strides = var_624_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_630_pad_type_0 = const()[name = tensor("op_630_pad_type_0"), val = tensor("valid")]; + tensor var_630_strides_0 = const()[name = tensor("op_630_strides_0"), val = tensor([1, 1])]; + tensor var_630_pad_0 = const()[name = tensor("op_630_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_630_dilations_0 = const()[name = tensor("op_630_dilations_0"), val = tensor([1, 1])]; + tensor var_630_groups_0 = const()[name = tensor("op_630_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16834112))), name = tensor("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16820032))), shape = tensor([768, 768, 1, 1])]; + tensor var_630_cast_fp16 = conv(dilations = var_630_dilations_0, groups = var_630_groups_0, pad = var_630_pad_0, pad_type = var_630_pad_type_0, strides = var_630_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_630_cast_fp16")]; + tensor query_5_cast_fp16 = add(x = var_624_cast_fp16, y = var_630_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_639_pad_type_0 = const()[name = tensor("op_639_pad_type_0"), val = tensor("valid")]; + tensor var_639_strides_0 = const()[name = tensor("op_639_strides_0"), val = tensor([1, 1])]; + tensor var_639_pad_0 = const()[name = tensor("op_639_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_639_dilations_0 = const()[name = tensor("op_639_dilations_0"), val = tensor([1, 1])]; + tensor var_639_groups_0 = const()[name = tensor("op_639_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16907904))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17202880))), name = tensor("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_639_cast_fp16 = conv(dilations = var_639_dilations_0, groups = var_639_groups_0, pad = var_639_pad_0, pad_type = var_639_pad_type_0, strides = var_639_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_639_cast_fp16")]; + tensor var_645_pad_type_0 = const()[name = tensor("op_645_pad_type_0"), val = tensor("valid")]; + tensor var_645_strides_0 = const()[name = tensor("op_645_strides_0"), val = tensor([1, 1])]; + tensor var_645_pad_0 = const()[name = tensor("op_645_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_645_dilations_0 = const()[name = tensor("op_645_dilations_0"), val = tensor([1, 1])]; + tensor var_645_groups_0 = const()[name = tensor("op_645_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17216384))), name = tensor("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17203008))), shape = tensor([768, 768, 1, 1])]; + tensor var_645_cast_fp16 = conv(dilations = var_645_dilations_0, groups = var_645_groups_0, pad = var_645_pad_0, pad_type = var_645_pad_type_0, strides = var_645_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_645_cast_fp16")]; + tensor key_5_cast_fp16 = add(x = var_639_cast_fp16, y = var_645_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_655_pad_type_0 = const()[name = tensor("op_655_pad_type_0"), val = tensor("valid")]; + tensor var_655_strides_0 = const()[name = tensor("op_655_strides_0"), val = tensor([1, 1])]; + tensor var_655_pad_0 = const()[name = tensor("op_655_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_655_dilations_0 = const()[name = tensor("op_655_dilations_0"), val = tensor([1, 1])]; + tensor var_655_groups_0 = const()[name = tensor("op_655_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17290176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17585152))), name = tensor("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17585280)))]; + tensor var_655_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_655_dilations_0, groups = var_655_groups_0, pad = var_655_pad_0, pad_type = var_655_pad_type_0, strides = var_655_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_655_cast_fp16")]; + tensor var_661_pad_type_0 = const()[name = tensor("op_661_pad_type_0"), val = tensor("valid")]; + tensor var_661_strides_0 = const()[name = tensor("op_661_strides_0"), val = tensor([1, 1])]; + tensor var_661_pad_0 = const()[name = tensor("op_661_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_661_dilations_0 = const()[name = tensor("op_661_dilations_0"), val = tensor([1, 1])]; + tensor var_661_groups_0 = const()[name = tensor("op_661_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17596992))), name = tensor("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17586880))), shape = tensor([768, 768, 1, 1])]; + tensor var_661_cast_fp16 = conv(dilations = var_661_dilations_0, groups = var_661_groups_0, pad = var_661_pad_0, pad_type = var_661_pad_type_0, strides = var_661_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_661_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_655_cast_fp16, y = var_661_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_665 = const()[name = tensor("op_665"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_5_cast_fp16 = reshape(shape = var_665, x = query_5_cast_fp16)[name = tensor("mh_q_5_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1p-3)]; + tensor var_668_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_671 = const()[name = tensor("op_671"), val = tensor([1, 12, 64, 1500])]; + tensor var_672_cast_fp16 = reshape(shape = var_671, x = key_5_cast_fp16)[name = tensor("op_672_cast_fp16")]; + tensor mh_w_5_transpose_x_0 = const()[name = tensor("mh_w_5_transpose_x_0"), val = tensor(true)]; + tensor mh_w_5_transpose_y_0 = const()[name = tensor("mh_w_5_transpose_y_0"), val = tensor(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_668_cast_fp16, y = var_672_cast_fp16)[name = tensor("mh_w_5_cast_fp16")]; + tensor var_675_cast_fp16 = softmax(axis = var_580, x = mh_w_5_cast_fp16)[name = tensor("op_675_cast_fp16")]; + tensor var_676 = const()[name = tensor("op_676"), val = tensor([1, 12, 64, 1500])]; + tensor var_677_cast_fp16 = reshape(shape = var_676, x = value_5_cast_fp16)[name = tensor("op_677_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_677_cast_fp16, y = var_675_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_680 = const()[name = tensor("op_680"), val = tensor([1, 768, 1, 1500])]; + tensor input_17_cast_fp16 = reshape(shape = var_680, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_690_pad_type_0 = const()[name = tensor("op_690_pad_type_0"), val = tensor("valid")]; + tensor var_690_strides_0 = const()[name = tensor("op_690_strides_0"), val = tensor([1, 1])]; + tensor var_690_pad_0 = const()[name = tensor("op_690_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_690_dilations_0 = const()[name = tensor("op_690_dilations_0"), val = tensor([1, 1])]; + tensor var_690_groups_0 = const()[name = tensor("op_690_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17670784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17965760))), name = tensor("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17965888)))]; + tensor var_690_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_690_dilations_0, groups = var_690_groups_0, pad = var_690_pad_0, pad_type = var_690_pad_type_0, strides = var_690_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = tensor("op_690_cast_fp16")]; + tensor var_696_pad_type_0 = const()[name = tensor("op_696_pad_type_0"), val = tensor("valid")]; + tensor var_696_strides_0 = const()[name = tensor("op_696_strides_0"), val = tensor([1, 1])]; + tensor var_696_pad_0 = const()[name = tensor("op_696_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_696_dilations_0 = const()[name = tensor("op_696_dilations_0"), val = tensor([1, 1])]; + tensor var_696_groups_0 = const()[name = tensor("op_696_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17976064))), name = tensor("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17967488))), shape = tensor([768, 768, 1, 1])]; + tensor var_696_cast_fp16 = conv(dilations = var_696_dilations_0, groups = var_696_groups_0, pad = var_696_pad_0, pad_type = var_696_pad_type_0, strides = var_696_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_17_cast_fp16)[name = tensor("op_696_cast_fp16")]; + tensor obj_11_cast_fp16 = add(x = var_690_cast_fp16, y = var_696_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_707_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_19_gamma_0_to_fp16 = const()[name = tensor("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18049856)))]; + tensor input_19_beta_0_to_fp16 = const()[name = tensor("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18051456)))]; + tensor input_19_epsilon_0_to_fp16 = const()[name = tensor("input_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_725_pad_type_0 = const()[name = tensor("op_725_pad_type_0"), val = tensor("valid")]; + tensor var_725_strides_0 = const()[name = tensor("op_725_strides_0"), val = tensor([1, 1])]; + tensor var_725_pad_0 = const()[name = tensor("op_725_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_725_dilations_0 = const()[name = tensor("op_725_dilations_0"), val = tensor([1, 1])]; + tensor var_725_groups_0 = const()[name = tensor("op_725_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18053056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19232768))), name = tensor("layers_2_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19232896)))]; + tensor var_725_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_725_dilations_0, groups = var_725_groups_0, pad = var_725_pad_0, pad_type = var_725_pad_type_0, strides = var_725_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = tensor("op_725_cast_fp16")]; + tensor var_731_pad_type_0 = const()[name = tensor("op_731_pad_type_0"), val = tensor("valid")]; + tensor var_731_strides_0 = const()[name = tensor("op_731_strides_0"), val = tensor([1, 1])]; + tensor var_731_pad_0 = const()[name = tensor("op_731_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_731_dilations_0 = const()[name = tensor("op_731_dilations_0"), val = tensor([1, 1])]; + tensor var_731_groups_0 = const()[name = tensor("op_731_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19298752))), name = tensor("layers_2_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19239104))), shape = tensor([3072, 768, 1, 1])]; + tensor var_731_cast_fp16 = conv(dilations = var_731_dilations_0, groups = var_731_groups_0, pad = var_731_pad_0, pad_type = var_731_pad_type_0, strides = var_731_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = tensor("op_731_cast_fp16")]; + tensor input_21_cast_fp16 = add(x = var_725_cast_fp16, y = var_731_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; + tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_742_pad_type_0 = const()[name = tensor("op_742_pad_type_0"), val = tensor("valid")]; + tensor var_742_strides_0 = const()[name = tensor("op_742_strides_0"), val = tensor([1, 1])]; + tensor var_742_pad_0 = const()[name = tensor("op_742_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_742_dilations_0 = const()[name = tensor("op_742_dilations_0"), val = tensor([1, 1])]; + tensor var_742_groups_0 = const()[name = tensor("op_742_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19593728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20773440))), name = tensor("layers_2_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20773568)))]; + tensor var_742_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_742_dilations_0, groups = var_742_groups_0, pad = var_742_pad_0, pad_type = var_742_pad_type_0, strides = var_742_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = tensor("op_742_cast_fp16")]; + tensor var_748_pad_type_0 = const()[name = tensor("op_748_pad_type_0"), val = tensor("valid")]; + tensor var_748_strides_0 = const()[name = tensor("op_748_strides_0"), val = tensor([1, 1])]; + tensor var_748_pad_0 = const()[name = tensor("op_748_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_748_dilations_0 = const()[name = tensor("op_748_dilations_0"), val = tensor([1, 1])]; + tensor var_748_groups_0 = const()[name = tensor("op_748_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20834944))), name = tensor("layers_2_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20775168))), shape = tensor([768, 3072, 1, 1])]; + tensor var_748_cast_fp16 = conv(dilations = var_748_dilations_0, groups = var_748_groups_0, pad = var_748_pad_0, pad_type = var_748_pad_type_0, strides = var_748_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = tensor("op_748_cast_fp16")]; + tensor hidden_states_9_cast_fp16 = add(x = var_742_cast_fp16, y = var_748_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_754 = const()[name = tensor("op_754"), val = tensor(3)]; + tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; + tensor var_776_to_fp16 = const()[name = tensor("op_776_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_776_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_13_gamma_0_to_fp16 = const()[name = tensor("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21129920)))]; + tensor obj_13_beta_0_to_fp16 = const()[name = tensor("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21131520)))]; + tensor obj_13_epsilon_0_to_fp16 = const()[name = tensor("obj_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_798_pad_type_0 = const()[name = tensor("op_798_pad_type_0"), val = tensor("valid")]; + tensor var_798_strides_0 = const()[name = tensor("op_798_strides_0"), val = tensor([1, 1])]; + tensor var_798_pad_0 = const()[name = tensor("op_798_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_798_dilations_0 = const()[name = tensor("op_798_dilations_0"), val = tensor([1, 1])]; + tensor var_798_groups_0 = const()[name = tensor("op_798_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21133120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21428096))), name = tensor("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21428224)))]; + tensor var_798_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_798_dilations_0, groups = var_798_groups_0, pad = var_798_pad_0, pad_type = var_798_pad_type_0, strides = var_798_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_798_cast_fp16")]; + tensor var_804_pad_type_0 = const()[name = tensor("op_804_pad_type_0"), val = tensor("valid")]; + tensor var_804_strides_0 = const()[name = tensor("op_804_strides_0"), val = tensor([1, 1])]; + tensor var_804_pad_0 = const()[name = tensor("op_804_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_804_dilations_0 = const()[name = tensor("op_804_dilations_0"), val = tensor([1, 1])]; + tensor var_804_groups_0 = const()[name = tensor("op_804_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21440384))), name = tensor("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21429824))), shape = tensor([768, 768, 1, 1])]; + tensor var_804_cast_fp16 = conv(dilations = var_804_dilations_0, groups = var_804_groups_0, pad = var_804_pad_0, pad_type = var_804_pad_type_0, strides = var_804_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_804_cast_fp16")]; + tensor query_7_cast_fp16 = add(x = var_798_cast_fp16, y = var_804_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_813_pad_type_0 = const()[name = tensor("op_813_pad_type_0"), val = tensor("valid")]; + tensor var_813_strides_0 = const()[name = tensor("op_813_strides_0"), val = tensor([1, 1])]; + tensor var_813_pad_0 = const()[name = tensor("op_813_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_813_dilations_0 = const()[name = tensor("op_813_dilations_0"), val = tensor([1, 1])]; + tensor var_813_groups_0 = const()[name = tensor("op_813_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21514176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21809152))), name = tensor("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_813_cast_fp16 = conv(dilations = var_813_dilations_0, groups = var_813_groups_0, pad = var_813_pad_0, pad_type = var_813_pad_type_0, strides = var_813_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_813_cast_fp16")]; + tensor var_819_pad_type_0 = const()[name = tensor("op_819_pad_type_0"), val = tensor("valid")]; + tensor var_819_strides_0 = const()[name = tensor("op_819_strides_0"), val = tensor([1, 1])]; + tensor var_819_pad_0 = const()[name = tensor("op_819_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_819_dilations_0 = const()[name = tensor("op_819_dilations_0"), val = tensor([1, 1])]; + tensor var_819_groups_0 = const()[name = tensor("op_819_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21819776))), name = tensor("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21809280))), shape = tensor([768, 768, 1, 1])]; + tensor var_819_cast_fp16 = conv(dilations = var_819_dilations_0, groups = var_819_groups_0, pad = var_819_pad_0, pad_type = var_819_pad_type_0, strides = var_819_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_819_cast_fp16")]; + tensor key_7_cast_fp16 = add(x = var_813_cast_fp16, y = var_819_cast_fp16)[name = tensor("key_7_cast_fp16")]; + tensor var_829_pad_type_0 = const()[name = tensor("op_829_pad_type_0"), val = tensor("valid")]; + tensor var_829_strides_0 = const()[name = tensor("op_829_strides_0"), val = tensor([1, 1])]; + tensor var_829_pad_0 = const()[name = tensor("op_829_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_829_dilations_0 = const()[name = tensor("op_829_dilations_0"), val = tensor([1, 1])]; + tensor var_829_groups_0 = const()[name = tensor("op_829_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21893568))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22188544))), name = tensor("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22188672)))]; + tensor var_829_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_829_dilations_0, groups = var_829_groups_0, pad = var_829_pad_0, pad_type = var_829_pad_type_0, strides = var_829_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_829_cast_fp16")]; + tensor var_835_pad_type_0 = const()[name = tensor("op_835_pad_type_0"), val = tensor("valid")]; + tensor var_835_strides_0 = const()[name = tensor("op_835_strides_0"), val = tensor([1, 1])]; + tensor var_835_pad_0 = const()[name = tensor("op_835_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_835_dilations_0 = const()[name = tensor("op_835_dilations_0"), val = tensor([1, 1])]; + tensor var_835_groups_0 = const()[name = tensor("op_835_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22198784))), name = tensor("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22190272))), shape = tensor([768, 768, 1, 1])]; + tensor var_835_cast_fp16 = conv(dilations = var_835_dilations_0, groups = var_835_groups_0, pad = var_835_pad_0, pad_type = var_835_pad_type_0, strides = var_835_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_835_cast_fp16")]; + tensor value_7_cast_fp16 = add(x = var_829_cast_fp16, y = var_835_cast_fp16)[name = tensor("value_7_cast_fp16")]; + tensor var_839 = const()[name = tensor("op_839"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_839, x = query_7_cast_fp16)[name = tensor("mh_q_7_cast_fp16")]; + tensor var_841_to_fp16 = const()[name = tensor("op_841_to_fp16"), val = tensor(0x1p-3)]; + tensor var_842_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_841_to_fp16)[name = tensor("op_842_cast_fp16")]; + tensor var_845 = const()[name = tensor("op_845"), val = tensor([1, 12, 64, 1500])]; + tensor var_846_cast_fp16 = reshape(shape = var_845, x = key_7_cast_fp16)[name = tensor("op_846_cast_fp16")]; + tensor mh_w_7_transpose_x_0 = const()[name = tensor("mh_w_7_transpose_x_0"), val = tensor(true)]; + tensor mh_w_7_transpose_y_0 = const()[name = tensor("mh_w_7_transpose_y_0"), val = tensor(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_842_cast_fp16, y = var_846_cast_fp16)[name = tensor("mh_w_7_cast_fp16")]; + tensor var_849_cast_fp16 = softmax(axis = var_754, x = mh_w_7_cast_fp16)[name = tensor("op_849_cast_fp16")]; + tensor var_850 = const()[name = tensor("op_850"), val = tensor([1, 12, 64, 1500])]; + tensor var_851_cast_fp16 = reshape(shape = var_850, x = value_7_cast_fp16)[name = tensor("op_851_cast_fp16")]; + tensor attn_7_transpose_x_0 = const()[name = tensor("attn_7_transpose_x_0"), val = tensor(false)]; + tensor attn_7_transpose_y_0 = const()[name = tensor("attn_7_transpose_y_0"), val = tensor(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_851_cast_fp16, y = var_849_cast_fp16)[name = tensor("attn_7_cast_fp16")]; + tensor var_854 = const()[name = tensor("op_854"), val = tensor([1, 768, 1, 1500])]; + tensor input_25_cast_fp16 = reshape(shape = var_854, x = attn_7_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_864_pad_type_0 = const()[name = tensor("op_864_pad_type_0"), val = tensor("valid")]; + tensor var_864_strides_0 = const()[name = tensor("op_864_strides_0"), val = tensor([1, 1])]; + tensor var_864_pad_0 = const()[name = tensor("op_864_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_864_dilations_0 = const()[name = tensor("op_864_dilations_0"), val = tensor([1, 1])]; + tensor var_864_groups_0 = const()[name = tensor("op_864_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22272576))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22567552))), name = tensor("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22567680)))]; + tensor var_864_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_864_dilations_0, groups = var_864_groups_0, pad = var_864_pad_0, pad_type = var_864_pad_type_0, strides = var_864_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = tensor("op_864_cast_fp16")]; + tensor var_870_pad_type_0 = const()[name = tensor("op_870_pad_type_0"), val = tensor("valid")]; + tensor var_870_strides_0 = const()[name = tensor("op_870_strides_0"), val = tensor([1, 1])]; + tensor var_870_pad_0 = const()[name = tensor("op_870_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_870_dilations_0 = const()[name = tensor("op_870_dilations_0"), val = tensor([1, 1])]; + tensor var_870_groups_0 = const()[name = tensor("op_870_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22576640))), name = tensor("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22569280))), shape = tensor([768, 768, 1, 1])]; + tensor var_870_cast_fp16 = conv(dilations = var_870_dilations_0, groups = var_870_groups_0, pad = var_870_pad_0, pad_type = var_870_pad_type_0, strides = var_870_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = tensor("op_870_cast_fp16")]; + tensor obj_15_cast_fp16 = add(x = var_864_cast_fp16, y = var_870_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; + tensor var_881_to_fp16 = const()[name = tensor("op_881_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_881_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor input_27_gamma_0_to_fp16 = const()[name = tensor("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22650432)))]; + tensor input_27_beta_0_to_fp16 = const()[name = tensor("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22652032)))]; + tensor input_27_epsilon_0_to_fp16 = const()[name = tensor("input_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_899_pad_type_0 = const()[name = tensor("op_899_pad_type_0"), val = tensor("valid")]; + tensor var_899_strides_0 = const()[name = tensor("op_899_strides_0"), val = tensor([1, 1])]; + tensor var_899_pad_0 = const()[name = tensor("op_899_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_899_dilations_0 = const()[name = tensor("op_899_dilations_0"), val = tensor([1, 1])]; + tensor var_899_groups_0 = const()[name = tensor("op_899_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22653632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23833344))), name = tensor("layers_3_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23833472)))]; + tensor var_899_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_899_dilations_0, groups = var_899_groups_0, pad = var_899_pad_0, pad_type = var_899_pad_type_0, strides = var_899_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = tensor("op_899_cast_fp16")]; + tensor var_905_pad_type_0 = const()[name = tensor("op_905_pad_type_0"), val = tensor("valid")]; + tensor var_905_strides_0 = const()[name = tensor("op_905_strides_0"), val = tensor([1, 1])]; + tensor var_905_pad_0 = const()[name = tensor("op_905_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_905_dilations_0 = const()[name = tensor("op_905_dilations_0"), val = tensor([1, 1])]; + tensor var_905_groups_0 = const()[name = tensor("op_905_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23883648))), name = tensor("layers_3_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23839680))), shape = tensor([3072, 768, 1, 1])]; + tensor var_905_cast_fp16 = conv(dilations = var_905_dilations_0, groups = var_905_groups_0, pad = var_905_pad_0, pad_type = var_905_pad_type_0, strides = var_905_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_27_cast_fp16)[name = tensor("op_905_cast_fp16")]; + tensor input_29_cast_fp16 = add(x = var_899_cast_fp16, y = var_905_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; + tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_916_pad_type_0 = const()[name = tensor("op_916_pad_type_0"), val = tensor("valid")]; + tensor var_916_strides_0 = const()[name = tensor("op_916_strides_0"), val = tensor([1, 1])]; + tensor var_916_pad_0 = const()[name = tensor("op_916_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_916_dilations_0 = const()[name = tensor("op_916_dilations_0"), val = tensor([1, 1])]; + tensor var_916_groups_0 = const()[name = tensor("op_916_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24178624))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25358336))), name = tensor("layers_3_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25358464)))]; + tensor var_916_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_916_dilations_0, groups = var_916_groups_0, pad = var_916_pad_0, pad_type = var_916_pad_type_0, strides = var_916_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = tensor("op_916_cast_fp16")]; + tensor var_922_pad_type_0 = const()[name = tensor("op_922_pad_type_0"), val = tensor("valid")]; + tensor var_922_strides_0 = const()[name = tensor("op_922_strides_0"), val = tensor([1, 1])]; + tensor var_922_pad_0 = const()[name = tensor("op_922_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_922_dilations_0 = const()[name = tensor("op_922_dilations_0"), val = tensor([1, 1])]; + tensor var_922_groups_0 = const()[name = tensor("op_922_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25407808))), name = tensor("layers_3_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25360064))), shape = tensor([768, 3072, 1, 1])]; + tensor var_922_cast_fp16 = conv(dilations = var_922_dilations_0, groups = var_922_groups_0, pad = var_922_pad_0, pad_type = var_922_pad_type_0, strides = var_922_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = tensor("op_922_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = var_916_cast_fp16, y = var_922_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor var_928 = const()[name = tensor("op_928"), val = tensor(3)]; + tensor out_17_axes_0 = const()[name = tensor("out_17_axes_0"), val = tensor([1])]; + tensor var_950_to_fp16 = const()[name = tensor("op_950_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_950_to_fp16, x = inputs_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor obj_17_gamma_0_to_fp16 = const()[name = tensor("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25702784)))]; + tensor obj_17_beta_0_to_fp16 = const()[name = tensor("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25704384)))]; + tensor obj_17_epsilon_0_to_fp16 = const()[name = tensor("obj_17_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("obj_17_cast_fp16")]; + tensor var_972_pad_type_0 = const()[name = tensor("op_972_pad_type_0"), val = tensor("valid")]; + tensor var_972_strides_0 = const()[name = tensor("op_972_strides_0"), val = tensor([1, 1])]; + tensor var_972_pad_0 = const()[name = tensor("op_972_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_972_dilations_0 = const()[name = tensor("op_972_dilations_0"), val = tensor([1, 1])]; + tensor var_972_groups_0 = const()[name = tensor("op_972_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25705984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26000960))), name = tensor("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26001088)))]; + tensor var_972_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_972_dilations_0, groups = var_972_groups_0, pad = var_972_pad_0, pad_type = var_972_pad_type_0, strides = var_972_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_978_pad_type_0 = const()[name = tensor("op_978_pad_type_0"), val = tensor("valid")]; + tensor var_978_strides_0 = const()[name = tensor("op_978_strides_0"), val = tensor([1, 1])]; + tensor var_978_pad_0 = const()[name = tensor("op_978_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_978_dilations_0 = const()[name = tensor("op_978_dilations_0"), val = tensor([1, 1])]; + tensor var_978_groups_0 = const()[name = tensor("op_978_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26012480))), name = tensor("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26002688))), shape = tensor([768, 768, 1, 1])]; + tensor var_978_cast_fp16 = conv(dilations = var_978_dilations_0, groups = var_978_groups_0, pad = var_978_pad_0, pad_type = var_978_pad_type_0, strides = var_978_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_978_cast_fp16")]; + tensor query_9_cast_fp16 = add(x = var_972_cast_fp16, y = var_978_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_987_pad_type_0 = const()[name = tensor("op_987_pad_type_0"), val = tensor("valid")]; + tensor var_987_strides_0 = const()[name = tensor("op_987_strides_0"), val = tensor([1, 1])]; + tensor var_987_pad_0 = const()[name = tensor("op_987_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_987_dilations_0 = const()[name = tensor("op_987_dilations_0"), val = tensor([1, 1])]; + tensor var_987_groups_0 = const()[name = tensor("op_987_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26086272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26381248))), name = tensor("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_987_cast_fp16 = conv(dilations = var_987_dilations_0, groups = var_987_groups_0, pad = var_987_pad_0, pad_type = var_987_pad_type_0, strides = var_987_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_987_cast_fp16")]; + tensor var_993_pad_type_0 = const()[name = tensor("op_993_pad_type_0"), val = tensor("valid")]; + tensor var_993_strides_0 = const()[name = tensor("op_993_strides_0"), val = tensor([1, 1])]; + tensor var_993_pad_0 = const()[name = tensor("op_993_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_993_dilations_0 = const()[name = tensor("op_993_dilations_0"), val = tensor([1, 1])]; + tensor var_993_groups_0 = const()[name = tensor("op_993_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26392192))), name = tensor("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26381376))), shape = tensor([768, 768, 1, 1])]; + tensor var_993_cast_fp16 = conv(dilations = var_993_dilations_0, groups = var_993_groups_0, pad = var_993_pad_0, pad_type = var_993_pad_type_0, strides = var_993_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_993_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_987_cast_fp16, y = var_993_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_1003_pad_type_0 = const()[name = tensor("op_1003_pad_type_0"), val = tensor("valid")]; + tensor var_1003_strides_0 = const()[name = tensor("op_1003_strides_0"), val = tensor([1, 1])]; + tensor var_1003_pad_0 = const()[name = tensor("op_1003_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1003_dilations_0 = const()[name = tensor("op_1003_dilations_0"), val = tensor([1, 1])]; + tensor var_1003_groups_0 = const()[name = tensor("op_1003_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26465984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26760960))), name = tensor("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26761088)))]; + tensor var_1003_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1003_dilations_0, groups = var_1003_groups_0, pad = var_1003_pad_0, pad_type = var_1003_pad_type_0, strides = var_1003_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_1003_cast_fp16")]; + tensor var_1009_pad_type_0 = const()[name = tensor("op_1009_pad_type_0"), val = tensor("valid")]; + tensor var_1009_strides_0 = const()[name = tensor("op_1009_strides_0"), val = tensor([1, 1])]; + tensor var_1009_pad_0 = const()[name = tensor("op_1009_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1009_dilations_0 = const()[name = tensor("op_1009_dilations_0"), val = tensor([1, 1])]; + tensor var_1009_groups_0 = const()[name = tensor("op_1009_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26769984))), name = tensor("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26762688))), shape = tensor([768, 768, 1, 1])]; + tensor var_1009_cast_fp16 = conv(dilations = var_1009_dilations_0, groups = var_1009_groups_0, pad = var_1009_pad_0, pad_type = var_1009_pad_type_0, strides = var_1009_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_1009_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_1003_cast_fp16, y = var_1009_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_1013 = const()[name = tensor("op_1013"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_9_cast_fp16 = reshape(shape = var_1013, x = query_9_cast_fp16)[name = tensor("mh_q_9_cast_fp16")]; + tensor var_1015_to_fp16 = const()[name = tensor("op_1015_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1016_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_1015_to_fp16)[name = tensor("op_1016_cast_fp16")]; + tensor var_1019 = const()[name = tensor("op_1019"), val = tensor([1, 12, 64, 1500])]; + tensor var_1020_cast_fp16 = reshape(shape = var_1019, x = key_9_cast_fp16)[name = tensor("op_1020_cast_fp16")]; + tensor mh_w_9_transpose_x_0 = const()[name = tensor("mh_w_9_transpose_x_0"), val = tensor(true)]; + tensor mh_w_9_transpose_y_0 = const()[name = tensor("mh_w_9_transpose_y_0"), val = tensor(false)]; + tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_1016_cast_fp16, y = var_1020_cast_fp16)[name = tensor("mh_w_9_cast_fp16")]; + tensor var_1023_cast_fp16 = softmax(axis = var_928, x = mh_w_9_cast_fp16)[name = tensor("op_1023_cast_fp16")]; + tensor var_1024 = const()[name = tensor("op_1024"), val = tensor([1, 12, 64, 1500])]; + tensor var_1025_cast_fp16 = reshape(shape = var_1024, x = value_9_cast_fp16)[name = tensor("op_1025_cast_fp16")]; + tensor attn_9_transpose_x_0 = const()[name = tensor("attn_9_transpose_x_0"), val = tensor(false)]; + tensor attn_9_transpose_y_0 = const()[name = tensor("attn_9_transpose_y_0"), val = tensor(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_1025_cast_fp16, y = var_1023_cast_fp16)[name = tensor("attn_9_cast_fp16")]; + tensor var_1028 = const()[name = tensor("op_1028"), val = tensor([1, 768, 1, 1500])]; + tensor input_33_cast_fp16 = reshape(shape = var_1028, x = attn_9_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor var_1038_pad_type_0 = const()[name = tensor("op_1038_pad_type_0"), val = tensor("valid")]; + tensor var_1038_strides_0 = const()[name = tensor("op_1038_strides_0"), val = tensor([1, 1])]; + tensor var_1038_pad_0 = const()[name = tensor("op_1038_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1038_dilations_0 = const()[name = tensor("op_1038_dilations_0"), val = tensor([1, 1])]; + tensor var_1038_groups_0 = const()[name = tensor("op_1038_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26843776))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27138752))), name = tensor("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27138880)))]; + tensor var_1038_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1038_dilations_0, groups = var_1038_groups_0, pad = var_1038_pad_0, pad_type = var_1038_pad_type_0, strides = var_1038_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = tensor("op_1038_cast_fp16")]; + tensor var_1044_pad_type_0 = const()[name = tensor("op_1044_pad_type_0"), val = tensor("valid")]; + tensor var_1044_strides_0 = const()[name = tensor("op_1044_strides_0"), val = tensor([1, 1])]; + tensor var_1044_pad_0 = const()[name = tensor("op_1044_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1044_dilations_0 = const()[name = tensor("op_1044_dilations_0"), val = tensor([1, 1])]; + tensor var_1044_groups_0 = const()[name = tensor("op_1044_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27146816))), name = tensor("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27140480))), shape = tensor([768, 768, 1, 1])]; + tensor var_1044_cast_fp16 = conv(dilations = var_1044_dilations_0, groups = var_1044_groups_0, pad = var_1044_pad_0, pad_type = var_1044_pad_type_0, strides = var_1044_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = tensor("op_1044_cast_fp16")]; + tensor obj_19_cast_fp16 = add(x = var_1038_cast_fp16, y = var_1044_cast_fp16)[name = tensor("obj_19_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor out_19_axes_0 = const()[name = tensor("out_19_axes_0"), val = tensor([1])]; + tensor var_1055_to_fp16 = const()[name = tensor("op_1055_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_1055_to_fp16, x = inputs_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27220608)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27222208)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_1073_pad_type_0 = const()[name = tensor("op_1073_pad_type_0"), val = tensor("valid")]; + tensor var_1073_strides_0 = const()[name = tensor("op_1073_strides_0"), val = tensor([1, 1])]; + tensor var_1073_pad_0 = const()[name = tensor("op_1073_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1073_dilations_0 = const()[name = tensor("op_1073_dilations_0"), val = tensor([1, 1])]; + tensor var_1073_groups_0 = const()[name = tensor("op_1073_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27223808))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28403520))), name = tensor("layers_4_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28403648)))]; + tensor var_1073_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1073_dilations_0, groups = var_1073_groups_0, pad = var_1073_pad_0, pad_type = var_1073_pad_type_0, strides = var_1073_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor var_1079_pad_type_0 = const()[name = tensor("op_1079_pad_type_0"), val = tensor("valid")]; + tensor var_1079_strides_0 = const()[name = tensor("op_1079_strides_0"), val = tensor([1, 1])]; + tensor var_1079_pad_0 = const()[name = tensor("op_1079_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1079_dilations_0 = const()[name = tensor("op_1079_dilations_0"), val = tensor([1, 1])]; + tensor var_1079_groups_0 = const()[name = tensor("op_1079_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28452288))), name = tensor("layers_4_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28409856))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1079_cast_fp16 = conv(dilations = var_1079_dilations_0, groups = var_1079_groups_0, pad = var_1079_pad_0, pad_type = var_1079_pad_type_0, strides = var_1079_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = tensor("op_1079_cast_fp16")]; + tensor input_37_cast_fp16 = add(x = var_1073_cast_fp16, y = var_1079_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_1090_pad_type_0 = const()[name = tensor("op_1090_pad_type_0"), val = tensor("valid")]; + tensor var_1090_strides_0 = const()[name = tensor("op_1090_strides_0"), val = tensor([1, 1])]; + tensor var_1090_pad_0 = const()[name = tensor("op_1090_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1090_dilations_0 = const()[name = tensor("op_1090_dilations_0"), val = tensor([1, 1])]; + tensor var_1090_groups_0 = const()[name = tensor("op_1090_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28747264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29926976))), name = tensor("layers_4_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29927104)))]; + tensor var_1090_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1090_dilations_0, groups = var_1090_groups_0, pad = var_1090_pad_0, pad_type = var_1090_pad_type_0, strides = var_1090_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1096_pad_type_0 = const()[name = tensor("op_1096_pad_type_0"), val = tensor("valid")]; + tensor var_1096_strides_0 = const()[name = tensor("op_1096_strides_0"), val = tensor([1, 1])]; + tensor var_1096_pad_0 = const()[name = tensor("op_1096_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1096_dilations_0 = const()[name = tensor("op_1096_dilations_0"), val = tensor([1, 1])]; + tensor var_1096_groups_0 = const()[name = tensor("op_1096_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29977152))), name = tensor("layers_4_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29928704))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1096_cast_fp16 = conv(dilations = var_1096_dilations_0, groups = var_1096_groups_0, pad = var_1096_pad_0, pad_type = var_1096_pad_type_0, strides = var_1096_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = tensor("op_1096_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = var_1090_cast_fp16, y = var_1096_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor var_1102 = const()[name = tensor("op_1102"), val = tensor(3)]; + tensor out_21_axes_0 = const()[name = tensor("out_21_axes_0"), val = tensor([1])]; + tensor var_1124_to_fp16 = const()[name = tensor("op_1124_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1124_to_fp16, x = inputs_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_21_gamma_0_to_fp16 = const()[name = tensor("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30272128)))]; + tensor obj_21_beta_0_to_fp16 = const()[name = tensor("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30273728)))]; + tensor obj_21_epsilon_0_to_fp16 = const()[name = tensor("obj_21_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor var_1146_pad_type_0 = const()[name = tensor("op_1146_pad_type_0"), val = tensor("valid")]; + tensor var_1146_strides_0 = const()[name = tensor("op_1146_strides_0"), val = tensor([1, 1])]; + tensor var_1146_pad_0 = const()[name = tensor("op_1146_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1146_dilations_0 = const()[name = tensor("op_1146_dilations_0"), val = tensor([1, 1])]; + tensor var_1146_groups_0 = const()[name = tensor("op_1146_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30275328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30570304))), name = tensor("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30570432)))]; + tensor var_1146_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1146_dilations_0, groups = var_1146_groups_0, pad = var_1146_pad_0, pad_type = var_1146_pad_type_0, strides = var_1146_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1152_pad_type_0 = const()[name = tensor("op_1152_pad_type_0"), val = tensor("valid")]; + tensor var_1152_strides_0 = const()[name = tensor("op_1152_strides_0"), val = tensor([1, 1])]; + tensor var_1152_pad_0 = const()[name = tensor("op_1152_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1152_dilations_0 = const()[name = tensor("op_1152_dilations_0"), val = tensor([1, 1])]; + tensor var_1152_groups_0 = const()[name = tensor("op_1152_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30579968))), name = tensor("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30572032))), shape = tensor([768, 768, 1, 1])]; + tensor var_1152_cast_fp16 = conv(dilations = var_1152_dilations_0, groups = var_1152_groups_0, pad = var_1152_pad_0, pad_type = var_1152_pad_type_0, strides = var_1152_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_1152_cast_fp16")]; + tensor query_11_cast_fp16 = add(x = var_1146_cast_fp16, y = var_1152_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor var_1161_pad_type_0 = const()[name = tensor("op_1161_pad_type_0"), val = tensor("valid")]; + tensor var_1161_strides_0 = const()[name = tensor("op_1161_strides_0"), val = tensor([1, 1])]; + tensor var_1161_pad_0 = const()[name = tensor("op_1161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1161_dilations_0 = const()[name = tensor("op_1161_dilations_0"), val = tensor([1, 1])]; + tensor var_1161_groups_0 = const()[name = tensor("op_1161_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30653760))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30948736))), name = tensor("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1161_cast_fp16 = conv(dilations = var_1161_dilations_0, groups = var_1161_groups_0, pad = var_1161_pad_0, pad_type = var_1161_pad_type_0, strides = var_1161_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_1161_cast_fp16")]; + tensor var_1167_pad_type_0 = const()[name = tensor("op_1167_pad_type_0"), val = tensor("valid")]; + tensor var_1167_strides_0 = const()[name = tensor("op_1167_strides_0"), val = tensor([1, 1])]; + tensor var_1167_pad_0 = const()[name = tensor("op_1167_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1167_dilations_0 = const()[name = tensor("op_1167_dilations_0"), val = tensor([1, 1])]; + tensor var_1167_groups_0 = const()[name = tensor("op_1167_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30957056))), name = tensor("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30948864))), shape = tensor([768, 768, 1, 1])]; + tensor var_1167_cast_fp16 = conv(dilations = var_1167_dilations_0, groups = var_1167_groups_0, pad = var_1167_pad_0, pad_type = var_1167_pad_type_0, strides = var_1167_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_1167_cast_fp16")]; + tensor key_11_cast_fp16 = add(x = var_1161_cast_fp16, y = var_1167_cast_fp16)[name = tensor("key_11_cast_fp16")]; + tensor var_1177_pad_type_0 = const()[name = tensor("op_1177_pad_type_0"), val = tensor("valid")]; + tensor var_1177_strides_0 = const()[name = tensor("op_1177_strides_0"), val = tensor([1, 1])]; + tensor var_1177_pad_0 = const()[name = tensor("op_1177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1177_dilations_0 = const()[name = tensor("op_1177_dilations_0"), val = tensor([1, 1])]; + tensor var_1177_groups_0 = const()[name = tensor("op_1177_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31030848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31325824))), name = tensor("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31325952)))]; + tensor var_1177_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1177_dilations_0, groups = var_1177_groups_0, pad = var_1177_pad_0, pad_type = var_1177_pad_type_0, strides = var_1177_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_1177_cast_fp16")]; + tensor var_1183_pad_type_0 = const()[name = tensor("op_1183_pad_type_0"), val = tensor("valid")]; + tensor var_1183_strides_0 = const()[name = tensor("op_1183_strides_0"), val = tensor([1, 1])]; + tensor var_1183_pad_0 = const()[name = tensor("op_1183_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1183_dilations_0 = const()[name = tensor("op_1183_dilations_0"), val = tensor([1, 1])]; + tensor var_1183_groups_0 = const()[name = tensor("op_1183_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31334976))), name = tensor("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31327552))), shape = tensor([768, 768, 1, 1])]; + tensor var_1183_cast_fp16 = conv(dilations = var_1183_dilations_0, groups = var_1183_groups_0, pad = var_1183_pad_0, pad_type = var_1183_pad_type_0, strides = var_1183_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_1183_cast_fp16")]; + tensor value_11_cast_fp16 = add(x = var_1177_cast_fp16, y = var_1183_cast_fp16)[name = tensor("value_11_cast_fp16")]; + tensor var_1187 = const()[name = tensor("op_1187"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_11_cast_fp16 = reshape(shape = var_1187, x = query_11_cast_fp16)[name = tensor("mh_q_11_cast_fp16")]; + tensor var_1189_to_fp16 = const()[name = tensor("op_1189_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1190_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_1189_to_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1193 = const()[name = tensor("op_1193"), val = tensor([1, 12, 64, 1500])]; + tensor var_1194_cast_fp16 = reshape(shape = var_1193, x = key_11_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor mh_w_11_transpose_x_0 = const()[name = tensor("mh_w_11_transpose_x_0"), val = tensor(true)]; + tensor mh_w_11_transpose_y_0 = const()[name = tensor("mh_w_11_transpose_y_0"), val = tensor(false)]; + tensor mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_1190_cast_fp16, y = var_1194_cast_fp16)[name = tensor("mh_w_11_cast_fp16")]; + tensor var_1197_cast_fp16 = softmax(axis = var_1102, x = mh_w_11_cast_fp16)[name = tensor("op_1197_cast_fp16")]; + tensor var_1198 = const()[name = tensor("op_1198"), val = tensor([1, 12, 64, 1500])]; + tensor var_1199_cast_fp16 = reshape(shape = var_1198, x = value_11_cast_fp16)[name = tensor("op_1199_cast_fp16")]; + tensor attn_11_transpose_x_0 = const()[name = tensor("attn_11_transpose_x_0"), val = tensor(false)]; + tensor attn_11_transpose_y_0 = const()[name = tensor("attn_11_transpose_y_0"), val = tensor(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_1199_cast_fp16, y = var_1197_cast_fp16)[name = tensor("attn_11_cast_fp16")]; + tensor var_1202 = const()[name = tensor("op_1202"), val = tensor([1, 768, 1, 1500])]; + tensor input_41_cast_fp16 = reshape(shape = var_1202, x = attn_11_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor var_1212_pad_type_0 = const()[name = tensor("op_1212_pad_type_0"), val = tensor("valid")]; + tensor var_1212_strides_0 = const()[name = tensor("op_1212_strides_0"), val = tensor([1, 1])]; + tensor var_1212_pad_0 = const()[name = tensor("op_1212_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1212_dilations_0 = const()[name = tensor("op_1212_dilations_0"), val = tensor([1, 1])]; + tensor var_1212_groups_0 = const()[name = tensor("op_1212_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31408768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31703744))), name = tensor("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31703872)))]; + tensor var_1212_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1212_dilations_0, groups = var_1212_groups_0, pad = var_1212_pad_0, pad_type = var_1212_pad_type_0, strides = var_1212_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = tensor("op_1212_cast_fp16")]; + tensor var_1218_pad_type_0 = const()[name = tensor("op_1218_pad_type_0"), val = tensor("valid")]; + tensor var_1218_strides_0 = const()[name = tensor("op_1218_strides_0"), val = tensor([1, 1])]; + tensor var_1218_pad_0 = const()[name = tensor("op_1218_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1218_dilations_0 = const()[name = tensor("op_1218_dilations_0"), val = tensor([1, 1])]; + tensor var_1218_groups_0 = const()[name = tensor("op_1218_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31713792))), name = tensor("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31705472))), shape = tensor([768, 768, 1, 1])]; + tensor var_1218_cast_fp16 = conv(dilations = var_1218_dilations_0, groups = var_1218_groups_0, pad = var_1218_pad_0, pad_type = var_1218_pad_type_0, strides = var_1218_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor obj_23_cast_fp16 = add(x = var_1212_cast_fp16, y = var_1218_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor out_23_axes_0 = const()[name = tensor("out_23_axes_0"), val = tensor([1])]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1229_to_fp16, x = inputs_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_43_gamma_0_to_fp16 = const()[name = tensor("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31787584)))]; + tensor input_43_beta_0_to_fp16 = const()[name = tensor("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31789184)))]; + tensor input_43_epsilon_0_to_fp16 = const()[name = tensor("input_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_1247_pad_type_0 = const()[name = tensor("op_1247_pad_type_0"), val = tensor("valid")]; + tensor var_1247_strides_0 = const()[name = tensor("op_1247_strides_0"), val = tensor([1, 1])]; + tensor var_1247_pad_0 = const()[name = tensor("op_1247_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1247_dilations_0 = const()[name = tensor("op_1247_dilations_0"), val = tensor([1, 1])]; + tensor var_1247_groups_0 = const()[name = tensor("op_1247_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31790784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32970496))), name = tensor("layers_5_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32970624)))]; + tensor var_1247_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1247_dilations_0, groups = var_1247_groups_0, pad = var_1247_pad_0, pad_type = var_1247_pad_type_0, strides = var_1247_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = tensor("op_1247_cast_fp16")]; + tensor var_1253_pad_type_0 = const()[name = tensor("op_1253_pad_type_0"), val = tensor("valid")]; + tensor var_1253_strides_0 = const()[name = tensor("op_1253_strides_0"), val = tensor([1, 1])]; + tensor var_1253_pad_0 = const()[name = tensor("op_1253_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1253_dilations_0 = const()[name = tensor("op_1253_dilations_0"), val = tensor([1, 1])]; + tensor var_1253_groups_0 = const()[name = tensor("op_1253_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33018432))), name = tensor("layers_5_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32976832))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1253_cast_fp16 = conv(dilations = var_1253_dilations_0, groups = var_1253_groups_0, pad = var_1253_pad_0, pad_type = var_1253_pad_type_0, strides = var_1253_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = tensor("op_1253_cast_fp16")]; + tensor input_45_cast_fp16 = add(x = var_1247_cast_fp16, y = var_1253_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor input_47_mode_0 = const()[name = tensor("input_47_mode_0"), val = tensor("EXACT")]; + tensor input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor var_1264_pad_type_0 = const()[name = tensor("op_1264_pad_type_0"), val = tensor("valid")]; + tensor var_1264_strides_0 = const()[name = tensor("op_1264_strides_0"), val = tensor([1, 1])]; + tensor var_1264_pad_0 = const()[name = tensor("op_1264_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1264_dilations_0 = const()[name = tensor("op_1264_dilations_0"), val = tensor([1, 1])]; + tensor var_1264_groups_0 = const()[name = tensor("op_1264_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33313408))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34493120))), name = tensor("layers_5_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34493248)))]; + tensor var_1264_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1264_dilations_0, groups = var_1264_groups_0, pad = var_1264_pad_0, pad_type = var_1264_pad_type_0, strides = var_1264_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_47_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1270_pad_type_0 = const()[name = tensor("op_1270_pad_type_0"), val = tensor("valid")]; + tensor var_1270_strides_0 = const()[name = tensor("op_1270_strides_0"), val = tensor([1, 1])]; + tensor var_1270_pad_0 = const()[name = tensor("op_1270_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1270_dilations_0 = const()[name = tensor("op_1270_dilations_0"), val = tensor([1, 1])]; + tensor var_1270_groups_0 = const()[name = tensor("op_1270_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34539520))), name = tensor("layers_5_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34494848))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1270_cast_fp16 = conv(dilations = var_1270_dilations_0, groups = var_1270_groups_0, pad = var_1270_pad_0, pad_type = var_1270_pad_type_0, strides = var_1270_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_47_cast_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = var_1264_cast_fp16, y = var_1270_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_1276 = const()[name = tensor("op_1276"), val = tensor(3)]; + tensor out_25_axes_0 = const()[name = tensor("out_25_axes_0"), val = tensor([1])]; + tensor var_1298_to_fp16 = const()[name = tensor("op_1298_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1298_to_fp16, x = inputs_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; + tensor obj_25_gamma_0_to_fp16 = const()[name = tensor("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34834496)))]; + tensor obj_25_beta_0_to_fp16 = const()[name = tensor("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34836096)))]; + tensor obj_25_epsilon_0_to_fp16 = const()[name = tensor("obj_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor var_1320_pad_type_0 = const()[name = tensor("op_1320_pad_type_0"), val = tensor("valid")]; + tensor var_1320_strides_0 = const()[name = tensor("op_1320_strides_0"), val = tensor([1, 1])]; + tensor var_1320_pad_0 = const()[name = tensor("op_1320_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1320_dilations_0 = const()[name = tensor("op_1320_dilations_0"), val = tensor([1, 1])]; + tensor var_1320_groups_0 = const()[name = tensor("op_1320_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34837696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35132672))), name = tensor("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35132800)))]; + tensor var_1320_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1320_dilations_0, groups = var_1320_groups_0, pad = var_1320_pad_0, pad_type = var_1320_pad_type_0, strides = var_1320_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1326_pad_type_0 = const()[name = tensor("op_1326_pad_type_0"), val = tensor("valid")]; + tensor var_1326_strides_0 = const()[name = tensor("op_1326_strides_0"), val = tensor([1, 1])]; + tensor var_1326_pad_0 = const()[name = tensor("op_1326_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1326_dilations_0 = const()[name = tensor("op_1326_dilations_0"), val = tensor([1, 1])]; + tensor var_1326_groups_0 = const()[name = tensor("op_1326_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35142208))), name = tensor("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35134400))), shape = tensor([768, 768, 1, 1])]; + tensor var_1326_cast_fp16 = conv(dilations = var_1326_dilations_0, groups = var_1326_groups_0, pad = var_1326_pad_0, pad_type = var_1326_pad_type_0, strides = var_1326_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_1326_cast_fp16")]; + tensor query_13_cast_fp16 = add(x = var_1320_cast_fp16, y = var_1326_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor var_1335_pad_type_0 = const()[name = tensor("op_1335_pad_type_0"), val = tensor("valid")]; + tensor var_1335_strides_0 = const()[name = tensor("op_1335_strides_0"), val = tensor([1, 1])]; + tensor var_1335_pad_0 = const()[name = tensor("op_1335_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1335_dilations_0 = const()[name = tensor("op_1335_dilations_0"), val = tensor([1, 1])]; + tensor var_1335_groups_0 = const()[name = tensor("op_1335_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35216000))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35510976))), name = tensor("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1335_cast_fp16 = conv(dilations = var_1335_dilations_0, groups = var_1335_groups_0, pad = var_1335_pad_0, pad_type = var_1335_pad_type_0, strides = var_1335_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1341_pad_type_0 = const()[name = tensor("op_1341_pad_type_0"), val = tensor("valid")]; + tensor var_1341_strides_0 = const()[name = tensor("op_1341_strides_0"), val = tensor([1, 1])]; + tensor var_1341_pad_0 = const()[name = tensor("op_1341_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1341_dilations_0 = const()[name = tensor("op_1341_dilations_0"), val = tensor([1, 1])]; + tensor var_1341_groups_0 = const()[name = tensor("op_1341_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35518848))), name = tensor("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35511104))), shape = tensor([768, 768, 1, 1])]; + tensor var_1341_cast_fp16 = conv(dilations = var_1341_dilations_0, groups = var_1341_groups_0, pad = var_1341_pad_0, pad_type = var_1341_pad_type_0, strides = var_1341_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor key_13_cast_fp16 = add(x = var_1335_cast_fp16, y = var_1341_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_1351_pad_type_0 = const()[name = tensor("op_1351_pad_type_0"), val = tensor("valid")]; + tensor var_1351_strides_0 = const()[name = tensor("op_1351_strides_0"), val = tensor([1, 1])]; + tensor var_1351_pad_0 = const()[name = tensor("op_1351_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1351_dilations_0 = const()[name = tensor("op_1351_dilations_0"), val = tensor([1, 1])]; + tensor var_1351_groups_0 = const()[name = tensor("op_1351_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35592640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35887616))), name = tensor("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35887744)))]; + tensor var_1351_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1351_dilations_0, groups = var_1351_groups_0, pad = var_1351_pad_0, pad_type = var_1351_pad_type_0, strides = var_1351_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_1351_cast_fp16")]; + tensor var_1357_pad_type_0 = const()[name = tensor("op_1357_pad_type_0"), val = tensor("valid")]; + tensor var_1357_strides_0 = const()[name = tensor("op_1357_strides_0"), val = tensor([1, 1])]; + tensor var_1357_pad_0 = const()[name = tensor("op_1357_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1357_dilations_0 = const()[name = tensor("op_1357_dilations_0"), val = tensor([1, 1])]; + tensor var_1357_groups_0 = const()[name = tensor("op_1357_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35895296))), name = tensor("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35889344))), shape = tensor([768, 768, 1, 1])]; + tensor var_1357_cast_fp16 = conv(dilations = var_1357_dilations_0, groups = var_1357_groups_0, pad = var_1357_pad_0, pad_type = var_1357_pad_type_0, strides = var_1357_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_1357_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_1351_cast_fp16, y = var_1357_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_1361 = const()[name = tensor("op_1361"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_1361, x = query_13_cast_fp16)[name = tensor("mh_q_13_cast_fp16")]; + tensor var_1363_to_fp16 = const()[name = tensor("op_1363_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1364_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1363_to_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1367 = const()[name = tensor("op_1367"), val = tensor([1, 12, 64, 1500])]; + tensor var_1368_cast_fp16 = reshape(shape = var_1367, x = key_13_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor mh_w_13_transpose_x_0 = const()[name = tensor("mh_w_13_transpose_x_0"), val = tensor(true)]; + tensor mh_w_13_transpose_y_0 = const()[name = tensor("mh_w_13_transpose_y_0"), val = tensor(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_1364_cast_fp16, y = var_1368_cast_fp16)[name = tensor("mh_w_13_cast_fp16")]; + tensor var_1371_cast_fp16 = softmax(axis = var_1276, x = mh_w_13_cast_fp16)[name = tensor("op_1371_cast_fp16")]; + tensor var_1372 = const()[name = tensor("op_1372"), val = tensor([1, 12, 64, 1500])]; + tensor var_1373_cast_fp16 = reshape(shape = var_1372, x = value_13_cast_fp16)[name = tensor("op_1373_cast_fp16")]; + tensor attn_13_transpose_x_0 = const()[name = tensor("attn_13_transpose_x_0"), val = tensor(false)]; + tensor attn_13_transpose_y_0 = const()[name = tensor("attn_13_transpose_y_0"), val = tensor(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1373_cast_fp16, y = var_1371_cast_fp16)[name = tensor("attn_13_cast_fp16")]; + tensor var_1376 = const()[name = tensor("op_1376"), val = tensor([1, 768, 1, 1500])]; + tensor input_49_cast_fp16 = reshape(shape = var_1376, x = attn_13_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor var_1386_pad_type_0 = const()[name = tensor("op_1386_pad_type_0"), val = tensor("valid")]; + tensor var_1386_strides_0 = const()[name = tensor("op_1386_strides_0"), val = tensor([1, 1])]; + tensor var_1386_pad_0 = const()[name = tensor("op_1386_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1386_dilations_0 = const()[name = tensor("op_1386_dilations_0"), val = tensor([1, 1])]; + tensor var_1386_groups_0 = const()[name = tensor("op_1386_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35969088))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36264064))), name = tensor("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36264192)))]; + tensor var_1386_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1386_dilations_0, groups = var_1386_groups_0, pad = var_1386_pad_0, pad_type = var_1386_pad_type_0, strides = var_1386_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1392_pad_type_0 = const()[name = tensor("op_1392_pad_type_0"), val = tensor("valid")]; + tensor var_1392_strides_0 = const()[name = tensor("op_1392_strides_0"), val = tensor([1, 1])]; + tensor var_1392_pad_0 = const()[name = tensor("op_1392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1392_dilations_0 = const()[name = tensor("op_1392_dilations_0"), val = tensor([1, 1])]; + tensor var_1392_groups_0 = const()[name = tensor("op_1392_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36270720))), name = tensor("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36265792))), shape = tensor([768, 768, 1, 1])]; + tensor var_1392_cast_fp16 = conv(dilations = var_1392_dilations_0, groups = var_1392_groups_0, pad = var_1392_pad_0, pad_type = var_1392_pad_type_0, strides = var_1392_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = tensor("op_1392_cast_fp16")]; + tensor obj_27_cast_fp16 = add(x = var_1386_cast_fp16, y = var_1392_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor out_27_axes_0 = const()[name = tensor("out_27_axes_0"), val = tensor([1])]; + tensor var_1403_to_fp16 = const()[name = tensor("op_1403_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1403_to_fp16, x = inputs_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; + tensor input_51_gamma_0_to_fp16 = const()[name = tensor("input_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36344512)))]; + tensor input_51_beta_0_to_fp16 = const()[name = tensor("input_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36346112)))]; + tensor input_51_epsilon_0_to_fp16 = const()[name = tensor("input_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor var_1421_pad_type_0 = const()[name = tensor("op_1421_pad_type_0"), val = tensor("valid")]; + tensor var_1421_strides_0 = const()[name = tensor("op_1421_strides_0"), val = tensor([1, 1])]; + tensor var_1421_pad_0 = const()[name = tensor("op_1421_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1421_dilations_0 = const()[name = tensor("op_1421_dilations_0"), val = tensor([1, 1])]; + tensor var_1421_groups_0 = const()[name = tensor("op_1421_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36347712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37527424))), name = tensor("layers_6_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37527552)))]; + tensor var_1421_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_1421_dilations_0, groups = var_1421_groups_0, pad = var_1421_pad_0, pad_type = var_1421_pad_type_0, strides = var_1421_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = tensor("op_1421_cast_fp16")]; + tensor var_1427_pad_type_0 = const()[name = tensor("op_1427_pad_type_0"), val = tensor("valid")]; + tensor var_1427_strides_0 = const()[name = tensor("op_1427_strides_0"), val = tensor([1, 1])]; + tensor var_1427_pad_0 = const()[name = tensor("op_1427_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1427_dilations_0 = const()[name = tensor("op_1427_dilations_0"), val = tensor([1, 1])]; + tensor var_1427_groups_0 = const()[name = tensor("op_1427_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37566848))), name = tensor("layers_6_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37533760))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1427_cast_fp16 = conv(dilations = var_1427_dilations_0, groups = var_1427_groups_0, pad = var_1427_pad_0, pad_type = var_1427_pad_type_0, strides = var_1427_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = tensor("op_1427_cast_fp16")]; + tensor input_53_cast_fp16 = add(x = var_1421_cast_fp16, y = var_1427_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor input_55_mode_0 = const()[name = tensor("input_55_mode_0"), val = tensor("EXACT")]; + tensor input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_1438_pad_type_0 = const()[name = tensor("op_1438_pad_type_0"), val = tensor("valid")]; + tensor var_1438_strides_0 = const()[name = tensor("op_1438_strides_0"), val = tensor([1, 1])]; + tensor var_1438_pad_0 = const()[name = tensor("op_1438_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1438_dilations_0 = const()[name = tensor("op_1438_dilations_0"), val = tensor([1, 1])]; + tensor var_1438_groups_0 = const()[name = tensor("op_1438_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37861824))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39041536))), name = tensor("layers_6_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39041664)))]; + tensor var_1438_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_1438_dilations_0, groups = var_1438_groups_0, pad = var_1438_pad_0, pad_type = var_1438_pad_type_0, strides = var_1438_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1444_pad_type_0 = const()[name = tensor("op_1444_pad_type_0"), val = tensor("valid")]; + tensor var_1444_strides_0 = const()[name = tensor("op_1444_strides_0"), val = tensor([1, 1])]; + tensor var_1444_pad_0 = const()[name = tensor("op_1444_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1444_dilations_0 = const()[name = tensor("op_1444_dilations_0"), val = tensor([1, 1])]; + tensor var_1444_groups_0 = const()[name = tensor("op_1444_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39079168))), name = tensor("layers_6_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39043264))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1444_cast_fp16 = conv(dilations = var_1444_dilations_0, groups = var_1444_groups_0, pad = var_1444_pad_0, pad_type = var_1444_pad_type_0, strides = var_1444_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = tensor("op_1444_cast_fp16")]; + tensor hidden_states_17_cast_fp16 = add(x = var_1438_cast_fp16, y = var_1444_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor var_1450 = const()[name = tensor("op_1450"), val = tensor(3)]; + tensor out_29_axes_0 = const()[name = tensor("out_29_axes_0"), val = tensor([1])]; + tensor var_1472_to_fp16 = const()[name = tensor("op_1472_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1472_to_fp16, x = inputs_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39374144)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39375744)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor var_1494_pad_type_0 = const()[name = tensor("op_1494_pad_type_0"), val = tensor("valid")]; + tensor var_1494_strides_0 = const()[name = tensor("op_1494_strides_0"), val = tensor([1, 1])]; + tensor var_1494_pad_0 = const()[name = tensor("op_1494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1494_dilations_0 = const()[name = tensor("op_1494_dilations_0"), val = tensor([1, 1])]; + tensor var_1494_groups_0 = const()[name = tensor("op_1494_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39377344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39672320))), name = tensor("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39672448)))]; + tensor var_1494_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1494_dilations_0, groups = var_1494_groups_0, pad = var_1494_pad_0, pad_type = var_1494_pad_type_0, strides = var_1494_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1500_pad_type_0 = const()[name = tensor("op_1500_pad_type_0"), val = tensor("valid")]; + tensor var_1500_strides_0 = const()[name = tensor("op_1500_strides_0"), val = tensor([1, 1])]; + tensor var_1500_pad_0 = const()[name = tensor("op_1500_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1500_dilations_0 = const()[name = tensor("op_1500_dilations_0"), val = tensor([1, 1])]; + tensor var_1500_groups_0 = const()[name = tensor("op_1500_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39680704))), name = tensor("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39674048))), shape = tensor([768, 768, 1, 1])]; + tensor var_1500_cast_fp16 = conv(dilations = var_1500_dilations_0, groups = var_1500_groups_0, pad = var_1500_pad_0, pad_type = var_1500_pad_type_0, strides = var_1500_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_1500_cast_fp16")]; + tensor query_15_cast_fp16 = add(x = var_1494_cast_fp16, y = var_1500_cast_fp16)[name = tensor("query_15_cast_fp16")]; + tensor var_1509_pad_type_0 = const()[name = tensor("op_1509_pad_type_0"), val = tensor("valid")]; + tensor var_1509_strides_0 = const()[name = tensor("op_1509_strides_0"), val = tensor([1, 1])]; + tensor var_1509_pad_0 = const()[name = tensor("op_1509_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1509_dilations_0 = const()[name = tensor("op_1509_dilations_0"), val = tensor([1, 1])]; + tensor var_1509_groups_0 = const()[name = tensor("op_1509_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39754496))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40049472))), name = tensor("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1509_cast_fp16 = conv(dilations = var_1509_dilations_0, groups = var_1509_groups_0, pad = var_1509_pad_0, pad_type = var_1509_pad_type_0, strides = var_1509_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_1509_cast_fp16")]; + tensor var_1515_pad_type_0 = const()[name = tensor("op_1515_pad_type_0"), val = tensor("valid")]; + tensor var_1515_strides_0 = const()[name = tensor("op_1515_strides_0"), val = tensor([1, 1])]; + tensor var_1515_pad_0 = const()[name = tensor("op_1515_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1515_dilations_0 = const()[name = tensor("op_1515_dilations_0"), val = tensor([1, 1])]; + tensor var_1515_groups_0 = const()[name = tensor("op_1515_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40056832))), name = tensor("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40049600))), shape = tensor([768, 768, 1, 1])]; + tensor var_1515_cast_fp16 = conv(dilations = var_1515_dilations_0, groups = var_1515_groups_0, pad = var_1515_pad_0, pad_type = var_1515_pad_type_0, strides = var_1515_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_1515_cast_fp16")]; + tensor key_15_cast_fp16 = add(x = var_1509_cast_fp16, y = var_1515_cast_fp16)[name = tensor("key_15_cast_fp16")]; + tensor var_1525_pad_type_0 = const()[name = tensor("op_1525_pad_type_0"), val = tensor("valid")]; + tensor var_1525_strides_0 = const()[name = tensor("op_1525_strides_0"), val = tensor([1, 1])]; + tensor var_1525_pad_0 = const()[name = tensor("op_1525_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1525_dilations_0 = const()[name = tensor("op_1525_dilations_0"), val = tensor([1, 1])]; + tensor var_1525_groups_0 = const()[name = tensor("op_1525_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40130624))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40425600))), name = tensor("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40425728)))]; + tensor var_1525_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1525_dilations_0, groups = var_1525_groups_0, pad = var_1525_pad_0, pad_type = var_1525_pad_type_0, strides = var_1525_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_1525_cast_fp16")]; + tensor var_1531_pad_type_0 = const()[name = tensor("op_1531_pad_type_0"), val = tensor("valid")]; + tensor var_1531_strides_0 = const()[name = tensor("op_1531_strides_0"), val = tensor([1, 1])]; + tensor var_1531_pad_0 = const()[name = tensor("op_1531_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1531_dilations_0 = const()[name = tensor("op_1531_dilations_0"), val = tensor([1, 1])]; + tensor var_1531_groups_0 = const()[name = tensor("op_1531_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40433728))), name = tensor("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40427328))), shape = tensor([768, 768, 1, 1])]; + tensor var_1531_cast_fp16 = conv(dilations = var_1531_dilations_0, groups = var_1531_groups_0, pad = var_1531_pad_0, pad_type = var_1531_pad_type_0, strides = var_1531_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_1531_cast_fp16")]; + tensor value_15_cast_fp16 = add(x = var_1525_cast_fp16, y = var_1531_cast_fp16)[name = tensor("value_15_cast_fp16")]; + tensor var_1535 = const()[name = tensor("op_1535"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_15_cast_fp16 = reshape(shape = var_1535, x = query_15_cast_fp16)[name = tensor("mh_q_15_cast_fp16")]; + tensor var_1537_to_fp16 = const()[name = tensor("op_1537_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1538_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1537_to_fp16)[name = tensor("op_1538_cast_fp16")]; + tensor var_1541 = const()[name = tensor("op_1541"), val = tensor([1, 12, 64, 1500])]; + tensor var_1542_cast_fp16 = reshape(shape = var_1541, x = key_15_cast_fp16)[name = tensor("op_1542_cast_fp16")]; + tensor mh_w_15_transpose_x_0 = const()[name = tensor("mh_w_15_transpose_x_0"), val = tensor(true)]; + tensor mh_w_15_transpose_y_0 = const()[name = tensor("mh_w_15_transpose_y_0"), val = tensor(false)]; + tensor mh_w_15_cast_fp16 = matmul(transpose_x = mh_w_15_transpose_x_0, transpose_y = mh_w_15_transpose_y_0, x = var_1538_cast_fp16, y = var_1542_cast_fp16)[name = tensor("mh_w_15_cast_fp16")]; + tensor var_1545_cast_fp16 = softmax(axis = var_1450, x = mh_w_15_cast_fp16)[name = tensor("op_1545_cast_fp16")]; + tensor var_1546 = const()[name = tensor("op_1546"), val = tensor([1, 12, 64, 1500])]; + tensor var_1547_cast_fp16 = reshape(shape = var_1546, x = value_15_cast_fp16)[name = tensor("op_1547_cast_fp16")]; + tensor attn_15_transpose_x_0 = const()[name = tensor("attn_15_transpose_x_0"), val = tensor(false)]; + tensor attn_15_transpose_y_0 = const()[name = tensor("attn_15_transpose_y_0"), val = tensor(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1547_cast_fp16, y = var_1545_cast_fp16)[name = tensor("attn_15_cast_fp16")]; + tensor var_1550 = const()[name = tensor("op_1550"), val = tensor([1, 768, 1, 1500])]; + tensor input_57_cast_fp16 = reshape(shape = var_1550, x = attn_15_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor var_1560_pad_type_0 = const()[name = tensor("op_1560_pad_type_0"), val = tensor("valid")]; + tensor var_1560_strides_0 = const()[name = tensor("op_1560_strides_0"), val = tensor([1, 1])]; + tensor var_1560_pad_0 = const()[name = tensor("op_1560_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1560_dilations_0 = const()[name = tensor("op_1560_dilations_0"), val = tensor([1, 1])]; + tensor var_1560_groups_0 = const()[name = tensor("op_1560_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40507520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40802496))), name = tensor("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40802624)))]; + tensor var_1560_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1560_dilations_0, groups = var_1560_groups_0, pad = var_1560_pad_0, pad_type = var_1560_pad_type_0, strides = var_1560_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_57_cast_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1566_pad_type_0 = const()[name = tensor("op_1566_pad_type_0"), val = tensor("valid")]; + tensor var_1566_strides_0 = const()[name = tensor("op_1566_strides_0"), val = tensor([1, 1])]; + tensor var_1566_pad_0 = const()[name = tensor("op_1566_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1566_dilations_0 = const()[name = tensor("op_1566_dilations_0"), val = tensor([1, 1])]; + tensor var_1566_groups_0 = const()[name = tensor("op_1566_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40810112))), name = tensor("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40804224))), shape = tensor([768, 768, 1, 1])]; + tensor var_1566_cast_fp16 = conv(dilations = var_1566_dilations_0, groups = var_1566_groups_0, pad = var_1566_pad_0, pad_type = var_1566_pad_type_0, strides = var_1566_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_57_cast_fp16)[name = tensor("op_1566_cast_fp16")]; + tensor obj_31_cast_fp16 = add(x = var_1560_cast_fp16, y = var_1566_cast_fp16)[name = tensor("obj_31_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor out_31_axes_0 = const()[name = tensor("out_31_axes_0"), val = tensor([1])]; + tensor var_1577_to_fp16 = const()[name = tensor("op_1577_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1577_to_fp16, x = inputs_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; + tensor input_59_gamma_0_to_fp16 = const()[name = tensor("input_59_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40883904)))]; + tensor input_59_beta_0_to_fp16 = const()[name = tensor("input_59_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40885504)))]; + tensor input_59_epsilon_0_to_fp16 = const()[name = tensor("input_59_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor var_1595_pad_type_0 = const()[name = tensor("op_1595_pad_type_0"), val = tensor("valid")]; + tensor var_1595_strides_0 = const()[name = tensor("op_1595_strides_0"), val = tensor([1, 1])]; + tensor var_1595_pad_0 = const()[name = tensor("op_1595_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1595_dilations_0 = const()[name = tensor("op_1595_dilations_0"), val = tensor([1, 1])]; + tensor var_1595_groups_0 = const()[name = tensor("op_1595_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40887104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42066816))), name = tensor("layers_7_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42066944)))]; + tensor var_1595_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_1595_dilations_0, groups = var_1595_groups_0, pad = var_1595_pad_0, pad_type = var_1595_pad_type_0, strides = var_1595_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = tensor("op_1595_cast_fp16")]; + tensor var_1601_pad_type_0 = const()[name = tensor("op_1601_pad_type_0"), val = tensor("valid")]; + tensor var_1601_strides_0 = const()[name = tensor("op_1601_strides_0"), val = tensor([1, 1])]; + tensor var_1601_pad_0 = const()[name = tensor("op_1601_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1601_dilations_0 = const()[name = tensor("op_1601_dilations_0"), val = tensor([1, 1])]; + tensor var_1601_groups_0 = const()[name = tensor("op_1601_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42110976))), name = tensor("layers_7_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42073152))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1601_cast_fp16 = conv(dilations = var_1601_dilations_0, groups = var_1601_groups_0, pad = var_1601_pad_0, pad_type = var_1601_pad_type_0, strides = var_1601_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = tensor("op_1601_cast_fp16")]; + tensor input_61_cast_fp16 = add(x = var_1595_cast_fp16, y = var_1601_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor input_63_mode_0 = const()[name = tensor("input_63_mode_0"), val = tensor("EXACT")]; + tensor input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor var_1612_pad_type_0 = const()[name = tensor("op_1612_pad_type_0"), val = tensor("valid")]; + tensor var_1612_strides_0 = const()[name = tensor("op_1612_strides_0"), val = tensor([1, 1])]; + tensor var_1612_pad_0 = const()[name = tensor("op_1612_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1612_dilations_0 = const()[name = tensor("op_1612_dilations_0"), val = tensor([1, 1])]; + tensor var_1612_groups_0 = const()[name = tensor("op_1612_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42405952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43585664))), name = tensor("layers_7_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43585792)))]; + tensor var_1612_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_1612_dilations_0, groups = var_1612_groups_0, pad = var_1612_pad_0, pad_type = var_1612_pad_type_0, strides = var_1612_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1618_pad_type_0 = const()[name = tensor("op_1618_pad_type_0"), val = tensor("valid")]; + tensor var_1618_strides_0 = const()[name = tensor("op_1618_strides_0"), val = tensor([1, 1])]; + tensor var_1618_pad_0 = const()[name = tensor("op_1618_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1618_dilations_0 = const()[name = tensor("op_1618_dilations_0"), val = tensor([1, 1])]; + tensor var_1618_groups_0 = const()[name = tensor("op_1618_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43616960))), name = tensor("layers_7_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43587392))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1618_cast_fp16 = conv(dilations = var_1618_dilations_0, groups = var_1618_groups_0, pad = var_1618_pad_0, pad_type = var_1618_pad_type_0, strides = var_1618_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = tensor("op_1618_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = add(x = var_1612_cast_fp16, y = var_1618_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor var_1624 = const()[name = tensor("op_1624"), val = tensor(3)]; + tensor out_33_axes_0 = const()[name = tensor("out_33_axes_0"), val = tensor([1])]; + tensor var_1646_to_fp16 = const()[name = tensor("op_1646_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1646_to_fp16, x = inputs_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; + tensor obj_33_gamma_0_to_fp16 = const()[name = tensor("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43911936)))]; + tensor obj_33_beta_0_to_fp16 = const()[name = tensor("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43913536)))]; + tensor obj_33_epsilon_0_to_fp16 = const()[name = tensor("obj_33_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_33_cast_fp16")]; + tensor var_1668_pad_type_0 = const()[name = tensor("op_1668_pad_type_0"), val = tensor("valid")]; + tensor var_1668_strides_0 = const()[name = tensor("op_1668_strides_0"), val = tensor([1, 1])]; + tensor var_1668_pad_0 = const()[name = tensor("op_1668_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1668_dilations_0 = const()[name = tensor("op_1668_dilations_0"), val = tensor([1, 1])]; + tensor var_1668_groups_0 = const()[name = tensor("op_1668_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43915136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44210112))), name = tensor("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44210240)))]; + tensor var_1668_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1668_dilations_0, groups = var_1668_groups_0, pad = var_1668_pad_0, pad_type = var_1668_pad_type_0, strides = var_1668_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1674_pad_type_0 = const()[name = tensor("op_1674_pad_type_0"), val = tensor("valid")]; + tensor var_1674_strides_0 = const()[name = tensor("op_1674_strides_0"), val = tensor([1, 1])]; + tensor var_1674_pad_0 = const()[name = tensor("op_1674_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1674_dilations_0 = const()[name = tensor("op_1674_dilations_0"), val = tensor([1, 1])]; + tensor var_1674_groups_0 = const()[name = tensor("op_1674_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44219008))), name = tensor("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44211840))), shape = tensor([768, 768, 1, 1])]; + tensor var_1674_cast_fp16 = conv(dilations = var_1674_dilations_0, groups = var_1674_groups_0, pad = var_1674_pad_0, pad_type = var_1674_pad_type_0, strides = var_1674_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor query_17_cast_fp16 = add(x = var_1668_cast_fp16, y = var_1674_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor var_1683_pad_type_0 = const()[name = tensor("op_1683_pad_type_0"), val = tensor("valid")]; + tensor var_1683_strides_0 = const()[name = tensor("op_1683_strides_0"), val = tensor([1, 1])]; + tensor var_1683_pad_0 = const()[name = tensor("op_1683_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1683_dilations_0 = const()[name = tensor("op_1683_dilations_0"), val = tensor([1, 1])]; + tensor var_1683_groups_0 = const()[name = tensor("op_1683_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44292800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44587776))), name = tensor("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1683_cast_fp16 = conv(dilations = var_1683_dilations_0, groups = var_1683_groups_0, pad = var_1683_pad_0, pad_type = var_1683_pad_type_0, strides = var_1683_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1689_pad_type_0 = const()[name = tensor("op_1689_pad_type_0"), val = tensor("valid")]; + tensor var_1689_strides_0 = const()[name = tensor("op_1689_strides_0"), val = tensor([1, 1])]; + tensor var_1689_pad_0 = const()[name = tensor("op_1689_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1689_dilations_0 = const()[name = tensor("op_1689_dilations_0"), val = tensor([1, 1])]; + tensor var_1689_groups_0 = const()[name = tensor("op_1689_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44595136))), name = tensor("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44587904))), shape = tensor([768, 768, 1, 1])]; + tensor var_1689_cast_fp16 = conv(dilations = var_1689_dilations_0, groups = var_1689_groups_0, pad = var_1689_pad_0, pad_type = var_1689_pad_type_0, strides = var_1689_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor key_17_cast_fp16 = add(x = var_1683_cast_fp16, y = var_1689_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor var_1699_pad_type_0 = const()[name = tensor("op_1699_pad_type_0"), val = tensor("valid")]; + tensor var_1699_strides_0 = const()[name = tensor("op_1699_strides_0"), val = tensor([1, 1])]; + tensor var_1699_pad_0 = const()[name = tensor("op_1699_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1699_dilations_0 = const()[name = tensor("op_1699_dilations_0"), val = tensor([1, 1])]; + tensor var_1699_groups_0 = const()[name = tensor("op_1699_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44668928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44963904))), name = tensor("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44964032)))]; + tensor var_1699_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1699_dilations_0, groups = var_1699_groups_0, pad = var_1699_pad_0, pad_type = var_1699_pad_type_0, strides = var_1699_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_1699_cast_fp16")]; + tensor var_1705_pad_type_0 = const()[name = tensor("op_1705_pad_type_0"), val = tensor("valid")]; + tensor var_1705_strides_0 = const()[name = tensor("op_1705_strides_0"), val = tensor([1, 1])]; + tensor var_1705_pad_0 = const()[name = tensor("op_1705_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1705_dilations_0 = const()[name = tensor("op_1705_dilations_0"), val = tensor([1, 1])]; + tensor var_1705_groups_0 = const()[name = tensor("op_1705_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44971200))), name = tensor("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44965632))), shape = tensor([768, 768, 1, 1])]; + tensor var_1705_cast_fp16 = conv(dilations = var_1705_dilations_0, groups = var_1705_groups_0, pad = var_1705_pad_0, pad_type = var_1705_pad_type_0, strides = var_1705_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_1705_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1699_cast_fp16, y = var_1705_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_1709 = const()[name = tensor("op_1709"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_17_cast_fp16 = reshape(shape = var_1709, x = query_17_cast_fp16)[name = tensor("mh_q_17_cast_fp16")]; + tensor var_1711_to_fp16 = const()[name = tensor("op_1711_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1712_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1711_to_fp16)[name = tensor("op_1712_cast_fp16")]; + tensor var_1715 = const()[name = tensor("op_1715"), val = tensor([1, 12, 64, 1500])]; + tensor var_1716_cast_fp16 = reshape(shape = var_1715, x = key_17_cast_fp16)[name = tensor("op_1716_cast_fp16")]; + tensor mh_w_17_transpose_x_0 = const()[name = tensor("mh_w_17_transpose_x_0"), val = tensor(true)]; + tensor mh_w_17_transpose_y_0 = const()[name = tensor("mh_w_17_transpose_y_0"), val = tensor(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_1712_cast_fp16, y = var_1716_cast_fp16)[name = tensor("mh_w_17_cast_fp16")]; + tensor var_1719_cast_fp16 = softmax(axis = var_1624, x = mh_w_17_cast_fp16)[name = tensor("op_1719_cast_fp16")]; + tensor var_1720 = const()[name = tensor("op_1720"), val = tensor([1, 12, 64, 1500])]; + tensor var_1721_cast_fp16 = reshape(shape = var_1720, x = value_17_cast_fp16)[name = tensor("op_1721_cast_fp16")]; + tensor attn_17_transpose_x_0 = const()[name = tensor("attn_17_transpose_x_0"), val = tensor(false)]; + tensor attn_17_transpose_y_0 = const()[name = tensor("attn_17_transpose_y_0"), val = tensor(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1721_cast_fp16, y = var_1719_cast_fp16)[name = tensor("attn_17_cast_fp16")]; + tensor var_1724 = const()[name = tensor("op_1724"), val = tensor([1, 768, 1, 1500])]; + tensor input_65_cast_fp16 = reshape(shape = var_1724, x = attn_17_cast_fp16)[name = tensor("input_65_cast_fp16")]; + tensor var_1734_pad_type_0 = const()[name = tensor("op_1734_pad_type_0"), val = tensor("valid")]; + tensor var_1734_strides_0 = const()[name = tensor("op_1734_strides_0"), val = tensor([1, 1])]; + tensor var_1734_pad_0 = const()[name = tensor("op_1734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1734_dilations_0 = const()[name = tensor("op_1734_dilations_0"), val = tensor([1, 1])]; + tensor var_1734_groups_0 = const()[name = tensor("op_1734_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45044992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45339968))), name = tensor("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45340096)))]; + tensor var_1734_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1734_dilations_0, groups = var_1734_groups_0, pad = var_1734_pad_0, pad_type = var_1734_pad_type_0, strides = var_1734_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = tensor("op_1734_cast_fp16")]; + tensor var_1740_pad_type_0 = const()[name = tensor("op_1740_pad_type_0"), val = tensor("valid")]; + tensor var_1740_strides_0 = const()[name = tensor("op_1740_strides_0"), val = tensor([1, 1])]; + tensor var_1740_pad_0 = const()[name = tensor("op_1740_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1740_dilations_0 = const()[name = tensor("op_1740_dilations_0"), val = tensor([1, 1])]; + tensor var_1740_groups_0 = const()[name = tensor("op_1740_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45347776))), name = tensor("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45341696))), shape = tensor([768, 768, 1, 1])]; + tensor var_1740_cast_fp16 = conv(dilations = var_1740_dilations_0, groups = var_1740_groups_0, pad = var_1740_pad_0, pad_type = var_1740_pad_type_0, strides = var_1740_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = tensor("op_1740_cast_fp16")]; + tensor obj_35_cast_fp16 = add(x = var_1734_cast_fp16, y = var_1740_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; + tensor out_35_axes_0 = const()[name = tensor("out_35_axes_0"), val = tensor([1])]; + tensor var_1751_to_fp16 = const()[name = tensor("op_1751_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1751_to_fp16, x = inputs_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; + tensor input_67_gamma_0_to_fp16 = const()[name = tensor("input_67_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45421568)))]; + tensor input_67_beta_0_to_fp16 = const()[name = tensor("input_67_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45423168)))]; + tensor input_67_epsilon_0_to_fp16 = const()[name = tensor("input_67_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor var_1769_pad_type_0 = const()[name = tensor("op_1769_pad_type_0"), val = tensor("valid")]; + tensor var_1769_strides_0 = const()[name = tensor("op_1769_strides_0"), val = tensor([1, 1])]; + tensor var_1769_pad_0 = const()[name = tensor("op_1769_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1769_dilations_0 = const()[name = tensor("op_1769_dilations_0"), val = tensor([1, 1])]; + tensor var_1769_groups_0 = const()[name = tensor("op_1769_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45424768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46604480))), name = tensor("layers_8_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46604608)))]; + tensor var_1769_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_1769_dilations_0, groups = var_1769_groups_0, pad = var_1769_pad_0, pad_type = var_1769_pad_type_0, strides = var_1769_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = tensor("op_1769_cast_fp16")]; + tensor var_1775_pad_type_0 = const()[name = tensor("op_1775_pad_type_0"), val = tensor("valid")]; + tensor var_1775_strides_0 = const()[name = tensor("op_1775_strides_0"), val = tensor([1, 1])]; + tensor var_1775_pad_0 = const()[name = tensor("op_1775_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1775_dilations_0 = const()[name = tensor("op_1775_dilations_0"), val = tensor([1, 1])]; + tensor var_1775_groups_0 = const()[name = tensor("op_1775_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46641984))), name = tensor("layers_8_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46610816))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1775_cast_fp16 = conv(dilations = var_1775_dilations_0, groups = var_1775_groups_0, pad = var_1775_pad_0, pad_type = var_1775_pad_type_0, strides = var_1775_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_67_cast_fp16)[name = tensor("op_1775_cast_fp16")]; + tensor input_69_cast_fp16 = add(x = var_1769_cast_fp16, y = var_1775_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor input_71_mode_0 = const()[name = tensor("input_71_mode_0"), val = tensor("EXACT")]; + tensor input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor var_1786_pad_type_0 = const()[name = tensor("op_1786_pad_type_0"), val = tensor("valid")]; + tensor var_1786_strides_0 = const()[name = tensor("op_1786_strides_0"), val = tensor([1, 1])]; + tensor var_1786_pad_0 = const()[name = tensor("op_1786_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1786_dilations_0 = const()[name = tensor("op_1786_dilations_0"), val = tensor([1, 1])]; + tensor var_1786_groups_0 = const()[name = tensor("op_1786_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46936960))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48116672))), name = tensor("layers_8_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48116800)))]; + tensor var_1786_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_1786_dilations_0, groups = var_1786_groups_0, pad = var_1786_pad_0, pad_type = var_1786_pad_type_0, strides = var_1786_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = tensor("op_1786_cast_fp16")]; + tensor var_1792_pad_type_0 = const()[name = tensor("op_1792_pad_type_0"), val = tensor("valid")]; + tensor var_1792_strides_0 = const()[name = tensor("op_1792_strides_0"), val = tensor([1, 1])]; + tensor var_1792_pad_0 = const()[name = tensor("op_1792_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1792_dilations_0 = const()[name = tensor("op_1792_dilations_0"), val = tensor([1, 1])]; + tensor var_1792_groups_0 = const()[name = tensor("op_1792_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48148992))), name = tensor("layers_8_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48118400))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1792_cast_fp16 = conv(dilations = var_1792_dilations_0, groups = var_1792_groups_0, pad = var_1792_pad_0, pad_type = var_1792_pad_type_0, strides = var_1792_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = tensor("op_1792_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = var_1786_cast_fp16, y = var_1792_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; + tensor var_1798 = const()[name = tensor("op_1798"), val = tensor(3)]; + tensor out_37_axes_0 = const()[name = tensor("out_37_axes_0"), val = tensor([1])]; + tensor var_1820_to_fp16 = const()[name = tensor("op_1820_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1820_to_fp16, x = inputs_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48443968)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48445568)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor var_1842_pad_type_0 = const()[name = tensor("op_1842_pad_type_0"), val = tensor("valid")]; + tensor var_1842_strides_0 = const()[name = tensor("op_1842_strides_0"), val = tensor([1, 1])]; + tensor var_1842_pad_0 = const()[name = tensor("op_1842_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1842_dilations_0 = const()[name = tensor("op_1842_dilations_0"), val = tensor([1, 1])]; + tensor var_1842_groups_0 = const()[name = tensor("op_1842_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48447168))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48742144))), name = tensor("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48742272)))]; + tensor var_1842_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1842_dilations_0, groups = var_1842_groups_0, pad = var_1842_pad_0, pad_type = var_1842_pad_type_0, strides = var_1842_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_1842_cast_fp16")]; + tensor var_1848_pad_type_0 = const()[name = tensor("op_1848_pad_type_0"), val = tensor("valid")]; + tensor var_1848_strides_0 = const()[name = tensor("op_1848_strides_0"), val = tensor([1, 1])]; + tensor var_1848_pad_0 = const()[name = tensor("op_1848_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1848_dilations_0 = const()[name = tensor("op_1848_dilations_0"), val = tensor([1, 1])]; + tensor var_1848_groups_0 = const()[name = tensor("op_1848_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48750080))), name = tensor("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48743872))), shape = tensor([768, 768, 1, 1])]; + tensor var_1848_cast_fp16 = conv(dilations = var_1848_dilations_0, groups = var_1848_groups_0, pad = var_1848_pad_0, pad_type = var_1848_pad_type_0, strides = var_1848_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_1848_cast_fp16")]; + tensor query_19_cast_fp16 = add(x = var_1842_cast_fp16, y = var_1848_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor var_1857_pad_type_0 = const()[name = tensor("op_1857_pad_type_0"), val = tensor("valid")]; + tensor var_1857_strides_0 = const()[name = tensor("op_1857_strides_0"), val = tensor([1, 1])]; + tensor var_1857_pad_0 = const()[name = tensor("op_1857_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1857_dilations_0 = const()[name = tensor("op_1857_dilations_0"), val = tensor([1, 1])]; + tensor var_1857_groups_0 = const()[name = tensor("op_1857_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48823872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49118848))), name = tensor("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1857_cast_fp16 = conv(dilations = var_1857_dilations_0, groups = var_1857_groups_0, pad = var_1857_pad_0, pad_type = var_1857_pad_type_0, strides = var_1857_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_1857_cast_fp16")]; + tensor var_1863_pad_type_0 = const()[name = tensor("op_1863_pad_type_0"), val = tensor("valid")]; + tensor var_1863_strides_0 = const()[name = tensor("op_1863_strides_0"), val = tensor([1, 1])]; + tensor var_1863_pad_0 = const()[name = tensor("op_1863_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1863_dilations_0 = const()[name = tensor("op_1863_dilations_0"), val = tensor([1, 1])]; + tensor var_1863_groups_0 = const()[name = tensor("op_1863_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49125760))), name = tensor("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49118976))), shape = tensor([768, 768, 1, 1])]; + tensor var_1863_cast_fp16 = conv(dilations = var_1863_dilations_0, groups = var_1863_groups_0, pad = var_1863_pad_0, pad_type = var_1863_pad_type_0, strides = var_1863_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_1863_cast_fp16")]; + tensor key_19_cast_fp16 = add(x = var_1857_cast_fp16, y = var_1863_cast_fp16)[name = tensor("key_19_cast_fp16")]; + tensor var_1873_pad_type_0 = const()[name = tensor("op_1873_pad_type_0"), val = tensor("valid")]; + tensor var_1873_strides_0 = const()[name = tensor("op_1873_strides_0"), val = tensor([1, 1])]; + tensor var_1873_pad_0 = const()[name = tensor("op_1873_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1873_dilations_0 = const()[name = tensor("op_1873_dilations_0"), val = tensor([1, 1])]; + tensor var_1873_groups_0 = const()[name = tensor("op_1873_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49199552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49494528))), name = tensor("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49494656)))]; + tensor var_1873_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1873_dilations_0, groups = var_1873_groups_0, pad = var_1873_pad_0, pad_type = var_1873_pad_type_0, strides = var_1873_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_1873_cast_fp16")]; + tensor var_1879_pad_type_0 = const()[name = tensor("op_1879_pad_type_0"), val = tensor("valid")]; + tensor var_1879_strides_0 = const()[name = tensor("op_1879_strides_0"), val = tensor([1, 1])]; + tensor var_1879_pad_0 = const()[name = tensor("op_1879_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1879_dilations_0 = const()[name = tensor("op_1879_dilations_0"), val = tensor([1, 1])]; + tensor var_1879_groups_0 = const()[name = tensor("op_1879_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49501760))), name = tensor("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49496256))), shape = tensor([768, 768, 1, 1])]; + tensor var_1879_cast_fp16 = conv(dilations = var_1879_dilations_0, groups = var_1879_groups_0, pad = var_1879_pad_0, pad_type = var_1879_pad_type_0, strides = var_1879_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_1879_cast_fp16")]; + tensor value_19_cast_fp16 = add(x = var_1873_cast_fp16, y = var_1879_cast_fp16)[name = tensor("value_19_cast_fp16")]; + tensor var_1883 = const()[name = tensor("op_1883"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1883, x = query_19_cast_fp16)[name = tensor("mh_q_19_cast_fp16")]; + tensor var_1885_to_fp16 = const()[name = tensor("op_1885_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1886_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1885_to_fp16)[name = tensor("op_1886_cast_fp16")]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor([1, 12, 64, 1500])]; + tensor var_1890_cast_fp16 = reshape(shape = var_1889, x = key_19_cast_fp16)[name = tensor("op_1890_cast_fp16")]; + tensor mh_w_19_transpose_x_0 = const()[name = tensor("mh_w_19_transpose_x_0"), val = tensor(true)]; + tensor mh_w_19_transpose_y_0 = const()[name = tensor("mh_w_19_transpose_y_0"), val = tensor(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1886_cast_fp16, y = var_1890_cast_fp16)[name = tensor("mh_w_19_cast_fp16")]; + tensor var_1893_cast_fp16 = softmax(axis = var_1798, x = mh_w_19_cast_fp16)[name = tensor("op_1893_cast_fp16")]; + tensor var_1894 = const()[name = tensor("op_1894"), val = tensor([1, 12, 64, 1500])]; + tensor var_1895_cast_fp16 = reshape(shape = var_1894, x = value_19_cast_fp16)[name = tensor("op_1895_cast_fp16")]; + tensor attn_19_transpose_x_0 = const()[name = tensor("attn_19_transpose_x_0"), val = tensor(false)]; + tensor attn_19_transpose_y_0 = const()[name = tensor("attn_19_transpose_y_0"), val = tensor(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1895_cast_fp16, y = var_1893_cast_fp16)[name = tensor("attn_19_cast_fp16")]; + tensor var_1898 = const()[name = tensor("op_1898"), val = tensor([1, 768, 1, 1500])]; + tensor input_73_cast_fp16 = reshape(shape = var_1898, x = attn_19_cast_fp16)[name = tensor("input_73_cast_fp16")]; + tensor var_1908_pad_type_0 = const()[name = tensor("op_1908_pad_type_0"), val = tensor("valid")]; + tensor var_1908_strides_0 = const()[name = tensor("op_1908_strides_0"), val = tensor([1, 1])]; + tensor var_1908_pad_0 = const()[name = tensor("op_1908_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1908_dilations_0 = const()[name = tensor("op_1908_dilations_0"), val = tensor([1, 1])]; + tensor var_1908_groups_0 = const()[name = tensor("op_1908_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49575552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49870528))), name = tensor("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49870656)))]; + tensor var_1908_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1908_dilations_0, groups = var_1908_groups_0, pad = var_1908_pad_0, pad_type = var_1908_pad_type_0, strides = var_1908_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = tensor("op_1908_cast_fp16")]; + tensor var_1914_pad_type_0 = const()[name = tensor("op_1914_pad_type_0"), val = tensor("valid")]; + tensor var_1914_strides_0 = const()[name = tensor("op_1914_strides_0"), val = tensor([1, 1])]; + tensor var_1914_pad_0 = const()[name = tensor("op_1914_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1914_dilations_0 = const()[name = tensor("op_1914_dilations_0"), val = tensor([1, 1])]; + tensor var_1914_groups_0 = const()[name = tensor("op_1914_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49877760))), name = tensor("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49872256))), shape = tensor([768, 768, 1, 1])]; + tensor var_1914_cast_fp16 = conv(dilations = var_1914_dilations_0, groups = var_1914_groups_0, pad = var_1914_pad_0, pad_type = var_1914_pad_type_0, strides = var_1914_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = tensor("op_1914_cast_fp16")]; + tensor obj_39_cast_fp16 = add(x = var_1908_cast_fp16, y = var_1914_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; + tensor out_39_axes_0 = const()[name = tensor("out_39_axes_0"), val = tensor([1])]; + tensor var_1925_to_fp16 = const()[name = tensor("op_1925_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1925_to_fp16, x = inputs_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49951552)))]; + tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49953152)))]; + tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor var_1943_pad_type_0 = const()[name = tensor("op_1943_pad_type_0"), val = tensor("valid")]; + tensor var_1943_strides_0 = const()[name = tensor("op_1943_strides_0"), val = tensor([1, 1])]; + tensor var_1943_pad_0 = const()[name = tensor("op_1943_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1943_dilations_0 = const()[name = tensor("op_1943_dilations_0"), val = tensor([1, 1])]; + tensor var_1943_groups_0 = const()[name = tensor("op_1943_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49954752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51134464))), name = tensor("layers_9_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51134592)))]; + tensor var_1943_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_1943_dilations_0, groups = var_1943_groups_0, pad = var_1943_pad_0, pad_type = var_1943_pad_type_0, strides = var_1943_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = tensor("op_1943_cast_fp16")]; + tensor var_1949_pad_type_0 = const()[name = tensor("op_1949_pad_type_0"), val = tensor("valid")]; + tensor var_1949_strides_0 = const()[name = tensor("op_1949_strides_0"), val = tensor([1, 1])]; + tensor var_1949_pad_0 = const()[name = tensor("op_1949_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1949_dilations_0 = const()[name = tensor("op_1949_dilations_0"), val = tensor([1, 1])]; + tensor var_1949_groups_0 = const()[name = tensor("op_1949_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51171648))), name = tensor("layers_9_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51140800))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1949_cast_fp16 = conv(dilations = var_1949_dilations_0, groups = var_1949_groups_0, pad = var_1949_pad_0, pad_type = var_1949_pad_type_0, strides = var_1949_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = tensor("op_1949_cast_fp16")]; + tensor input_77_cast_fp16 = add(x = var_1943_cast_fp16, y = var_1949_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor var_1960_pad_type_0 = const()[name = tensor("op_1960_pad_type_0"), val = tensor("valid")]; + tensor var_1960_strides_0 = const()[name = tensor("op_1960_strides_0"), val = tensor([1, 1])]; + tensor var_1960_pad_0 = const()[name = tensor("op_1960_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1960_dilations_0 = const()[name = tensor("op_1960_dilations_0"), val = tensor([1, 1])]; + tensor var_1960_groups_0 = const()[name = tensor("op_1960_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51466624))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52646336))), name = tensor("layers_9_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52646464)))]; + tensor var_1960_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_1960_dilations_0, groups = var_1960_groups_0, pad = var_1960_pad_0, pad_type = var_1960_pad_type_0, strides = var_1960_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = tensor("op_1960_cast_fp16")]; + tensor var_1966_pad_type_0 = const()[name = tensor("op_1966_pad_type_0"), val = tensor("valid")]; + tensor var_1966_strides_0 = const()[name = tensor("op_1966_strides_0"), val = tensor([1, 1])]; + tensor var_1966_pad_0 = const()[name = tensor("op_1966_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1966_dilations_0 = const()[name = tensor("op_1966_dilations_0"), val = tensor([1, 1])]; + tensor var_1966_groups_0 = const()[name = tensor("op_1966_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52680576))), name = tensor("layers_9_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52648064))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1966_cast_fp16 = conv(dilations = var_1966_dilations_0, groups = var_1966_groups_0, pad = var_1966_pad_0, pad_type = var_1966_pad_type_0, strides = var_1966_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = tensor("op_1966_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = add(x = var_1960_cast_fp16, y = var_1966_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; + tensor var_1972 = const()[name = tensor("op_1972"), val = tensor(3)]; + tensor out_41_axes_0 = const()[name = tensor("out_41_axes_0"), val = tensor([1])]; + tensor var_1994_to_fp16 = const()[name = tensor("op_1994_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_1994_to_fp16, x = inputs_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; + tensor obj_41_gamma_0_to_fp16 = const()[name = tensor("obj_41_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52975552)))]; + tensor obj_41_beta_0_to_fp16 = const()[name = tensor("obj_41_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52977152)))]; + tensor obj_41_epsilon_0_to_fp16 = const()[name = tensor("obj_41_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor var_2016_pad_type_0 = const()[name = tensor("op_2016_pad_type_0"), val = tensor("valid")]; + tensor var_2016_strides_0 = const()[name = tensor("op_2016_strides_0"), val = tensor([1, 1])]; + tensor var_2016_pad_0 = const()[name = tensor("op_2016_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2016_dilations_0 = const()[name = tensor("op_2016_dilations_0"), val = tensor([1, 1])]; + tensor var_2016_groups_0 = const()[name = tensor("op_2016_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52978752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53273728))), name = tensor("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53273856)))]; + tensor var_2016_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2016_dilations_0, groups = var_2016_groups_0, pad = var_2016_pad_0, pad_type = var_2016_pad_type_0, strides = var_2016_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_2016_cast_fp16")]; + tensor var_2022_pad_type_0 = const()[name = tensor("op_2022_pad_type_0"), val = tensor("valid")]; + tensor var_2022_strides_0 = const()[name = tensor("op_2022_strides_0"), val = tensor([1, 1])]; + tensor var_2022_pad_0 = const()[name = tensor("op_2022_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2022_dilations_0 = const()[name = tensor("op_2022_dilations_0"), val = tensor([1, 1])]; + tensor var_2022_groups_0 = const()[name = tensor("op_2022_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53281536))), name = tensor("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53275456))), shape = tensor([768, 768, 1, 1])]; + tensor var_2022_cast_fp16 = conv(dilations = var_2022_dilations_0, groups = var_2022_groups_0, pad = var_2022_pad_0, pad_type = var_2022_pad_type_0, strides = var_2022_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_2022_cast_fp16")]; + tensor query_21_cast_fp16 = add(x = var_2016_cast_fp16, y = var_2022_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor var_2031_pad_type_0 = const()[name = tensor("op_2031_pad_type_0"), val = tensor("valid")]; + tensor var_2031_strides_0 = const()[name = tensor("op_2031_strides_0"), val = tensor([1, 1])]; + tensor var_2031_pad_0 = const()[name = tensor("op_2031_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2031_dilations_0 = const()[name = tensor("op_2031_dilations_0"), val = tensor([1, 1])]; + tensor var_2031_groups_0 = const()[name = tensor("op_2031_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53355328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53650304))), name = tensor("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2031_cast_fp16 = conv(dilations = var_2031_dilations_0, groups = var_2031_groups_0, pad = var_2031_pad_0, pad_type = var_2031_pad_type_0, strides = var_2031_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_2031_cast_fp16")]; + tensor var_2037_pad_type_0 = const()[name = tensor("op_2037_pad_type_0"), val = tensor("valid")]; + tensor var_2037_strides_0 = const()[name = tensor("op_2037_strides_0"), val = tensor([1, 1])]; + tensor var_2037_pad_0 = const()[name = tensor("op_2037_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2037_dilations_0 = const()[name = tensor("op_2037_dilations_0"), val = tensor([1, 1])]; + tensor var_2037_groups_0 = const()[name = tensor("op_2037_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53657216))), name = tensor("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53650432))), shape = tensor([768, 768, 1, 1])]; + tensor var_2037_cast_fp16 = conv(dilations = var_2037_dilations_0, groups = var_2037_groups_0, pad = var_2037_pad_0, pad_type = var_2037_pad_type_0, strides = var_2037_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_2037_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_2031_cast_fp16, y = var_2037_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor var_2047_pad_type_0 = const()[name = tensor("op_2047_pad_type_0"), val = tensor("valid")]; + tensor var_2047_strides_0 = const()[name = tensor("op_2047_strides_0"), val = tensor([1, 1])]; + tensor var_2047_pad_0 = const()[name = tensor("op_2047_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2047_dilations_0 = const()[name = tensor("op_2047_dilations_0"), val = tensor([1, 1])]; + tensor var_2047_groups_0 = const()[name = tensor("op_2047_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53731008))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54025984))), name = tensor("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54026112)))]; + tensor var_2047_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2047_dilations_0, groups = var_2047_groups_0, pad = var_2047_pad_0, pad_type = var_2047_pad_type_0, strides = var_2047_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_2047_cast_fp16")]; + tensor var_2053_pad_type_0 = const()[name = tensor("op_2053_pad_type_0"), val = tensor("valid")]; + tensor var_2053_strides_0 = const()[name = tensor("op_2053_strides_0"), val = tensor([1, 1])]; + tensor var_2053_pad_0 = const()[name = tensor("op_2053_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2053_dilations_0 = const()[name = tensor("op_2053_dilations_0"), val = tensor([1, 1])]; + tensor var_2053_groups_0 = const()[name = tensor("op_2053_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54033152))), name = tensor("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54027712))), shape = tensor([768, 768, 1, 1])]; + tensor var_2053_cast_fp16 = conv(dilations = var_2053_dilations_0, groups = var_2053_groups_0, pad = var_2053_pad_0, pad_type = var_2053_pad_type_0, strides = var_2053_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_2053_cast_fp16")]; + tensor value_21_cast_fp16 = add(x = var_2047_cast_fp16, y = var_2053_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_2057 = const()[name = tensor("op_2057"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_21_cast_fp16 = reshape(shape = var_2057, x = query_21_cast_fp16)[name = tensor("mh_q_21_cast_fp16")]; + tensor var_2059_to_fp16 = const()[name = tensor("op_2059_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2060_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_2059_to_fp16)[name = tensor("op_2060_cast_fp16")]; + tensor var_2063 = const()[name = tensor("op_2063"), val = tensor([1, 12, 64, 1500])]; + tensor var_2064_cast_fp16 = reshape(shape = var_2063, x = key_21_cast_fp16)[name = tensor("op_2064_cast_fp16")]; + tensor mh_w_21_transpose_x_0 = const()[name = tensor("mh_w_21_transpose_x_0"), val = tensor(true)]; + tensor mh_w_21_transpose_y_0 = const()[name = tensor("mh_w_21_transpose_y_0"), val = tensor(false)]; + tensor mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_2060_cast_fp16, y = var_2064_cast_fp16)[name = tensor("mh_w_21_cast_fp16")]; + tensor var_2067_cast_fp16 = softmax(axis = var_1972, x = mh_w_21_cast_fp16)[name = tensor("op_2067_cast_fp16")]; + tensor var_2068 = const()[name = tensor("op_2068"), val = tensor([1, 12, 64, 1500])]; + tensor var_2069_cast_fp16 = reshape(shape = var_2068, x = value_21_cast_fp16)[name = tensor("op_2069_cast_fp16")]; + tensor attn_21_transpose_x_0 = const()[name = tensor("attn_21_transpose_x_0"), val = tensor(false)]; + tensor attn_21_transpose_y_0 = const()[name = tensor("attn_21_transpose_y_0"), val = tensor(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_2069_cast_fp16, y = var_2067_cast_fp16)[name = tensor("attn_21_cast_fp16")]; + tensor var_2072 = const()[name = tensor("op_2072"), val = tensor([1, 768, 1, 1500])]; + tensor input_81_cast_fp16 = reshape(shape = var_2072, x = attn_21_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor var_2082_pad_type_0 = const()[name = tensor("op_2082_pad_type_0"), val = tensor("valid")]; + tensor var_2082_strides_0 = const()[name = tensor("op_2082_strides_0"), val = tensor([1, 1])]; + tensor var_2082_pad_0 = const()[name = tensor("op_2082_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2082_dilations_0 = const()[name = tensor("op_2082_dilations_0"), val = tensor([1, 1])]; + tensor var_2082_groups_0 = const()[name = tensor("op_2082_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54106944))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54401920))), name = tensor("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54402048)))]; + tensor var_2082_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2082_dilations_0, groups = var_2082_groups_0, pad = var_2082_pad_0, pad_type = var_2082_pad_type_0, strides = var_2082_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2088_pad_type_0 = const()[name = tensor("op_2088_pad_type_0"), val = tensor("valid")]; + tensor var_2088_strides_0 = const()[name = tensor("op_2088_strides_0"), val = tensor([1, 1])]; + tensor var_2088_pad_0 = const()[name = tensor("op_2088_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2088_dilations_0 = const()[name = tensor("op_2088_dilations_0"), val = tensor([1, 1])]; + tensor var_2088_groups_0 = const()[name = tensor("op_2088_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54409472))), name = tensor("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54403648))), shape = tensor([768, 768, 1, 1])]; + tensor var_2088_cast_fp16 = conv(dilations = var_2088_dilations_0, groups = var_2088_groups_0, pad = var_2088_pad_0, pad_type = var_2088_pad_type_0, strides = var_2088_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = tensor("op_2088_cast_fp16")]; + tensor obj_43_cast_fp16 = add(x = var_2082_cast_fp16, y = var_2088_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; + tensor out_43_axes_0 = const()[name = tensor("out_43_axes_0"), val = tensor([1])]; + tensor var_2099_to_fp16 = const()[name = tensor("op_2099_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2099_to_fp16, x = inputs_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; + tensor input_83_gamma_0_to_fp16 = const()[name = tensor("input_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54483264)))]; + tensor input_83_beta_0_to_fp16 = const()[name = tensor("input_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54484864)))]; + tensor input_83_epsilon_0_to_fp16 = const()[name = tensor("input_83_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_2117_pad_type_0 = const()[name = tensor("op_2117_pad_type_0"), val = tensor("valid")]; + tensor var_2117_strides_0 = const()[name = tensor("op_2117_strides_0"), val = tensor([1, 1])]; + tensor var_2117_pad_0 = const()[name = tensor("op_2117_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2117_dilations_0 = const()[name = tensor("op_2117_dilations_0"), val = tensor([1, 1])]; + tensor var_2117_groups_0 = const()[name = tensor("op_2117_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54486464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55666176))), name = tensor("layers_10_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55666304)))]; + tensor var_2117_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_2117_dilations_0, groups = var_2117_groups_0, pad = var_2117_pad_0, pad_type = var_2117_pad_type_0, strides = var_2117_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = tensor("op_2117_cast_fp16")]; + tensor var_2123_pad_type_0 = const()[name = tensor("op_2123_pad_type_0"), val = tensor("valid")]; + tensor var_2123_strides_0 = const()[name = tensor("op_2123_strides_0"), val = tensor([1, 1])]; + tensor var_2123_pad_0 = const()[name = tensor("op_2123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2123_dilations_0 = const()[name = tensor("op_2123_dilations_0"), val = tensor([1, 1])]; + tensor var_2123_groups_0 = const()[name = tensor("op_2123_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55704320))), name = tensor("layers_10_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55672512))), shape = tensor([3072, 768, 1, 1])]; + tensor var_2123_cast_fp16 = conv(dilations = var_2123_dilations_0, groups = var_2123_groups_0, pad = var_2123_pad_0, pad_type = var_2123_pad_type_0, strides = var_2123_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = tensor("op_2123_cast_fp16")]; + tensor input_85_cast_fp16 = add(x = var_2117_cast_fp16, y = var_2123_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor input_87_mode_0 = const()[name = tensor("input_87_mode_0"), val = tensor("EXACT")]; + tensor input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor var_2134_pad_type_0 = const()[name = tensor("op_2134_pad_type_0"), val = tensor("valid")]; + tensor var_2134_strides_0 = const()[name = tensor("op_2134_strides_0"), val = tensor([1, 1])]; + tensor var_2134_pad_0 = const()[name = tensor("op_2134_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2134_dilations_0 = const()[name = tensor("op_2134_dilations_0"), val = tensor([1, 1])]; + tensor var_2134_groups_0 = const()[name = tensor("op_2134_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55999296))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57179008))), name = tensor("layers_10_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57179136)))]; + tensor var_2134_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_2134_dilations_0, groups = var_2134_groups_0, pad = var_2134_pad_0, pad_type = var_2134_pad_type_0, strides = var_2134_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_87_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2140_pad_type_0 = const()[name = tensor("op_2140_pad_type_0"), val = tensor("valid")]; + tensor var_2140_strides_0 = const()[name = tensor("op_2140_strides_0"), val = tensor([1, 1])]; + tensor var_2140_pad_0 = const()[name = tensor("op_2140_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2140_dilations_0 = const()[name = tensor("op_2140_dilations_0"), val = tensor([1, 1])]; + tensor var_2140_groups_0 = const()[name = tensor("op_2140_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57215232))), name = tensor("layers_10_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57180736))), shape = tensor([768, 3072, 1, 1])]; + tensor var_2140_cast_fp16 = conv(dilations = var_2140_dilations_0, groups = var_2140_groups_0, pad = var_2140_pad_0, pad_type = var_2140_pad_type_0, strides = var_2140_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_87_cast_fp16)[name = tensor("op_2140_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = add(x = var_2134_cast_fp16, y = var_2140_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; + tensor var_2146 = const()[name = tensor("op_2146"), val = tensor(3)]; + tensor out_45_axes_0 = const()[name = tensor("out_45_axes_0"), val = tensor([1])]; + tensor var_2168_to_fp16 = const()[name = tensor("op_2168_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2168_to_fp16, x = inputs_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; + tensor obj_45_gamma_0_to_fp16 = const()[name = tensor("obj_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57510208)))]; + tensor obj_45_beta_0_to_fp16 = const()[name = tensor("obj_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57511808)))]; + tensor obj_45_epsilon_0_to_fp16 = const()[name = tensor("obj_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_45_cast_fp16")]; + tensor var_2190_pad_type_0 = const()[name = tensor("op_2190_pad_type_0"), val = tensor("valid")]; + tensor var_2190_strides_0 = const()[name = tensor("op_2190_strides_0"), val = tensor([1, 1])]; + tensor var_2190_pad_0 = const()[name = tensor("op_2190_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2190_dilations_0 = const()[name = tensor("op_2190_dilations_0"), val = tensor([1, 1])]; + tensor var_2190_groups_0 = const()[name = tensor("op_2190_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57513408))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57808384))), name = tensor("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57808512)))]; + tensor var_2190_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2190_dilations_0, groups = var_2190_groups_0, pad = var_2190_pad_0, pad_type = var_2190_pad_type_0, strides = var_2190_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_2190_cast_fp16")]; + tensor var_2196_pad_type_0 = const()[name = tensor("op_2196_pad_type_0"), val = tensor("valid")]; + tensor var_2196_strides_0 = const()[name = tensor("op_2196_strides_0"), val = tensor([1, 1])]; + tensor var_2196_pad_0 = const()[name = tensor("op_2196_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2196_dilations_0 = const()[name = tensor("op_2196_dilations_0"), val = tensor([1, 1])]; + tensor var_2196_groups_0 = const()[name = tensor("op_2196_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57816768))), name = tensor("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57810112))), shape = tensor([768, 768, 1, 1])]; + tensor var_2196_cast_fp16 = conv(dilations = var_2196_dilations_0, groups = var_2196_groups_0, pad = var_2196_pad_0, pad_type = var_2196_pad_type_0, strides = var_2196_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor query_cast_fp16 = add(x = var_2190_cast_fp16, y = var_2196_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_2205_pad_type_0 = const()[name = tensor("op_2205_pad_type_0"), val = tensor("valid")]; + tensor var_2205_strides_0 = const()[name = tensor("op_2205_strides_0"), val = tensor([1, 1])]; + tensor var_2205_pad_0 = const()[name = tensor("op_2205_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2205_dilations_0 = const()[name = tensor("op_2205_dilations_0"), val = tensor([1, 1])]; + tensor var_2205_groups_0 = const()[name = tensor("op_2205_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57890560))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58185536))), name = tensor("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2205_cast_fp16 = conv(dilations = var_2205_dilations_0, groups = var_2205_groups_0, pad = var_2205_pad_0, pad_type = var_2205_pad_type_0, strides = var_2205_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_2205_cast_fp16")]; + tensor var_2211_pad_type_0 = const()[name = tensor("op_2211_pad_type_0"), val = tensor("valid")]; + tensor var_2211_strides_0 = const()[name = tensor("op_2211_strides_0"), val = tensor([1, 1])]; + tensor var_2211_pad_0 = const()[name = tensor("op_2211_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2211_dilations_0 = const()[name = tensor("op_2211_dilations_0"), val = tensor([1, 1])]; + tensor var_2211_groups_0 = const()[name = tensor("op_2211_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58192896))), name = tensor("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58185664))), shape = tensor([768, 768, 1, 1])]; + tensor var_2211_cast_fp16 = conv(dilations = var_2211_dilations_0, groups = var_2211_groups_0, pad = var_2211_pad_0, pad_type = var_2211_pad_type_0, strides = var_2211_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_2211_cast_fp16")]; + tensor key_cast_fp16 = add(x = var_2205_cast_fp16, y = var_2211_cast_fp16)[name = tensor("key_cast_fp16")]; + tensor var_2221_pad_type_0 = const()[name = tensor("op_2221_pad_type_0"), val = tensor("valid")]; + tensor var_2221_strides_0 = const()[name = tensor("op_2221_strides_0"), val = tensor([1, 1])]; + tensor var_2221_pad_0 = const()[name = tensor("op_2221_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2221_dilations_0 = const()[name = tensor("op_2221_dilations_0"), val = tensor([1, 1])]; + tensor var_2221_groups_0 = const()[name = tensor("op_2221_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58266688))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58561664))), name = tensor("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58561792)))]; + tensor var_2221_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2221_dilations_0, groups = var_2221_groups_0, pad = var_2221_pad_0, pad_type = var_2221_pad_type_0, strides = var_2221_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_2221_cast_fp16")]; + tensor var_2227_pad_type_0 = const()[name = tensor("op_2227_pad_type_0"), val = tensor("valid")]; + tensor var_2227_strides_0 = const()[name = tensor("op_2227_strides_0"), val = tensor([1, 1])]; + tensor var_2227_pad_0 = const()[name = tensor("op_2227_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2227_dilations_0 = const()[name = tensor("op_2227_dilations_0"), val = tensor([1, 1])]; + tensor var_2227_groups_0 = const()[name = tensor("op_2227_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58569728))), name = tensor("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58563392))), shape = tensor([768, 768, 1, 1])]; + tensor var_2227_cast_fp16 = conv(dilations = var_2227_dilations_0, groups = var_2227_groups_0, pad = var_2227_pad_0, pad_type = var_2227_pad_type_0, strides = var_2227_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_2227_cast_fp16")]; + tensor value_cast_fp16 = add(x = var_2221_cast_fp16, y = var_2227_cast_fp16)[name = tensor("value_cast_fp16")]; + tensor var_2231 = const()[name = tensor("op_2231"), val = tensor([1, 12, 64, 1500])]; + tensor mh_q_cast_fp16 = reshape(shape = var_2231, x = query_cast_fp16)[name = tensor("mh_q_cast_fp16")]; + tensor var_2233_to_fp16 = const()[name = tensor("op_2233_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2234_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_2233_to_fp16)[name = tensor("op_2234_cast_fp16")]; + tensor var_2237 = const()[name = tensor("op_2237"), val = tensor([1, 12, 64, 1500])]; + tensor var_2238_cast_fp16 = reshape(shape = var_2237, x = key_cast_fp16)[name = tensor("op_2238_cast_fp16")]; + tensor mh_w_transpose_x_0 = const()[name = tensor("mh_w_transpose_x_0"), val = tensor(true)]; + tensor mh_w_transpose_y_0 = const()[name = tensor("mh_w_transpose_y_0"), val = tensor(false)]; + tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_2234_cast_fp16, y = var_2238_cast_fp16)[name = tensor("mh_w_cast_fp16")]; + tensor var_2241_cast_fp16 = softmax(axis = var_2146, x = mh_w_cast_fp16)[name = tensor("op_2241_cast_fp16")]; + tensor var_2242 = const()[name = tensor("op_2242"), val = tensor([1, 12, 64, 1500])]; + tensor var_2243_cast_fp16 = reshape(shape = var_2242, x = value_cast_fp16)[name = tensor("op_2243_cast_fp16")]; + tensor attn_transpose_x_0 = const()[name = tensor("attn_transpose_x_0"), val = tensor(false)]; + tensor attn_transpose_y_0 = const()[name = tensor("attn_transpose_y_0"), val = tensor(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_2243_cast_fp16, y = var_2241_cast_fp16)[name = tensor("attn_cast_fp16")]; + tensor var_2246 = const()[name = tensor("op_2246"), val = tensor([1, 768, 1, 1500])]; + tensor input_89_cast_fp16 = reshape(shape = var_2246, x = attn_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor var_2256_pad_type_0 = const()[name = tensor("op_2256_pad_type_0"), val = tensor("valid")]; + tensor var_2256_strides_0 = const()[name = tensor("op_2256_strides_0"), val = tensor([1, 1])]; + tensor var_2256_pad_0 = const()[name = tensor("op_2256_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2256_dilations_0 = const()[name = tensor("op_2256_dilations_0"), val = tensor([1, 1])]; + tensor var_2256_groups_0 = const()[name = tensor("op_2256_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58643520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58938496))), name = tensor("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58938624)))]; + tensor var_2256_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2256_dilations_0, groups = var_2256_groups_0, pad = var_2256_pad_0, pad_type = var_2256_pad_type_0, strides = var_2256_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2262_pad_type_0 = const()[name = tensor("op_2262_pad_type_0"), val = tensor("valid")]; + tensor var_2262_strides_0 = const()[name = tensor("op_2262_strides_0"), val = tensor([1, 1])]; + tensor var_2262_pad_0 = const()[name = tensor("op_2262_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2262_dilations_0 = const()[name = tensor("op_2262_dilations_0"), val = tensor([1, 1])]; + tensor var_2262_groups_0 = const()[name = tensor("op_2262_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58947520))), name = tensor("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58940224))), shape = tensor([768, 768, 1, 1])]; + tensor var_2262_cast_fp16 = conv(dilations = var_2262_dilations_0, groups = var_2262_groups_0, pad = var_2262_pad_0, pad_type = var_2262_pad_type_0, strides = var_2262_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = tensor("op_2262_cast_fp16")]; + tensor obj_cast_fp16 = add(x = var_2256_cast_fp16, y = var_2262_cast_fp16)[name = tensor("obj_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; + tensor out_47_axes_0 = const()[name = tensor("out_47_axes_0"), val = tensor([1])]; + tensor var_2273_to_fp16 = const()[name = tensor("op_2273_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2273_to_fp16, x = inputs_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; + tensor input_91_gamma_0_to_fp16 = const()[name = tensor("input_91_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59021312)))]; + tensor input_91_beta_0_to_fp16 = const()[name = tensor("input_91_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59022912)))]; + tensor input_91_epsilon_0_to_fp16 = const()[name = tensor("input_91_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor var_2291_pad_type_0 = const()[name = tensor("op_2291_pad_type_0"), val = tensor("valid")]; + tensor var_2291_strides_0 = const()[name = tensor("op_2291_strides_0"), val = tensor([1, 1])]; + tensor var_2291_pad_0 = const()[name = tensor("op_2291_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2291_dilations_0 = const()[name = tensor("op_2291_dilations_0"), val = tensor([1, 1])]; + tensor var_2291_groups_0 = const()[name = tensor("op_2291_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59024512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60204224))), name = tensor("layers_11_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60204352)))]; + tensor var_2291_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_2291_dilations_0, groups = var_2291_groups_0, pad = var_2291_pad_0, pad_type = var_2291_pad_type_0, strides = var_2291_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = tensor("op_2291_cast_fp16")]; + tensor var_2297_pad_type_0 = const()[name = tensor("op_2297_pad_type_0"), val = tensor("valid")]; + tensor var_2297_strides_0 = const()[name = tensor("op_2297_strides_0"), val = tensor([1, 1])]; + tensor var_2297_pad_0 = const()[name = tensor("op_2297_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2297_dilations_0 = const()[name = tensor("op_2297_dilations_0"), val = tensor([1, 1])]; + tensor var_2297_groups_0 = const()[name = tensor("op_2297_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60241728))), name = tensor("layers_11_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60210560))), shape = tensor([3072, 768, 1, 1])]; + tensor var_2297_cast_fp16 = conv(dilations = var_2297_dilations_0, groups = var_2297_groups_0, pad = var_2297_pad_0, pad_type = var_2297_pad_type_0, strides = var_2297_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = tensor("op_2297_cast_fp16")]; + tensor input_93_cast_fp16 = add(x = var_2291_cast_fp16, y = var_2297_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_93_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2308_pad_type_0 = const()[name = tensor("op_2308_pad_type_0"), val = tensor("valid")]; + tensor var_2308_strides_0 = const()[name = tensor("op_2308_strides_0"), val = tensor([1, 1])]; + tensor var_2308_pad_0 = const()[name = tensor("op_2308_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2308_dilations_0 = const()[name = tensor("op_2308_dilations_0"), val = tensor([1, 1])]; + tensor var_2308_groups_0 = const()[name = tensor("op_2308_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60536704))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61716416))), name = tensor("layers_11_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61716544)))]; + tensor var_2308_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_2308_dilations_0, groups = var_2308_groups_0, pad = var_2308_pad_0, pad_type = var_2308_pad_type_0, strides = var_2308_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = tensor("op_2308_cast_fp16")]; + tensor var_2314_pad_type_0 = const()[name = tensor("op_2314_pad_type_0"), val = tensor("valid")]; + tensor var_2314_strides_0 = const()[name = tensor("op_2314_strides_0"), val = tensor([1, 1])]; + tensor var_2314_pad_0 = const()[name = tensor("op_2314_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2314_dilations_0 = const()[name = tensor("op_2314_dilations_0"), val = tensor([1, 1])]; + tensor var_2314_groups_0 = const()[name = tensor("op_2314_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61753984))), name = tensor("layers_11_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61718144))), shape = tensor([768, 3072, 1, 1])]; + tensor var_2314_cast_fp16 = conv(dilations = var_2314_dilations_0, groups = var_2314_groups_0, pad = var_2314_pad_0, pad_type = var_2314_pad_type_0, strides = var_2314_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = var_2308_cast_fp16, y = var_2314_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; + tensor var_2329_to_fp16 = const()[name = tensor("op_2329_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_2329_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62048960)))]; + tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62050560)))]; + tensor encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = var_57_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("encoder_output_embeds_type_fp32_cast_fp16")]; + } -> (encoder_output_embeds); +} \ No newline at end of file