Manh Lai committed on
Commit 2262653 · 1 Parent(s): 897662d

update to generate onnx tokenizer

Files changed (2)
  1. convert_to_onnx.py +80 -32
  2. tokenizer.json +0 -0
convert_to_onnx.py CHANGED
@@ -1,54 +1,102 @@
from pathlib import Path
import onnx
import shutil
+import json
from onnxconverter_common import float16
from onnxruntime.quantization import quantize_dynamic, QuantType
-
from optimum.onnxruntime import ORTModelForFeatureExtraction
from transformers import AutoTokenizer
+from tokenizers import Tokenizer

-# Set model name and output directory
+# Configuration
model_name = "dangvantuan/vietnamese-embedding"
output_dir = Path("onnx")
output_dir.mkdir(parents=True, exist_ok=True)

-# -------------------------------------------
-# Step 1: Export the model to ONNX (FP32)
-# -------------------------------------------
-print("Exporting the FP32 model...")
+# --------------------------------------------------
+# Step 1: Export model to ONNX (FP32)
+# --------------------------------------------------
+print("Exporting FP32 model...")
model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
model.save_pretrained(output_dir)

-# Save tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-tokenizer.save_pretrained(output_dir)
-
-# Define model paths
-model_fp32_path = output_dir / "model.onnx"
-model_quantized_path = output_dir / "model_quantized.onnx"
+# --------------------------------------------------
+# Step 2: Convert tokenizer to JSON format
+# --------------------------------------------------
+print("Processing tokenizer...")
+try:
+    # First try to get fast tokenizer directly
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+    tokenizer.save_pretrained(output_dir, legacy_format=False)
+    print("✓ Saved modern tokenizer.json")
+
+except Exception as e:
+    print(f"Couldn't create fast tokenizer directly: {e}")
+    print("Attempting manual conversion...")
+
+    # Load slow tokenizer
+    slow_tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+    # Save original files first
+    slow_tokenizer.save_pretrained(output_dir)
+
+    # Convert to fast tokenizer format
+    try:
+        # Create Tokenizer object from the slow tokenizer
+        tokenizer_json = {
+            "version": "1.0",
+            "truncation": None,
+            "padding": None,
+            "added_tokens": [],
+            "normalizer": {
+                "type": "Sequence",
+                "normalizers": []
+            },
+            "pre_tokenizer": {
+                "type": "Whitespace"
+            },
+            "post_processor": None,
+            "decoder": None,
+            "model": {
+                "type": "WordPiece",
+                "unk_token": slow_tokenizer.unk_token,
+                "sep_token": slow_tokenizer.sep_token,
+                "cls_token": slow_tokenizer.cls_token,
+                "pad_token": slow_tokenizer.pad_token,
+                "mask_token": slow_tokenizer.mask_token,
+                "vocab": slow_tokenizer.get_vocab(),
+                "max_input_chars_per_word": 100
+            }
+        }
+
+        # Save as tokenizer.json
+        with open(output_dir / "tokenizer.json", "w", encoding="utf-8") as f:
+            json.dump(tokenizer_json, f, ensure_ascii=False, indent=2)
+
+        print("✓ Manually created tokenizer.json")
+
+    except Exception as e:
+        print(f"Failed to create tokenizer.json: {e}")
+        print("Falling back to original tokenizer files")

-# -------------------------------------------
-# Step 2: Quantize to INT8
-# -------------------------------------------
-print("Quantizing to INT8 (dynamic quantization)...")
+# --------------------------------------------------
+# Step 3: Quantize model to INT8
+# --------------------------------------------------
+print("Quantizing to INT8...")
quantize_dynamic(
-    model_input=model_fp32_path.as_posix(),
-    model_output=model_quantized_path.as_posix(),
+    model_input=output_dir / "model.onnx",
+    model_output=output_dir / "model_quantized.onnx",
    weight_type=QuantType.QInt8,
)

-# -------------------------------------------
-# Step 3: Move JSON files to parent folder
-# -------------------------------------------
-print("Moving JSON files to parent folder...")
-parent_dir = output_dir.parent
-json_files = list(output_dir.glob("*.json"))
-
-for json_file in json_files:
-    shutil.move(str(json_file), str(parent_dir / json_file.name))
+# --------------------------------------------------
+# Step 4: Clean up file organization
+# --------------------------------------------------
+print("Organizing files...")
+# Move all JSON files to parent directory
+for json_file in output_dir.glob("*.json"):
+    shutil.move(str(json_file), str(Path(".") / json_file.name))

print("✅ Conversion complete!")
-print(f"Original FP32 model: {model_fp32_path}")
-print(f"Quantized INT8 model: {model_quantized_path}")
-print(f"Tokenizer files moved to: {[f.name for f in json_files]}")
-print(f"ONNX files remain in: {output_dir}")
+print(f"ONNX models saved in: {output_dir}")
+print(f"Tokenizer files moved to project root")
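After this script runs, model.onnx and model_quantized.onnx sit in onnx/ while the tokenizer and config JSON files are moved to the repository root. A minimal sketch (not part of this commit) of how the exported artifacts could be consumed, assuming that file layout, the CPU execution provider, and an illustrative sample sentence:

# Sketch only: assumes the layout produced by convert_to_onnx.py above
# (quantized model in onnx/, tokenizer JSON files in the repository root).
import onnxruntime as ort
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")  # picks up tokenizer.json at the root
session = ort.InferenceSession("onnx/model_quantized.onnx", providers=["CPUExecutionProvider"])

encoded = tokenizer("Hà Nội là thủ đô của Việt Nam", return_tensors="np")
# Feed only the inputs the ONNX graph actually declares; exported input names can vary.
input_names = {i.name for i in session.get_inputs()}
feed = {name: array for name, array in encoded.items() if name in input_names}

token_embeddings = session.run(None, feed)[0]
print(token_embeddings.shape)  # roughly (1, sequence_length, hidden_size)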
 
 
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
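Since the regenerated tokenizer.json is too large to diff, a quick sanity check (again not part of this commit, and assuming the file was moved to the repository root by the step above) is to load it with the tokenizers library, which surfaces schema problems in a manually constructed file, and encode a sample sentence:

# Sketch only: assumes tokenizer.json was moved to the repository root.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")  # raises if the JSON cannot be parsed as a tokenizer
encoding = tok.encode("Hà Nội là thủ đô của Việt Nam")
print(encoding.tokens)
print(encoding.ids)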