|
from pathlib import Path |
|
import onnx |
|
import shutil |
|
import json |
|
from onnxconverter_common import float16 |
|
from onnxruntime.quantization import quantize_dynamic, QuantType |
|
from optimum.onnxruntime import ORTModelForFeatureExtraction |
|
from transformers import AutoTokenizer |
|
from tokenizers import Tokenizer |
|
|
|
|
|
# Hugging Face model id to convert, and where the ONNX artifacts will live.
model_name = "dangvantuan/vietnamese-embedding"

# Everything is written under ./onnx; creating it is idempotent.
output_dir = Path("onnx")
output_dir.mkdir(exist_ok=True, parents=True)
|
|
|
|
|
|
|
|
|
print("Exporting FP32 model...")

# Export the checkpoint to ONNX via optimum's ORT wrapper (export=True runs
# the PyTorch -> ONNX conversion) and write model.onnx + config.json into
# output_dir.
model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)

model.save_pretrained(output_dir)
|
|
|
|
|
|
|
|
|
print("Processing tokenizer...")

# Preferred path: the Rust-backed "fast" tokenizer. Saving with
# legacy_format=False emits a single self-contained tokenizer.json.
try:

    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

    tokenizer.save_pretrained(output_dir, legacy_format=False)

    print("β Saved modern tokenizer.json")

except Exception as e:

    # No fast tokenizer available for this model — fall back to the slow
    # (pure-Python) tokenizer and hand-build a tokenizer.json from its vocab.
    print(f"Couldn't create fast tokenizer directly: {e}")

    print("Attempting manual conversion...")

    slow_tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Always persist the slow tokenizer's native files (vocab, special
    # tokens map, config) so there is a usable tokenizer on disk even if
    # the manual JSON construction below fails.
    slow_tokenizer.save_pretrained(output_dir)

    try:

        # Minimal skeleton in the `tokenizers` library's JSON layout.
        # NOTE(review): the "model" section mixes WordPiece fields with
        # sep/cls/pad/mask keys that the official WordPiece schema does not
        # define, and the empty normalizer / Whitespace pre-tokenizer may
        # not match the original model's tokenization — confirm the
        # consumer actually accepts and correctly uses this file.
        tokenizer_json = {

            "version": "1.0",

            "truncation": None,

            "padding": None,

            "added_tokens": [],

            "normalizer": {

                "type": "Sequence",

                "normalizers": []

            },

            "pre_tokenizer": {

                "type": "Whitespace"

            },

            "post_processor": None,

            "decoder": None,

            "model": {

                "type": "WordPiece",

                "unk_token": slow_tokenizer.unk_token,

                "sep_token": slow_tokenizer.sep_token,

                "cls_token": slow_tokenizer.cls_token,

                "pad_token": slow_tokenizer.pad_token,

                "mask_token": slow_tokenizer.mask_token,

                "vocab": slow_tokenizer.get_vocab(),

                "max_input_chars_per_word": 100

            }

        }

        # ensure_ascii=False keeps Vietnamese characters human-readable
        # in the emitted JSON instead of \uXXXX escapes.
        with open(output_dir / "tokenizer.json", "w", encoding="utf-8") as f:

            json.dump(tokenizer_json, f, ensure_ascii=False, indent=2)

        print("β Manually created tokenizer.json")

    except Exception as e:

        # Best-effort: the files written by save_pretrained above remain.
        print(f"Failed to create tokenizer.json: {e}")

        print("Falling back to original tokenizer files")
|
|
|
|
|
|
|
|
|
print("Quantizing to INT8...")

# Dynamic (weight-only) quantization: reads the FP32 export and writes a
# signed-INT8 sibling next to it.
fp32_model_path = output_dir / "model.onnx"
int8_model_path = output_dir / "model_quantized.onnx"

quantize_dynamic(
    model_input=fp32_model_path,
    model_output=int8_model_path,
    weight_type=QuantType.QInt8,
)
|
|
|
|
|
|
|
|
|
print("Organizing files...")

# Move every JSON artifact from the onnx/ directory to the project root.
# NOTE(review): this also moves config.json, which optimum/ORT expects to
# find next to the model when loading it later — confirm this is intended.
for json_file in output_dir.glob("*.json"):
    shutil.move(str(json_file), str(Path(".") / json_file.name))

# Bug fix: this message was a string literal broken across two physical
# lines (a SyntaxError); rejoined onto one line.
print("β Conversion complete!")
print(f"ONNX models saved in: {output_dir}")
# Plain string: no placeholders, so the f-prefix was redundant (F541).
print("Tokenizer files moved to project root")