huihui-ai committed on
Commit
2570c61
·
verified ·
1 Parent(s): 07cc489

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +11 -5
README.md CHANGED
@@ -41,18 +41,24 @@ import torch
41
 
42
  # Load the model and tokenizer
43
  model_name = "huihui-ai/DeepSeek-R1-Distill-Qwen-Coder-32B-Fusion-9010"
44
- quant_config_4 = BitsAndBytesConfig(
45
- load_in_4bit=True,
46
- bnb_4bit_compute_dtype=torch.bfloat16,
47
- bnb_4bit_use_double_quant=True,
 
 
 
 
 
48
  llm_int8_enable_fp32_cpu_offload=True,
 
49
  )
50
 
51
  model = AutoModelForCausalLM.from_pretrained(
52
  model_name,
53
  trust_remote_code=True,
54
  torch_dtype=torch.bfloat16,
55
- quantization_config=quant_config_4,
56
  device_map="auto",
57
  )
58
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
41
 
42
  # Load the model and tokenizer
43
  model_name = "huihui-ai/DeepSeek-R1-Distill-Qwen-Coder-32B-Fusion-9010"
44
+ #quant_config_4 = BitsAndBytesConfig(
45
+ # load_in_4bit=True,
46
+ # bnb_4bit_compute_dtype=torch.bfloat16,
47
+ # bnb_4bit_use_double_quant=True,
48
+ # llm_int8_enable_fp32_cpu_offload=True,
49
+ #)
50
+
51
+ quant_config_8 = BitsAndBytesConfig(
52
+ load_in_8bit=True,
53
  llm_int8_enable_fp32_cpu_offload=True,
54
+ llm_int8_has_fp16_weight=True,
55
  )
56
 
57
  model = AutoModelForCausalLM.from_pretrained(
58
  model_name,
59
  trust_remote_code=True,
60
  torch_dtype=torch.bfloat16,
61
+ quantization_config=quant_config_8,
62
  device_map="auto",
63
  )
64
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)