Kikinoking committed · verified
Commit 45f7328 · 1 Parent(s): 7548dee

Update README.md

Files changed (1): README.md +27 -0
README.md CHANGED
@@ -45,3 +45,30 @@ with torch.inference_mode():
     output = model.generate(**inputs, max_new_tokens=1)
 
     print("Answer:", tokenizer.decode(output[0], skip_special_tokens=True))
+
+ ## How It Was Built
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ import torch
+
+ base_id = "aidasvenc/MNLP_M3_mcqa_model"
+ qcfg = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",              # NormalFloat4 weight format
+     bnb_4bit_compute_dtype=torch.bfloat16,  # matmuls computed in bf16
+     bnb_4bit_use_double_quant=True          # also quantize the quantization constants
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(base_id)
+ model = AutoModelForCausalLM.from_pretrained(
+     base_id,
+     quantization_config=qcfg,
+     device_map="auto",
+     torch_dtype="auto"
+ )
+
+ # Push to Hugging Face Hub
+ model.push_to_hub("Kikinoking/MNLP_M3_quantized_model", private=True)
+ tokenizer.push_to_hub("Kikinoking/MNLP_M3_quantized_model")
+
+ print("VRAM used (MiB):", torch.cuda.memory_reserved() / 1024**2)
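
For context, a minimal sketch of how the quantized checkpoint pushed above could be loaded back for inference, mirroring the README's own usage snippet. The repo id comes from the diff; the prompt string is an illustrative placeholder, and since the model repo is pushed with `private=True`, loading it would require being logged in or supplying an access token. The 4-bit quantization settings are saved in the checkpoint's config, so they should be picked up automatically at load time.

```python
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

repo_id = "Kikinoking/MNLP_M3_quantized_model"  # repo created by the push above

tokenizer = AutoTokenizer.from_pretrained(repo_id)
# The saved quantization_config travels with the checkpoint, so no
# BitsAndBytesConfig needs to be re-specified here.
model = AutoModelForCausalLM.from_pretrained(repo_id, device_map="auto")

prompt = "Question: 2 + 2 = ?\nAnswer:"  # hypothetical prompt, not from the README
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.inference_mode():
    output = model.generate(**inputs, max_new_tokens=1)

print("Answer:", tokenizer.decode(output[0], skip_special_tokens=True))
```

Note that `torch.cuda.memory_reserved()` reports the total memory held by PyTorch's caching allocator, which upper-bounds the model's actual footprint rather than measuring the weights alone.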