SekoiaTree
/

MNLP_quantized_model_scratch

Text Generation

text-generation-inference

8-bit precision

compressed-tensors

Model card · Files and versions

SekoiaTree committed 12 days ago

Commit

fc35f27

·

verified ·

1 Parent(s): f2116a9

Upload Qwen3ForCausalLM

Files changed (2) hide show

config.json +13 -30
model.safetensors +2 -2

config.json CHANGED Viewed

@@ -48,36 +48,19 @@
   "num_hidden_layers": 28,
   "num_key_value_heads": 8,
   "quantization_config": {
-    "config_groups": {
-      "group_0": {
-        "input_activations": null,
-        "output_activations": null,
-        "targets": [
-          "Linear"
-        ],
-        "weights": {
-          "actorder": null,
-          "block_structure": null,
-          "dynamic": false,
-          "group_size": null,
-          "num_bits": 8,
-          "observer": "minmax",
-          "observer_kwargs": {},
-          "strategy": "channel",
-          "symmetric": true,
-          "type": "int"
-        }
-      }
-    },
-    "format": "pack-quantized",
-    "global_compression_ratio": null,
-    "ignore": [
-      "lm_head"
-    ],
-    "kv_cache_scheme": null,
-    "quant_method": "compressed-tensors",
-    "quantization_status": "compressed",
-    "sparsity_config": {}
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,

   "num_hidden_layers": 28,
   "num_key_value_heads": 8,
   "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "float32",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "fp4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2809587d85194d1a0c17bc2cb65e1377525b2caa028a68573952508eec9b0e4
-size 752467504

 version https://git-lfs.github.com/spec/v1
+oid sha256:29c472906a077e3bdfc9d392b4ec01b26b699ce093559c330e727d469e972354
+size 559155940