bosbos committed
Commit 9446ebd · verified · 1 Parent(s): 83e877d

Update README.md

Files changed (1)
  1. README.md +78 -0
README.md CHANGED
@@ -48,6 +48,84 @@ sequences = pipeline(
  for seq in sequences:
      print(f"Result: {seq['generated_text']}")
  ```
+ ``` python
+ # !pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
+
+ import os
+ import torch
+ from datasets import load_dataset
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     BitsAndBytesConfig,
+     Trainer,
+     HfArgumentParser,
+     TrainingArguments,
+     pipeline,
+     logging,
+ )
+ from peft import LoraConfig, PeftModel
+ from trl import SFTTrainer
+
+ ################################################################################
+ # bitsandbytes parameters
+ ################################################################################
+
+ # Activate 4-bit precision base model loading
+ use_4bit = True
+
+ # Compute dtype for 4-bit base models
+ bnb_4bit_compute_dtype = "float16"
+
+ # Quantization type (fp4 or nf4)
+ bnb_4bit_quant_type = "nf4"
+
+ # Activate nested quantization for 4-bit base models (double quantization)
+ use_nested_quant = False
+
+ ################################################################################
+ # SFT parameters
+ ################################################################################
+
+ # Maximum sequence length to use
+ max_seq_length = None
+
+ # Pack multiple short examples in the same input sequence to increase efficiency
+ packing = False
+
+ # Load the entire model on GPU 0
+ device_map = {"": 0}
+
+ model_name = "bosbos/bosbos-2-7b"
+ # Load tokenizer and model with QLoRA configuration
+ compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
+
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=use_4bit,
+     bnb_4bit_quant_type=bnb_4bit_quant_type,
+     bnb_4bit_compute_dtype=compute_dtype,
+     bnb_4bit_use_double_quant=use_nested_quant,
+ )
+ # Load base model
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     quantization_config=bnb_config,
+     device_map=device_map,
+ )
+ model.config.use_cache = False
+ model.config.pretraining_tp = 1
+
+ # Load LLaMA tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ tokenizer.pad_token = tokenizer.eos_token
+ tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training
+
+ # Run a text generation pipeline with the new model
+ prompt = "What is prediction in French?"
+ pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
+ result = pipe(f"<s>[INST] {prompt} [/INST]")
+ print(result[0]['generated_text'])
+ ```

  Output:
  >"Prédiction" is a noun that refers to the act of making a forecast or an estimate of something that will happen in the future. It can also refer to the result of such a forecast or estimate.