darrenphodgson76 committed
Commit bce314a · verified · 1 Parent(s): c754b33

Upload 3 files

Files changed (3)
  1. app.py +18 -0
  2. requirements.txt +6 -0
  3. train.py +60 -0
app.py ADDED
@@ -0,0 +1,18 @@
+ import gradio as gr
+ import subprocess
+
+ def run_training():
+     # Execute train.py and capture its output
+     result = subprocess.run(["python", "train.py"], capture_output=True, text=True)
+     return result.stdout + "\n" + result.stderr
+
+ # Create a Gradio interface with no inputs and a text output for logs
+ iface = gr.Interface(
+     fn=run_training,
+     inputs=[],
+     outputs="text",
+     title="LLaMA LoRA Fine-Tuning",
+     description="Click the button below to start fine-tuning the LLaMA 3.2 3B Instruct model using PEFT/LoRA."
+ )
+
+ iface.launch()
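Note that because run_training uses capture_output=True, the subprocess call blocks until train.py exits, so the logs only appear in the UI after the whole run finishes. A minimal, hypothetical variant (not part of this commit; names like run_training_streaming are illustrative) that streams the output line by line via Gradio's generator support could look like this:

import gradio as gr
import subprocess

def run_training_streaming():
    # Launch train.py and forward each line of output to the UI as it arrives.
    process = subprocess.Popen(
        ["python", "train.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    log = ""
    for line in process.stdout:
        log += line
        yield log  # Gradio streams successive yields to the text output
    process.wait()

iface = gr.Interface(fn=run_training_streaming, inputs=[], outputs="text")
iface.launch()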
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ unsloth
+ transformers
+ datasets
+ pandas
+ torch
+ trl
train.py ADDED
@@ -0,0 +1,60 @@
+ import unsloth  # must be first
+ import pandas as pd
+ import torch
+ from datasets import Dataset
+ from transformers import TrainingArguments
+ from unsloth import FastLanguageModel
+ from trl import SFTTrainer  # ✅ now works because we added 'trl'
+
+
+ # Load and format your dataset
+ df = pd.read_csv("data.csv")
+ df["text"] = df.apply(lambda row: f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['response']}\n", axis=1)
+ dataset = Dataset.from_pandas(df[["text"]])
+
+ # Load Unsloth model
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name = "unsloth/Llama-3.2-3B-Instruct",
+     max_seq_length = 2048,
+     dtype = torch.float16,
+     load_in_4bit = True,
+ )
+
+ # Apply LoRA without task_type
+ model = FastLanguageModel.get_peft_model(
+     model,
+     r = 8,
+     lora_alpha = 32,
+     lora_dropout = 0.05,
+     bias = "none",
+ )
+
+ # Tokenize text
+ def tokenize(example):
+     return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)
+
+ tokenized_dataset = dataset.map(tokenize, batched=True)
+
+ # Set up training
+ training_args = TrainingArguments(
+     output_dir = "./lora-finetuned",
+     per_device_train_batch_size = 2,
+     num_train_epochs = 3,
+     learning_rate = 2e-4,
+     logging_steps = 10,
+     save_steps = 100,
+     fp16 = True,
+ )
+
+ # Train
+ trainer = SFTTrainer(
+     model = model,
+     tokenizer = tokenizer,
+     args = training_args,
+     train_dataset = tokenized_dataset,
+ )
+
+ trainer.train()
+
+ # Save the fine-tuned LoRA adapter
+ model.save_pretrained("./lora-finetuned")
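The final save_pretrained call writes only the LoRA adapter weights, not a merged full model. A minimal inference sketch (not part of this commit) that reloads the adapter with Unsloth might look like the following; it assumes the adapter directory also contains the tokenizer files (e.g. via an extra tokenizer.save_pretrained call), otherwise the tokenizer can be loaded from the base model name instead, and the prompt text is purely illustrative.

import torch
from unsloth import FastLanguageModel

# Load the base model plus the saved LoRA adapter (directory written by train.py).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "./lora-finetuned",
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = True,
)
FastLanguageModel.for_inference(model)  # switch to faster generation mode

# Prompt in the same Instruction/Response format used to build the training data.
prompt = "### Instruction:\nSummarise what this project does.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))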