{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# FinGPT" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Part 1: Preparing the Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "1.1 Initialize Directories:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os\n", "import shutil\n", "\n", "jsonl_path = \"data/dataset_new.jsonl\"\n", "save_path = 'data/dataset_new'\n", "\n", "\n", "if os.path.exists(jsonl_path):\n", " os.remove(jsonl_path)\n", "\n", "if os.path.exists(save_path):\n", " shutil.rmtree(save_path)\n", "\n", "directory = \"data\"\n", "if not os.path.exists(directory):\n", " os.makedirs(directory)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "1.2 Load and Prepare Dataset:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dataset({\n", " features: ['input', 'output', 'instruction'],\n", " num_rows: 9543\n", "})" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datasets import load_dataset\n", "import datasets\n", "\n", "dic = {\n", " 0:\"negative\",\n", " 1:'positive',\n", " 2:'neutral',\n", "}\n", "\n", "tfns = load_dataset('zeroshot/twitter-financial-news-sentiment')\n", "tfns = tfns['train']\n", "tfns = tfns.to_pandas()\n", "tfns['label'] = tfns['label'].apply(lambda x:dic[x])\n", "tfns['instruction'] = 'What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive}.'\n", "tfns.columns = ['input', 'output', 'instruction']\n", "tfns = datasets.Dataset.from_pandas(tfns)\n", "tfns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "1.3 Concatenate and Shuffle Dataset" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "19086\n" ] }, { "data": { "text/plain": [ "(19086, 3)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tmp_dataset = datasets.concatenate_datasets([tfns]*2)\n", "train_dataset = tmp_dataset\n", "print(tmp_dataset.num_rows)\n", "\n", "all_dataset = train_dataset.shuffle(seed = 42)\n", "all_dataset.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Part 2: Dataset Formatting and Tokenization" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "2.1 Dataset Formatting:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8204ff4d7ae048508ff011ff341df7b3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "formatting..: 0%| | 0/19086 [00:00 dict:\n", " context = f\"Instruction: {example['instruction']}\\n\"\n", " if example.get(\"input\"):\n", " context += f\"Input: {example['input']}\\n\"\n", " context += \"Answer: \"\n", " target = example[\"output\"]\n", " return {\"context\": context, \"target\": target}\n", "\n", "\n", "data_list = []\n", "for item in all_dataset.to_pandas().itertuples():\n", " tmp = {}\n", " tmp[\"instruction\"] = item.instruction\n", " tmp[\"input\"] = item.input\n", " tmp[\"output\"] = item.output\n", " data_list.append(tmp)\n", "\n", "\n", "# save to a jsonl file\n", "with open(\"data/dataset_new.jsonl\", 'w') as f:\n", " for example in tqdm(data_list, desc=\"formatting..\"):\n", " f.write(json.dumps(format_example(example)) + '\\n')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "2.2 Tokenization" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "from transformers import AutoTokenizer, AutoConfig\n", "\n", "model_name = \"THUDM/chatglm2-6b\"\n", "jsonl_path = \"data/dataset_new.jsonl\" # updated path\n", "save_path = 'data/dataset_new' # updated path\n", "max_seq_length = 512\n", "skip_overlength = True\n", "\n", "# The preprocess function tokenizes the prompt and target, combines them into input IDs,\n", "# and then trims or pads the sequence to the maximum sequence length.\n", "def preprocess(tokenizer, config, example, max_seq_length):\n", " prompt = example[\"context\"]\n", " target = example[\"target\"]\n", " prompt_ids = tokenizer.encode(prompt, max_length=max_seq_length, truncation=True)\n", " target_ids = tokenizer.encode(\n", " target,\n", " max_length=max_seq_length,\n", " truncation=True,\n", " add_special_tokens=False)\n", " input_ids = prompt_ids + target_ids + [config.eos_token_id]\n", " return {\"input_ids\": input_ids, \"seq_len\": len(prompt_ids)}\n", "\n", "# The read_jsonl function reads each line from the JSONL file, preprocesses it using the preprocess function,\n", "# and then yields each preprocessed example.\n", "def read_jsonl(path, max_seq_length, skip_overlength=False):\n", " tokenizer = AutoTokenizer.from_pretrained(\n", " model_name, trust_remote_code=True)\n", " config = AutoConfig.from_pretrained(\n", " model_name, trust_remote_code=True, device_map='auto')\n", " with open(path, \"r\") as f:\n", " for line in tqdm(f.readlines()):\n", " example = json.loads(line)\n", " feature = preprocess(tokenizer, config, example, max_seq_length)\n", " if skip_overlength and len(feature[\"input_ids\"]) > max_seq_length:\n", " continue\n", " feature[\"input_ids\"] = feature[\"input_ids\"][:max_seq_length]\n", " yield feature" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "2.3 Save the dataset" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ee7846a30c5d4d59be6bd5e2cf6c1870", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Saving the dataset (0/1 shards): 0%| | 0/19086 [00:00 to the callbacks of this Trainer, but there is already one. The currentlist of callbacks is\n", ":DefaultFlowCallback\n", "TensorBoardCallback\n", "d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "27521448ffea440ba40770ef24937509", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/954 [00:00 \u001b[39m\u001b[32m63\u001b[39m trainer.train()\n\u001b[32m 64\u001b[39m writer.close()\n\u001b[32m 65\u001b[39m \u001b[38;5;66;03m# save model\u001b[39;00m\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\trainer.py:1645\u001b[39m, in \u001b[36mTrainer.train\u001b[39m\u001b[34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[39m\n\u001b[32m 1640\u001b[39m \u001b[38;5;28mself\u001b[39m.model_wrapped = \u001b[38;5;28mself\u001b[39m.model\n\u001b[32m 1642\u001b[39m inner_training_loop = find_executable_batch_size(\n\u001b[32m 1643\u001b[39m \u001b[38;5;28mself\u001b[39m._inner_training_loop, \u001b[38;5;28mself\u001b[39m._train_batch_size, args.auto_find_batch_size\n\u001b[32m 1644\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1645\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m inner_training_loop(\n\u001b[32m 1646\u001b[39m args=args,\n\u001b[32m 1647\u001b[39m resume_from_checkpoint=resume_from_checkpoint,\n\u001b[32m 1648\u001b[39m trial=trial,\n\u001b[32m 1649\u001b[39m ignore_keys_for_eval=ignore_keys_for_eval,\n\u001b[32m 1650\u001b[39m )\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\trainer.py:1938\u001b[39m, in \u001b[36mTrainer._inner_training_loop\u001b[39m\u001b[34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[39m\n\u001b[32m 1935\u001b[39m \u001b[38;5;28mself\u001b[39m.control = \u001b[38;5;28mself\u001b[39m.callback_handler.on_step_begin(args, \u001b[38;5;28mself\u001b[39m.state, \u001b[38;5;28mself\u001b[39m.control)\n\u001b[32m 1937\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m.accelerator.accumulate(model):\n\u001b[32m-> \u001b[39m\u001b[32m1938\u001b[39m tr_loss_step = \u001b[38;5;28mself\u001b[39m.training_step(model, inputs)\n\u001b[32m 1940\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[32m 1941\u001b[39m args.logging_nan_inf_filter\n\u001b[32m 1942\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[32m 1943\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m (torch.isnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch.isinf(tr_loss_step))\n\u001b[32m 1944\u001b[39m ):\n\u001b[32m 1945\u001b[39m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[32m 1946\u001b[39m tr_loss += tr_loss / (\u001b[32m1\u001b[39m + \u001b[38;5;28mself\u001b[39m.state.global_step - \u001b[38;5;28mself\u001b[39m._globalstep_last_logged)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\trainer.py:2759\u001b[39m, in \u001b[36mTrainer.training_step\u001b[39m\u001b[34m(self, model, inputs)\u001b[39m\n\u001b[32m 2756\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m loss_mb.reduce_mean().detach().to(\u001b[38;5;28mself\u001b[39m.args.device)\n\u001b[32m 2758\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m.compute_loss_context_manager():\n\u001b[32m-> \u001b[39m\u001b[32m2759\u001b[39m loss = \u001b[38;5;28mself\u001b[39m.compute_loss(model, inputs)\n\u001b[32m 2761\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.args.n_gpu > \u001b[32m1\u001b[39m:\n\u001b[32m 2762\u001b[39m loss = loss.mean() \u001b[38;5;66;03m# mean() to average on multi-gpu parallel training\u001b[39;00m\n", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 3\u001b[39m, in \u001b[36mModifiedTrainer.compute_loss\u001b[39m\u001b[34m(self, model, inputs)\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mcompute_loss\u001b[39m(\u001b[38;5;28mself\u001b[39m, model, inputs):\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m model(\n\u001b[32m 4\u001b[39m input_ids=inputs[\u001b[33m\"\u001b[39m\u001b[33minput_ids\u001b[39m\u001b[33m\"\u001b[39m],\n\u001b[32m 5\u001b[39m labels=inputs[\u001b[33m\"\u001b[39m\u001b[33mlabels\u001b[39m\u001b[33m\"\u001b[39m],\n\u001b[32m 6\u001b[39m ).loss\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1772\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1773\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._call_impl(*args, **kwargs)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1779\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1780\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1781\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1782\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1783\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1784\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(*args, **kwargs)\n\u001b[32m 1786\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1787\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\utils\\operations.py:687\u001b[39m, in \u001b[36mconvert_outputs_to_fp32..forward\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 686\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mforward\u001b[39m(*args, **kwargs):\n\u001b[32m--> \u001b[39m\u001b[32m687\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m model_forward(*args, **kwargs)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\utils\\operations.py:675\u001b[39m, in \u001b[36mConvertOutputsToFp32.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 674\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, *args, **kwargs):\n\u001b[32m--> \u001b[39m\u001b[32m675\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28mself\u001b[39m.model_forward(*args, **kwargs))\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\amp\\autocast_mode.py:44\u001b[39m, in \u001b[36mautocast_decorator..decorate_autocast\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 41\u001b[39m \u001b[38;5;129m@functools\u001b[39m.wraps(func)\n\u001b[32m 42\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdecorate_autocast\u001b[39m(*args, **kwargs):\n\u001b[32m 43\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[32m---> \u001b[39m\u001b[32m44\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m func(*args, **kwargs)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\peft\\peft_model.py:1091\u001b[39m, in \u001b[36mPeftModelForCausalLM.forward\u001b[39m\u001b[34m(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)\u001b[39m\n\u001b[32m 1089\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m peft_config.peft_type == PeftType.POLY:\n\u001b[32m 1090\u001b[39m kwargs[\u001b[33m\"\u001b[39m\u001b[33mtask_ids\u001b[39m\u001b[33m\"\u001b[39m] = task_ids\n\u001b[32m-> \u001b[39m\u001b[32m1091\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.base_model(\n\u001b[32m 1092\u001b[39m input_ids=input_ids,\n\u001b[32m 1093\u001b[39m attention_mask=attention_mask,\n\u001b[32m 1094\u001b[39m inputs_embeds=inputs_embeds,\n\u001b[32m 1095\u001b[39m labels=labels,\n\u001b[32m 1096\u001b[39m output_attentions=output_attentions,\n\u001b[32m 1097\u001b[39m output_hidden_states=output_hidden_states,\n\u001b[32m 1098\u001b[39m return_dict=return_dict,\n\u001b[32m 1099\u001b[39m **kwargs,\n\u001b[32m 1100\u001b[39m )\n\u001b[32m 1102\u001b[39m batch_size = _get_batch_size(input_ids, inputs_embeds)\n\u001b[32m 1103\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m attention_mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1104\u001b[39m \u001b[38;5;66;03m# concat prompt attention mask\u001b[39;00m\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1772\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1773\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._call_impl(*args, **kwargs)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1779\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1780\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1781\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1782\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1783\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1784\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(*args, **kwargs)\n\u001b[32m 1786\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1787\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\peft\\tuners\\tuners_utils.py:160\u001b[39m, in \u001b[36mBaseTuner.forward\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 159\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, *args: Any, **kwargs: Any):\n\u001b[32m--> \u001b[39m\u001b[32m160\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.model.forward(*args, **kwargs)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py:165\u001b[39m, in \u001b[36madd_hook_to_module..new_forward\u001b[39m\u001b[34m(module, *args, **kwargs)\u001b[39m\n\u001b[32m 163\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 164\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m165\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 166\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m module._hf_hook.post_forward(module, output)\n", "\u001b[36mFile \u001b[39m\u001b[32m~/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py:937\u001b[39m, in \u001b[36mChatGLMForConditionalGeneration.forward\u001b[39m\u001b[34m(self, input_ids, position_ids, attention_mask, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, return_last_logit)\u001b[39m\n\u001b[32m 934\u001b[39m use_cache = use_cache \u001b[38;5;28;01mif\u001b[39;00m use_cache \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m.config.use_cache\n\u001b[32m 935\u001b[39m return_dict = return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m.config.use_return_dict\n\u001b[32m--> \u001b[39m\u001b[32m937\u001b[39m transformer_outputs = \u001b[38;5;28mself\u001b[39m.transformer(\n\u001b[32m 938\u001b[39m input_ids=input_ids,\n\u001b[32m 939\u001b[39m position_ids=position_ids,\n\u001b[32m 940\u001b[39m attention_mask=attention_mask,\n\u001b[32m 941\u001b[39m past_key_values=past_key_values,\n\u001b[32m 942\u001b[39m inputs_embeds=inputs_embeds,\n\u001b[32m 943\u001b[39m use_cache=use_cache,\n\u001b[32m 944\u001b[39m output_hidden_states=output_hidden_states,\n\u001b[32m 945\u001b[39m return_dict=return_dict,\n\u001b[32m 946\u001b[39m )\n\u001b[32m 948\u001b[39m hidden_states = transformer_outputs[\u001b[32m0\u001b[39m]\n\u001b[32m 949\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m return_last_logit:\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1772\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1773\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._call_impl(*args, **kwargs)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1779\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1780\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1781\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1782\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1783\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1784\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(*args, **kwargs)\n\u001b[32m 1786\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1787\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py:165\u001b[39m, in \u001b[36madd_hook_to_module..new_forward\u001b[39m\u001b[34m(module, *args, **kwargs)\u001b[39m\n\u001b[32m 163\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 164\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m165\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 166\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m module._hf_hook.post_forward(module, output)\n", "\u001b[36mFile \u001b[39m\u001b[32m~/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py:830\u001b[39m, in \u001b[36mChatGLMModel.forward\u001b[39m\u001b[34m(self, input_ids, position_ids, attention_mask, full_attention_mask, past_key_values, inputs_embeds, use_cache, output_hidden_states, return_dict)\u001b[39m\n\u001b[32m 827\u001b[39m rotary_pos_emb = rotary_pos_emb.transpose(\u001b[32m0\u001b[39m, \u001b[32m1\u001b[39m).contiguous()\n\u001b[32m 829\u001b[39m \u001b[38;5;66;03m# Run encoder.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m830\u001b[39m hidden_states, presents, all_hidden_states, all_self_attentions = \u001b[38;5;28mself\u001b[39m.encoder(\n\u001b[32m 831\u001b[39m inputs_embeds, full_attention_mask, rotary_pos_emb=rotary_pos_emb,\n\u001b[32m 832\u001b[39m kv_caches=past_key_values, use_cache=use_cache, output_hidden_states=output_hidden_states\n\u001b[32m 833\u001b[39m )\n\u001b[32m 835\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m return_dict:\n\u001b[32m 836\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtuple\u001b[39m(v \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m [hidden_states, presents, all_hidden_states, all_self_attentions] \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1772\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1773\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._call_impl(*args, **kwargs)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1779\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1780\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1781\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1782\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1783\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1784\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(*args, **kwargs)\n\u001b[32m 1786\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1787\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py:165\u001b[39m, in \u001b[36madd_hook_to_module..new_forward\u001b[39m\u001b[34m(module, *args, **kwargs)\u001b[39m\n\u001b[32m 163\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 164\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m165\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 166\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m module._hf_hook.post_forward(module, output)\n", "\u001b[36mFile \u001b[39m\u001b[32m~/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py:631\u001b[39m, in \u001b[36mGLMTransformer.forward\u001b[39m\u001b[34m(self, hidden_states, attention_mask, rotary_pos_emb, kv_caches, use_cache, output_hidden_states)\u001b[39m\n\u001b[32m 629\u001b[39m layer = \u001b[38;5;28mself\u001b[39m._get_layer(index)\n\u001b[32m 630\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.gradient_checkpointing \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m.training:\n\u001b[32m--> \u001b[39m\u001b[32m631\u001b[39m layer_ret = torch.utils.checkpoint.checkpoint(\n\u001b[32m 632\u001b[39m layer,\n\u001b[32m 633\u001b[39m hidden_states,\n\u001b[32m 634\u001b[39m attention_mask,\n\u001b[32m 635\u001b[39m rotary_pos_emb,\n\u001b[32m 636\u001b[39m kv_caches[index],\n\u001b[32m 637\u001b[39m use_cache\n\u001b[32m 638\u001b[39m )\n\u001b[32m 639\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 640\u001b[39m layer_ret = layer(\n\u001b[32m 641\u001b[39m hidden_states,\n\u001b[32m 642\u001b[39m attention_mask,\n\u001b[32m (...)\u001b[39m\u001b[32m 645\u001b[39m use_cache=use_cache\n\u001b[32m 646\u001b[39m )\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\_compile.py:53\u001b[39m, in \u001b[36m_disable_dynamo..inner\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 50\u001b[39m disable_fn = torch._dynamo.disable(fn, recursive, wrapping=\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[32m 51\u001b[39m fn.__dynamo_disable = disable_fn \u001b[38;5;66;03m# type: ignore[attr-defined]\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m53\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m disable_fn(*args, **kwargs)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\_dynamo\\eval_frame.py:1005\u001b[39m, in \u001b[36mDisableContext.__call__.._fn\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 1003\u001b[39m _maybe_set_eval_frame(_callback_from_stance(\u001b[38;5;28mself\u001b[39m.callback))\n\u001b[32m 1004\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1005\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m fn(*args, **kwargs)\n\u001b[32m 1006\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 1007\u001b[39m set_eval_frame(\u001b[38;5;28;01mNone\u001b[39;00m)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\utils\\checkpoint.py:488\u001b[39m, in \u001b[36mcheckpoint\u001b[39m\u001b[34m(function, use_reentrant, context_fn, determinism_check, debug, *args, **kwargs)\u001b[39m\n\u001b[32m 483\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m context_fn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m noop_context_fn \u001b[38;5;129;01mor\u001b[39;00m debug \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m:\n\u001b[32m 484\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 485\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mPassing `context_fn` or `debug` is only supported when \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 486\u001b[39m \u001b[33m\"\u001b[39m\u001b[33muse_reentrant=False.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 487\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m488\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m CheckpointFunction.apply(function, preserve, *args)\n\u001b[32m 489\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 490\u001b[39m gen = _checkpoint_without_reentrant_generator(\n\u001b[32m 491\u001b[39m function, preserve, context_fn, determinism_check, debug, *args, **kwargs\n\u001b[32m 492\u001b[39m )\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\autograd\\function.py:581\u001b[39m, in \u001b[36mFunction.apply\u001b[39m\u001b[34m(cls, *args, **kwargs)\u001b[39m\n\u001b[32m 578\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m torch._C._are_functorch_transforms_active():\n\u001b[32m 579\u001b[39m \u001b[38;5;66;03m# See NOTE: [functorch vjp and autograd interaction]\u001b[39;00m\n\u001b[32m 580\u001b[39m args = _functorch.utils.unwrap_dead_wrappers(args)\n\u001b[32m--> \u001b[39m\u001b[32m581\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m().apply(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 583\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_setup_ctx_defined:\n\u001b[32m 584\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[32m 585\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mIn order to use an autograd.Function with functorch transforms \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 586\u001b[39m \u001b[33m\"\u001b[39m\u001b[33m(vmap, grad, jvp, jacrev, ...), it must override the setup_context \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 587\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mstaticmethod. For more details, please see \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 588\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mhttps://pytorch.org/docs/main/notes/extending.func.html\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 589\u001b[39m )\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\utils\\checkpoint.py:262\u001b[39m, in \u001b[36mCheckpointFunction.forward\u001b[39m\u001b[34m(ctx, run_function, preserve_rng_state, *args)\u001b[39m\n\u001b[32m 259\u001b[39m ctx.save_for_backward(*tensor_inputs)\n\u001b[32m 261\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m torch.no_grad():\n\u001b[32m--> \u001b[39m\u001b[32m262\u001b[39m outputs = run_function(*args)\n\u001b[32m 263\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m outputs\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1772\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1773\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._call_impl(*args, **kwargs)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1779\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1780\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1781\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1782\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1783\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1784\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(*args, **kwargs)\n\u001b[32m 1786\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1787\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py:165\u001b[39m, in \u001b[36madd_hook_to_module..new_forward\u001b[39m\u001b[34m(module, *args, **kwargs)\u001b[39m\n\u001b[32m 163\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 164\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m165\u001b[39m output = module._old_forward(*args, **kwargs)\n\u001b[32m 166\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m module._hf_hook.post_forward(module, output)\n", "\u001b[36mFile \u001b[39m\u001b[32m~/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py:562\u001b[39m, in \u001b[36mGLMBlock.forward\u001b[39m\u001b[34m(self, hidden_states, attention_mask, rotary_pos_emb, kv_cache, use_cache)\u001b[39m\n\u001b[32m 559\u001b[39m layernorm_input = residual + layernorm_input\n\u001b[32m 561\u001b[39m \u001b[38;5;66;03m# Layer norm post the self attention.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m562\u001b[39m layernorm_output = \u001b[38;5;28mself\u001b[39m.post_attention_layernorm(layernorm_input)\n\u001b[32m 564\u001b[39m \u001b[38;5;66;03m# MLP.\u001b[39;00m\n\u001b[32m 565\u001b[39m mlp_output = \u001b[38;5;28mself\u001b[39m.mlp(layernorm_output)\n", "\u001b[36mFile \u001b[39m\u001b[32md:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1949\u001b[39m, in \u001b[36mModule.__getattr__\u001b[39m\u001b[34m(self, name)\u001b[39m\n\u001b[32m 1944\u001b[39m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks = OrderedDict()\n\u001b[32m 1946\u001b[39m \u001b[38;5;66;03m# It is crucial that the return type is not annotated as `Any`, otherwise type checking\u001b[39;00m\n\u001b[32m 1947\u001b[39m \u001b[38;5;66;03m# on `torch.nn.Module` and all its subclasses is largely disabled as a result. See:\u001b[39;00m\n\u001b[32m 1948\u001b[39m \u001b[38;5;66;03m# https://github.com/pytorch/pytorch/pull/115074\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1949\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__getattr__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name: \u001b[38;5;28mstr\u001b[39m) -> Union[Tensor, \u001b[33m\"\u001b[39m\u001b[33mModule\u001b[39m\u001b[33m\"\u001b[39m]:\n\u001b[32m 1950\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m_parameters\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.\u001b[34m__dict__\u001b[39m:\n\u001b[32m 1951\u001b[39m _parameters = \u001b[38;5;28mself\u001b[39m.\u001b[34m__dict__\u001b[39m[\u001b[33m\"\u001b[39m\u001b[33m_parameters\u001b[39m\u001b[33m\"\u001b[39m]\n", "\u001b[31mKeyboardInterrupt\u001b[39m: " ] } ], "source": [ "class ModifiedTrainer(Trainer):\n", " def compute_loss(self, model, inputs):\n", " return model(\n", " input_ids=inputs[\"input_ids\"],\n", " labels=inputs[\"labels\"],\n", " ).loss\n", "\n", " def prediction_step(self, model: torch.nn.Module, inputs, prediction_loss_only: bool, ignore_keys = None):\n", " with torch.no_grad():\n", " res = model(\n", " input_ids=inputs[\"input_ids\"].to(model.device),\n", " labels=inputs[\"labels\"].to(model.device),\n", " ).loss\n", " return (res, None, None)\n", "\n", " def save_model(self, output_dir=None, _internal_call=False):\n", " from transformers.trainer import TRAINING_ARGS_NAME\n", "\n", " os.makedirs(output_dir, exist_ok=True)\n", " torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))\n", " saved_params = {\n", " k: v.to(\"cpu\") for k, v in self.model.named_parameters() if v.requires_grad\n", " }\n", " torch.save(saved_params, os.path.join(output_dir, \"adapter_model.bin\"))\n", "\n", "def data_collator(features: list) -> dict:\n", " len_ids = [len(feature[\"input_ids\"]) for feature in features]\n", " longest = max(len_ids)\n", " input_ids = []\n", " labels_list = []\n", " for ids_l, feature in sorted(zip(len_ids, features), key=lambda x: -x[0]):\n", " ids = feature[\"input_ids\"]\n", " seq_len = feature[\"seq_len\"]\n", " labels = (\n", " [tokenizer.pad_token_id] * (seq_len - 1) + ids[(seq_len - 1) :] + [tokenizer.pad_token_id] * (longest - ids_l)\n", " )\n", " ids = ids + [tokenizer.pad_token_id] * (longest - ids_l)\n", " _ids = torch.LongTensor(ids)\n", " labels_list.append(torch.LongTensor(labels))\n", " input_ids.append(_ids)\n", " input_ids = torch.stack(input_ids)\n", " labels = torch.stack(labels_list)\n", " return {\n", " \"input_ids\": input_ids,\n", " \"labels\": labels,\n", " }\n", "\n", "from torch.utils.tensorboard import SummaryWriter\n", "from transformers.integrations import TensorBoardCallback\n", "\n", "# Train\n", "# Took about 10 compute units\n", "# Took 1 hour to train\n", "writer = SummaryWriter()\n", "trainer = ModifiedTrainer(\n", " model=model,\n", " args=training_args, # Trainer args\n", " train_dataset=dataset[\"train\"], # Training set\n", " eval_dataset=dataset[\"test\"], # Testing set\n", " data_collator=data_collator, # Data Collator\n", " callbacks=[TensorBoardCallback(writer)],\n", ")\n", "trainer.train()\n", "writer.close()\n", "# save model\n", "# model.save_pretrained(training_args.output_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Part 5: Inference and Benchmarks using FinGPT" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5.1 Load the model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Path exists.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f632e66d18914e13a9febb54f4e9ee42", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/7 [00:00\n", " res = test_tfns(model, tokenizer, batch_size = batch_size)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\fingpt\\tfns.py\", line 62, in test_tfns\n", " res = model.generate(**tokens, max_length=512)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\peft\\peft_model.py\", line 1148, in generate\n", " outputs = self.base_model.generate(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\utils\\_contextlib.py\", line 120, in decorate_context\n", " return func(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\generation\\utils.py\", line 1522, in generate\n", " return self.greedy_search(\n", " ^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\generation\\utils.py\", line 2339, in greedy_search\n", " outputs = self(\n", " ^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1773, in _wrapped_call_impl\n", " return self._call_impl(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1784, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py\", line 165, in new_forward\n", " output = module._old_forward(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"C:\\Users\\23524/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py\", line 937, in forward\n", " transformer_outputs = self.transformer(\n", " ^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1773, in _wrapped_call_impl\n", " return self._call_impl(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1784, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py\", line 165, in new_forward\n", " output = module._old_forward(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"C:\\Users\\23524/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py\", line 807, in forward\n", " inputs_embeds = self.embedding(input_ids)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1773, in _wrapped_call_impl\n", " return self._call_impl(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1784, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py\", line 165, in new_forward\n", " output = module._old_forward(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"C:\\Users\\23524/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py\", line 723, in forward\n", " words_embeddings = self.word_embeddings(input_ids)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1773, in _wrapped_call_impl\n", " return self._call_impl(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1784, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py\", line 165, in new_forward\n", " output = module._old_forward(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\sparse.py\", line 192, in forward\n", " return F.embedding(\n", " ^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\functional.py\", line 2546, in embedding\n", " return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", "torch.AcceleratorError: CUDA error: out of memory\n", "Search for `cudaErrorMemoryAllocation' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information.\n", "CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\n", "For debugging consider passing CUDA_LAUNCH_BLOCKING=1\n", "Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.\n", "\n", "\n", "During handling of the above exception, another exception occurred:\n", "\n", "Traceback (most recent call last):\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 2176, in showtraceback\n", " stb = self.InteractiveTB.structured_traceback(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 1182, in structured_traceback\n", " return FormattedTB.structured_traceback(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 1053, in structured_traceback\n", " return VerboseTB.structured_traceback(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 861, in structured_traceback\n", " formatted_exceptions: list[list[str]] = self.format_exception_as_a_whole(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 773, in format_exception_as_a_whole\n", " frames.append(self.format_record(record))\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 651, in format_record\n", " _format_traceback_lines(\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\tbtools.py\", line 99, in _format_traceback_lines\n", " line = stack_line.render(pygmented=has_colors).rstrip(\"\\n\") + \"\\n\"\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\stack_data\\core.py\", line 360, in render\n", " start_line, lines = self.frame_info._pygmented_scope_lines\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\stack_data\\utils.py\", line 145, in cached_property_wrapper\n", " value = obj.__dict__[self.func.__name__] = self.func(obj)\n", " ^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\stack_data\\core.py\", line 780, in _pygmented_scope_lines\n", " lines = _pygmented_with_ranges(formatter, code, ranges)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\stack_data\\utils.py\", line 165, in _pygmented_with_ranges\n", " return pygments.highlight(code, lexer, formatter).splitlines()\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\__init__.py\", line 82, in highlight\n", " return format(lex(code, lexer), formatter, outfile)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\__init__.py\", line 64, in format\n", " formatter.format(tokens, realoutfile)\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\formatters\\terminal256.py\", line 250, in format\n", " return Formatter.format(self, tokensource, outfile)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\formatter.py\", line 124, in format\n", " return self.format_unencoded(tokensource, outfile)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\formatters\\terminal256.py\", line 256, in format_unencoded\n", " for ttype, value in tokensource:\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\stack_data\\utils.py\", line 158, in get_tokens\n", " for ttype, value in super().get_tokens(text):\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\lexer.py\", line 270, in streamer\n", " for _, t, v in self.get_tokens_unprocessed(text):\n", " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\lexer.py\", line 712, in get_tokens_unprocessed\n", " m = rexmatch(text, pos)\n", " ^^^^^^^^^^^^^^^^^^^\n", "MemoryError\n" ] } ], "source": [ "batch_size = 8\n", "\n", "# TFNS Test Set, len 2388\n", "# Available: 84.85 compute units\n", "res = test_tfns(model, tokenizer, batch_size = batch_size)\n", "# Available: 83.75 compute units\n", "# Took about 1 compute unite to inference\n", "\n", "\n", "# FPB, len 1212\n", "# res = test_fpb(model, tokenizer, batch_size = batch_size)\n", "\n", "# FiQA, len 275\n", "# res = test_fiqa(model, tokenizer, prompt_fun = add_instructions, batch_size = batch_size)\n", "\n", "# NWGI, len 4047\n", "# res = test_nwgi(model, tokenizer, batch_size = batch_size)" ] } ], "metadata": { "kernelspec": { "display_name": "fingpt-env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 2 }