{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# t5 training for combined concatenated outputs (thing + property) \n", "\n", "refer to `t5_train_tp.py` and `guide_for_tp.md` for faster training workflow" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "90f850a9e8324109808e45e40f0eea47", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/6260 [00:00\n", " \n", " \n", " [1800/3920 13:48 < 16:16, 2.17 it/s, Epoch 36/80]\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Step | Training Loss | Validation Loss | Bleu\n",
"  200 | 2.654300 | 0.112380 | 26.397731\n",
"  400 | 0.106600 | 0.035335 | 87.137364\n",
"  600 | 0.044600 | 0.022964 | 89.884682\n",
"  800 | 0.026300 | 0.018220 | 86.274312\n",
" 1000 | 0.017300 | 0.016252 | 86.389477\n",
" 1200 | 0.012400 | 0.015651 | 94.416285\n",
" 1400 | 0.011500 | 0.014833 | 91.596509\n",
" 1600 | 0.008800 | 0.015168 | 91.629519\n",
" 1800 | 0.006900 | 0.015042 | 95.375351\n"
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/transformers/generation/utils.py:1141: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n", "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n", " warnings.warn('Was asked to gather along dimension 0, but all '\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/transformers/generation/utils.py:1141: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n", "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n", " warnings.warn('Was asked to gather along dimension 0, but all '\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/transformers/generation/utils.py:1141: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n", "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n", " warnings.warn('Was asked to gather along dimension 0, but all '\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/transformers/generation/utils.py:1141: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. 
We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n", "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n", " warnings.warn('Was asked to gather along dimension 0, but all '\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/transformers/generation/utils.py:1141: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n", "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n", " warnings.warn('Was asked to gather along dimension 0, but all '\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/transformers/generation/utils.py:1141: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n", "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n", " warnings.warn('Was asked to gather along dimension 0, but all '\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/transformers/generation/utils.py:1141: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n", "Some non-default generation parameters are set in the model config. 
These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n", " warnings.warn('Was asked to gather along dimension 0, but all '\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/transformers/generation/utils.py:1141: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n", "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n", " warnings.warn('Was asked to gather along dimension 0, but all '\n", "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/transformers/generation/utils.py:1141: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n", "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n", "There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].\n" ] }, { "ename": "", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", "\u001b[1;31mClick here for more info. \n", "\u001b[1;31mView Jupyter log for further details." 
] } ], "source": [ "from datasets import load_from_disk\n", "import json\n", "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer, EarlyStoppingCallback\n", "import evaluate\n", "import numpy as np\n", "import os\n", "\n", "model_name = \"facebook/bart-base\"\n", "train_epochs = 80\n", "\n", "# Load mode configuration\n", "with open(\"mode.json\", \"r\") as json_file:\n", " mode_dict = json.load(json_file)\n", "\n", "mode_dict.update({\"model\": model_name, \"train_epochs\": train_epochs})\n", "fold_group = mode_dict.get(\"fold_group\")\n", "\n", "with open(\"mode.json\", \"w\") as json_file:\n", " json.dump(mode_dict, json_file)\n", "\n", "mode = mode_dict.get(\"mode\", \"default_value\")\n", "file_path = f'combined_data/{mode}/{fold_group}'\n", "split_datasets = load_from_disk(file_path)\n", "\n", "# Load tokenizer and add special tokens\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "additional_special_tokens = [\n", " \"\", \"\", \"\", \"\",\n", " \"\", \"\", \"\", \"\", \n", " \"\", \"\", \"\", \"\",\n", " \"\", \"\"\n", "]\n", "tokenizer.add_special_tokens({\"additional_special_tokens\": additional_special_tokens})\n", "\n", "# Preprocess function for tokenization\n", "def preprocess_function(examples):\n", " inputs = [ex[\"input\"] for ex in examples['translation']]\n", " targets = [ex[\"thing_property\"] for ex in examples['translation']]\n", " return tokenizer(inputs, text_target=targets, max_length=64, truncation=True)\n", "\n", "tokenized_datasets = split_datasets.map(\n", " preprocess_function, batched=True, remove_columns=split_datasets[\"train\"].column_names\n", ")\n", "\n", "# Load model and resize token embeddings\n", "model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n", "model.resize_token_embeddings(len(tokenizer))\n", "\n", "# Data collator for padding and batching\n", "data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)\n", "\n", "# Load evaluation metric\n", "metric = evaluate.load(\"sacrebleu\")\n", "\n", "# Compute metrics function\n", "def compute_metrics(eval_preds):\n", " preds, labels = eval_preds\n", " preds = preds[0] if isinstance(preds, tuple) else preds\n", " \n", " # Decode predictions and labels\n", " decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)\n", " labels = np.where(labels != -100, labels, tokenizer.pad_token_id) # Replace padding tokens\n", " decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)\n", " \n", " # Post-process decoding\n", " decoded_preds = [pred.strip() for pred in decoded_preds]\n", " decoded_labels = [[label.strip()] for label in decoded_labels]\n", " \n", " result = metric.compute(predictions=decoded_preds, references=decoded_labels)\n", " return {\"bleu\": result[\"score\"]}\n", "\n", "args = Seq2SeqTrainingArguments(\n", " f\"train_{fold_group}_{model_name}_{mode}_{train_epochs}\",\n", " save_strategy=\"steps\",\n", " learning_rate=1e-5,\n", " per_device_train_batch_size=32,\n", " per_device_eval_batch_size=64,\n", " auto_find_batch_size=True,\n", " ddp_find_unused_parameters=False,\n", " weight_decay=0.01,\n", " save_total_limit=1,\n", " num_train_epochs=train_epochs,\n", " predict_with_generate=True,\n", " bf16=True,\n", " push_to_hub=False,\n", " evaluation_strategy=\"steps\",\n", " eval_steps=200,\n", " save_steps=200, \n", " logging_steps=200, \n", " load_best_model_at_end=True, \n", " lr_scheduler_type=\"linear\",\n", " warmup_steps=100,\n", ")\n", "\n", "# Define the 
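EarlyStoppingCallback and note how it interacts with the arguments above:\n",
"# - evaluation runs every eval_steps=200 optimizer steps;\n",
"# - early_stopping_patience=2 stops training once the metric used for best-model selection\n",
"#   (validation loss by default when metric_for_best_model is unset) fails to improve for two\n",
"#   consecutive evaluations, and load_best_model_at_end=True then restores the best checkpoint;\n",
"# - predict_with_generate=True without generation_max_length falls back to the model's default\n",
"#   generation max_length, which the UserWarnings in the output above point out;\n",
"# - os._exit(0) at the end of the cell kills the kernel once training finishes, which is likely\n",
"#   why a kernel crash is reported above.\n",
"\n",
"# Define the 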
EarlyStoppingCallback\n", "early_stopping_callback = EarlyStoppingCallback(\n", " early_stopping_patience=2\n", ")\n", "\n", "trainer = Seq2SeqTrainer(\n", " model,\n", " args,\n", " train_dataset=tokenized_datasets[\"train\"],\n", " eval_dataset=tokenized_datasets[\"validation\"],\n", " data_collator=data_collator,\n", " tokenizer=tokenizer,\n", " compute_metrics=compute_metrics,\n", " callbacks=[early_stopping_callback] \n", ")\n", "\n", "trainer.train()\n", "os._exit(0)\n", "\n" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" } }, "nbformat": 4, "nbformat_minor": 2 }