{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# T5 training for combined concatenated outputs (thing + property)\n", "\n", "Refer to `t5_train_tp.py` and `guide_for_tp.md` for a faster training workflow." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/hwang/anaconda3/envs/torch/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n", " warnings.warn('Was asked to gather along dimension 0, but all '\n" ] }, { "data": { "text/html": [ "\n", "
Step | \n", "Training Loss | \n", "Validation Loss | \n", "Bleu | \n", "
---|---|---|---|
100 | \n", "9.068100 | \n", "1.485702 | \n", "0.000000 | \n", "
200 | \n", "0.886400 | \n", "0.219002 | \n", "20.999970 | \n", "
300 | \n", "0.302500 | \n", "0.100100 | \n", "50.318311 | \n", "
400 | \n", "0.168400 | \n", "0.053922 | \n", "52.052581 | \n", "
500 | \n", "0.113800 | \n", "0.046394 | \n", "53.469249 | \n", "
600 | \n", "0.084500 | \n", "0.040225 | \n", "53.980484 | \n", "
700 | \n", "0.066900 | \n", "0.026786 | \n", "58.959618 | \n", "
800 | \n", "0.053300 | \n", "0.025612 | \n", "52.672595 | \n", "
900 | \n", "0.042600 | \n", "0.019917 | \n", "58.475230 | \n", "
1000 | \n", "0.038200 | \n", "0.021234 | \n", "52.335545 | \n", "
1100 | \n", "0.032500 | \n", "0.021687 | \n", "52.400191 | \n", "
1200 | \n", "0.030100 | \n", "0.022106 | \n", "59.836717 | \n", "
1300 | \n", "0.026800 | \n", "0.020341 | \n", "55.878989 | \n", "
1400 | \n", "0.023200 | \n", "0.019192 | \n", "53.356706 | \n", "
1500 | \n", "0.022500 | \n", "0.018187 | \n", "59.718873 | \n", "
1600 | \n", "0.020900 | \n", "0.017806 | \n", "62.848480 | \n", "
1700 | \n", "0.017200 | \n", "0.018625 | \n", "62.796542 | \n", "
1800 | \n", "0.015500 | \n", "0.020747 | \n", "62.920445 | \n", "
1900 | \n", "0.013800 | \n", "0.027109 | \n", "68.566983 | \n", "
2000 | \n", "0.013900 | \n", "0.024757 | \n", "65.792365 | \n", "
2100 | \n", "0.011600 | \n", "0.021626 | \n", "68.714757 | \n", "
2200 | \n", "0.011800 | \n", "0.025541 | \n", "73.793641 | \n", "
2300 | \n", "0.011000 | \n", "0.017915 | \n", "71.351766 | \n", "
2400 | \n", "0.010500 | \n", "0.020459 | \n", "76.285575 | \n", "
2500 | \n", "0.009700 | \n", "0.019714 | \n", "78.722420 | \n", "
2600 | \n", "0.008700 | \n", "0.026323 | \n", "73.858894 | \n", "
2700 | \n", "0.008600 | \n", "0.023967 | \n", "78.752238 | \n", "
2800 | \n", "0.008500 | \n", "0.025074 | \n", "78.772012 | \n", "
2900 | \n", "0.008400 | \n", "0.022061 | \n", "83.261974 | \n", "
3000 | \n", "0.008800 | \n", "0.022081 | \n", "80.992463 | \n", "
3100 | \n", "0.007100 | \n", "0.024494 | \n", "81.058833 | \n", "
"
],
"text/plain": [
"