hipom_data_mapping/post_process/tfidf_class/2z.plot_classfication.ipynb

148 lines
173 KiB
Plaintext
Raw Normal View History

2024-09-25 08:52:30 +09:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA4sAAAIrCAYAAABGeAw5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hT1RvA8W+S7l1ayi57yBARUX6AbBARERcgiANQHOAeKKCoqLhwIE4QlCGgAooWWTIEFJAhSxBkQxmF0j3S5P7+OCRN2qRN27RJ2/fzPPfpTXLuvae3t+197znnPTpN0zSEEEIIIYQQQggbek9XQAghhBBCCCGE95FgUQghhBBCCCFEPhIsCiGEEEIIIYTIR4JFIYQQQgghhBD5SLAohBBCCCGEECIfCRaFEEIIIYQQQuQjwaIQQgghhBBCiHwkWBRCCCGEEEIIkY+PpysgSp/ZbOb06dOEhoai0+k8XR0hhBBCCCGEh2iaRkpKCjVr1kSvL7jtUILFSuD06dPUqVPH09UQQgghhBBCeIkTJ05Qu3btAstIsFgJhIaGAuqCCAsL83BtRGkyGo2sWLGC3r174+vr6+nqCFFkcg2L8kyuX1GeyfVbeSQnJ1OnTh1rjFAQCRYrAUvX07CwMAkWKzij0UhQUBBhYWHyh16US3INi/JMrl9Rnsn1W/m4MjxNEtwIIYQQQgghhMhHgkUhhBBCCCGEEPlIsCiEEEIIIYQQIh8JFoUQQgghhBBC5CPBohBCCCGEEEKIfCRYFEIIIYQQQgiRjwSLQgghhBBCCCHykWBRCCGEEEIIIUQ+EiwKIYQQQgghhMhHgkUhhBBCCCGEEPlIsCiEEEIIIYQQIh8JFoUQQgghhBBC5CPBohBCCCGEG6xerWP06G6sXq3zdFWEEMItJFgUQgghhCghTYPx4/WcPBnG+PF6NM3TNRJCiJKTYFEIIYQQooRWrIBt29Rt1bZtelas8HCFhBDCDSRYFEIIIYQoAU2DCRPAYFDNiQaDxoQJSOuiEKLck2BRCCGEEKIEfvoJtm4Fk0mNVTSZdGzdirQuCiHKPR9PV0AIIYQQwptpGpw5A4cPw3//qa+W5b//1GeO3HwztGkDsbFQp07uV8tSrRro5bG9EMKLSbAohBBCiEovIwOOHLEPAi3rR46oz4vKaIQtW9TiiK8v1K6dP5C0XY+IAJ0kVxVCeIgEi0IIIYSo8GxbB/O2DB4+DPHxBW+v16sgrkEDtTRsCPXrw6uvwoEDYDI53qZ+fRg9Gk6ehOPH4cQJtcTHq2DyyBG1OBMc7DyQtCxBQSU7N0II4YwEi0IIIYSoEDIy4OjR/F1FLUthrYNhYSoItASElqCwQQMVpPn62pdfvhz27XO+P7NZ1eWKK+CJJ+w/Mxrh9Onc4NE2kLSsX7gAaWnwzz9qcSYqquDWyZo189ddCCFcIcGiEEIIIcoFTYOzZx13FT18WAVfBdHrVfCUNyC0LFWquN7lMzcDquNWRQuDQZXr3dt+376+ULeuWpxJT8/fIpl3PS1NBZUXLsDOnc6/7xo1HAeSlvWqVWX8pBAiPwkWhRBCeI3Vq3WMHt2NL7/U0aePp2sjPMHSOuioq6grrYOhobnBYN6gMDYW/PzcU88VK1QG1MKYTFgzo95wQ9GOERQETZqoxRFNg0uXCm6dPHlStWKeOqWWP/90vC8/PzV+0lnrZGwshIcXrf7utmoVPPYYfPQR9Ozp2boIUVlIsCiEEMIraBqMH6/n5Mkwxo83c8MNktjD3bzhZlvT4Nw5511FT50qeHtL62DeVkFLYFiU1sGSfA8TJqi6mM2Fl9frHbculpROB5GRarnySsdlzGZ1vgtqnTxzBrKzc38GzoSGFtw6Wbs2BAa67/uzpWnw4ouqO+6LL0KPHvL3QYiyIMGiEEIIr7BiBWzbpvrBbdumL1ZLjHCuLG+2MzNzWwcdBYXp6QVvHxrquGWwQQPVbdNdrYPFlZ2tgixXAkVQ5U6cUNv5+5du3fLS66F6dbVce63jMtnZqguvbRCZN6i8eBFSUtQYzYLGaUZHF9w6WaMG+BTj7tO2Jbe4LbVCiKKTYFEIIYTH5Y7/0jCZdBgMGhMm6NzeElOZufNm29I66KyraGGtgzqdfetg3qAwKsq7f+7+/uocnj9v/35OjpENGzbSqVNHfHzsM8rExJR9oOgqPz+oV08tzqSlOQ8kLevp6ZCQoJbt2x3vR69XCXcKSshTtar9zz/v+FBn40CFEO4nwaIQQgiPyw1k1J2fyaRj61b48kvo2lUlA7EsPj72rw0GuWEsTHFutm1bBx0taWkFHzMkxHHLYMOGKjDw1sDJVZbAxpbRCPHxSbRpU/GyjwYHQ7NmanFE0yAxseDWyZMnISdHfT15Ev74w/G+/P3tpwYxGu3Hh5ZkHKgQomgkWBRCCOFRlkBGp1PrtkaNcm0fBQWTeZfy8Lm7s1LmTcZiudleuFB163TUVfTUqfw/D1s6nRqj5qy7aHS0BPGViU6nxotWqQJXXeW4jMmkstnmDSJtX585A1lZcOiQWpwprXGgQgh7EiwKIYTwqMKySlomHDca1eJIQZ+VR3q9+4JRHx+Ii3McjA8eXHA9goPtA0Hb9bp1y3/roChbBoPqglqzJlx3neMyWVnqQYUliFy9GmbNyl/ObFZ/NyZPhrFjJWAUorRIsCiEEMJjLElXnDEYoEUL2Lw5N9gxmXKDw5yc3HVnS2FlSvvzwso4SpJiNqub5qys0jv3FtHR6hw76i4qrYOirPn7516DmqYy9xY0l+WLL8KcOfDEE3D33aWXjVWIykqCRSGEEB6zYoXzRBiQf2ySTqdaynx8Ks5NodlcegFpdjZ88IHzLqUGA9SvD2vWSFAovI+rc1nu2wcPPggvvAAPPQSPPKJaL4UQJSfBohDl1PGk4ySkJ9i9l5OTw3/p/7HjzA588uQmjw6KJjY8tiyrKESBNA1Gjy68XEXPfKjXq2yUpTEdxPLlKpGIM5IoRHirvEmZnLF0bdXp1LjH11+Ht9+GQYNUa2PbtmVWZSEqJAkWhSiHjicdp+nHTcnMyXRc4N/8bwX4BHBg9AEJGIXX+PbbghNYWEhAUzxFudmuyMG4KJ9cbVU0mdTYxl9+gYwM1ZK+YYPqmjpnDnTqBE8+Cbfcoq51IUTRuDnfmhCiLCSkJzgPFJ3IzMnM1xIphKeYTPDww66Xt2Q+LCg7p7BnudkuKFAE+2BcCG9gedDhalZgvR4mToTbboPff1fX89Chqrv6hg1w++3QqBFMmQJJSaVadSEqnAoTLB49epQJEybQqVMnoqOj8fX1JSQkhAYNGnDbbbcxZ84cjEVIlZeRkcGMGTPo378/9evXJygoiPDwcJo0acJNN93Eu+++y8GDB91W//j4eCZOnEjbtm2JiooiKCiIhg0bct9997F+/Xq3HUcIIbzBlCmQnOx6ebNZtR5kZ5denSqS4txsSzAuvEV2tupS6ij5kyN5/z5cc41qVTx2DMaNg6goNWfo00+r6V4ef1xNFyOEKJxO08r/v4YpU6bw4osvklVI2rimTZvy/fff07JlywLL/fLLLzzyyCMcP368wHKPP/44H3zwQVGrm8+PP/7I/fffT2JiotMyo0aNYtq0aRiK0YciOTmZ8PBwkpKSCAsLK0lVhZfYHr+dtl8UfSDGtge3cXWNq0uhRkK4btcuaNdO3diNHQt33pn7WU6OkQ0bNtKpU0d8fOxnNY+JUTd6onBZWWpqi7NnXd+menV1Qy3TYRSf0WgkLi6Ovn374uvrW/gGwqkTJ+D8edfLF/T3IT0d5s5VXVT37VPv6XTQv78a19ili3TBBrl+K5OixAblfszixx9/zNNPP2193aFDB/r370+dOnVITk5m7969zJo1i9TUVA4cOEC3bt3YvXs31atXd7i/mTNnMnLkSMxmMzqdjt69e9OzZ09q1aqFTqcjPj6ebdu2sWzZMrfUf82aNQwcOJDsy4/DbrrpJvr3709wcDDbt29nxowZJCUl8fnnn6PT6fj000/dclwhhPCEjAwYMkQFiv36wRtv2N+kGY0QH59EmzZ
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"# k 값\n",
"k_values = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])\n",
"\n",
"# BoW, TF-IDF, Word2Vec 실험 결과\n",
"bow_c = np.array([85.1, 85.1, 85.1, 85.76, 85.78, 85.84, 85.75, 85.72, 85.58])\n",
"bow_e = np.array([84.43, 84.43, 84.43, 84.95, 84.88, 85.00, 84.95, 85.00, 84.94])\n",
"bow_j = np.array([85.36, 85.36, 85.36, 86.10, 86.09, 86.16, 86.10, 85.87, 85.78])\n",
"\n",
"tfidf_c = np.array([83.66, 83.66, 83.66, 84.37, 84.37, 84.42, 84.44, 84.38, 84.44])\n",
"tfidf_e = np.array([83.66, 83.66, 83.66, 84.42, 84.33, 84.46, 84.45, 84.45, 84.46])\n",
"\n",
"word2vec_c = np.array([84.43, 84.43, 84.43, 85.03, 85.04, 84.91, 84.93, 84.92, 84.93])\n",
"word2vec_e = np.array([85.69, 86.04, 85.85, 85.88, 85.84, 85.81, 85.84, 85.86, 85.84])\n",
"\n",
"# 그래프 설정\n",
"plt.figure(figsize=(10, 6))\n",
"\n",
"# 글씨 크기 설정\n",
"plt.rcParams.update({'font.size': 22})\n",
"\n",
"# 도형 크기를 1.5배로 설정\n",
"marker_size = 9 # 기본 크기가 6이므로 1.5배로 설정\n",
"\n",
"# BoW (C, E, J) -> 색상: 파란색, 도형: 원(circle), 사각형(square), 삼각형(triangle_up)\n",
"plt.plot(k_values, bow_c, 'bo--', label='KNN BoW C', markersize=marker_size) # C: 점선\n",
"plt.plot(k_values, bow_e, 'bs-', label='KNN BoW E', markersize=marker_size)\n",
"plt.plot(k_values, bow_j, 'b^-', label='KNN BoW J', markersize=marker_size)\n",
"\n",
"# TF-IDF (C, E) -> 색상: 빨간색, 도형: 원(circle), 사각형(square)\n",
"plt.plot(k_values, tfidf_c, 'ro--', label='KNN TF-IDF C', markersize=marker_size) # C: 점선\n",
"plt.plot(k_values, tfidf_e, 'rs-', label='KNNTF-IDF E', markersize=marker_size)\n",
"\n",
"# Word2Vec (C, E) -> 색상: 녹색, 실선, 도형: 원(circle), 사각형(square)\n",
"plt.plot(k_values, word2vec_c, 'go--', label='KNN Word2Vec C', markersize=marker_size) # C: 점선\n",
"plt.plot(k_values, word2vec_e, 'gs-', label='KNN Word2Vec E', markersize=marker_size) # 실선\n",
"\n",
"# 그래프 꾸미기\n",
"plt.xlabel('K')\n",
"plt.ylabel('Recall')\n",
"plt.legend()\n",
"\n",
"# 그래프 출력\n",
"plt.grid(True)\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA24AAAHhCAYAAAAWKO1DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAC5aUlEQVR4nOzdd3gUVdvH8e+mAyGh915EmqAIgtKkCii9I11EUOw+vhaw8ehjx4IIilQpiiCo9I6ooCCCSC9SAwQI6clmM+8fY9YE0rPJbDa/z3Xl2tnZMzN3MiHsveec+9gMwzAQERERERERt+VldQAiIiIiIiKSPiVuIiIiIiIibk6Jm4iIiIiIiJtT4iYiIiIiIuLmlLiJiIiIiIi4OSVuIiIiIiIibk6Jm4iIiIiIiJvzsTqAgiYxMZFz585RtGhRbDab1eGIiIiIiIhFDMMgIiKCChUq4OWVfp+aErc8du7cOSpXrmx1GCIiIiIi4iZOnz5NpUqV0m2jxC2PFS1aFDBvTlBQkKWx2O121q5dS6dOnfD19bU0FnEd3VfPo3vqmXRfPY/uqWfSffU87nRPw8PDqVy5sjNHSI8StzyWNDwyKCjILRK3woULExQUZPkvrbiO7qvn0T31TLqvnkf31DPpvnoed7ynmZlCpeIkIiIiIiIibk6Jm4iIiIiIiJtT4iYiIiIiIuLmlLiJiIiIiIi4OSVuIiIiIiIibk6Jm4iIiIiIiJtT4iYiIiIiIuLmlLiJiIiIiIi4OSVuIiIiIiIibk6Jm4iIiIiIiJtT4iYiIiIiIuLmlLiJiIiIiIi4OSVuIiIiIiIibk6Jm4iIiIiIiJtT4iYiIiIiIuLmlLiJiIiIiIi4OSVuIiIiIiIibk6Jm4iIiIiIiJtT4iYiIiIiIuLmlLiJiIiIiIi4OSVuIiIiIiIibk6Jm4iIiIiIiJtT4iYiIiIiIuLmlLiJiIiIiIi4OSVuIiIiIiIibk6Jm4iIiIiIiJtT4iYiIiIiIuLmlLiJiIiIiIi4OSVuIiIiIiIibk6Jm4iIiIiIiJtT4iYiIiIiIuLmlLiJiIiIiIi4OY9K3A4ePMhjjz1Go0aNKF68OAEBAVStWpUePXqwcOFCEhMTM32uo0eP8swzz9CgQQOCg4MJDAykTp06PPzww+zZsyf3vgkREREREZHr+FgdgCs4HA5eeOEF3nrrLQzDSPHaqVOnOHXqFCtWrGDq1Kl89dVXVKhQId3zzZgxg8cff5yYmJgU+w8fPszhw4eZPn06kyZNYtKkSS7/XkRERERERK7nEYnbI488wqeffgqAt7c3AwYMoF27dhQtWpQTJ04wf/58/vzzT7Zv306nTp348ccfKVasWKrnmj9/PmPHjgXAy8uLgQMH0r59e3x8fNi+fTtz5swhLi6Ol156CX9/f5599tm8+jZFRERERKSAyveJ27p165xJW5EiRVi9ejUtW7ZM0ebpp59m3LhxfPbZZ+zfv5/nn3+eTz755IZzXbp0iYcffhgwk7Zly5bRvXt35+vDhg1j5MiRtG/fnujoaF588UV69uxJnTp1cvE7FBERERGRgi7fz3F7//33nduTJ0++IWkDsxfuk08+cSZYM2bM4OTJkze0e+eddwgPDwfg4YcfTpG0JWnevDmvvfYaAAkJCbzyyiuu+DZERFI6dQp27za/fv+d4GPH4Pff/9136pTVEUp26L6KiEg25eset8TERDZv3gyAzWZj2LBhabb18fHh/vvvZ+LEiTgcDhYtWsT//d//pWizePFi5/YTTzyR5rnGjBnDpEmTiIqKYsWKFcTExFCoUKGcfTMiIklOnYI6dSA2FgBfoO31bQIC4NAhqFIlj4OTbNN9FRGRHMjXPW6XL192FhApXbo0JUqUSLd98iGN3333XYrX/vrrL/7++28A6tatS/Xq1dM8T9GiRWnVqhUAUVFRbNmyJVvxi4ikKjTU+eY+TbGxZjvJP3RfRUQkB/J14nZ9Bcms2LdvX5rPmzZtmuHxydtcfy4REREpIDT8VUTySL4eKlmiRAl8fX2x2+1cunSJq1evUrx48TTbHz582LkdERHB2bNnqVixIgCHDh1yvpZeb1tqbZIfKyKSZ554AtKokCtuKCwsc+3eeAMqVABf35RfPj437ktvf3aO8fYGmy1XfwweRcNfRSQP5evEzcfHhxYtWrB161YMw2DevHk8+uijqbZ1OBzMnz8/xb6wsDBn4haW7D/UUqVKZXjtkiVLpjhPWuLi4oiLi3M+Typ+YrfbsdvtGV4nNyVd3+o4xLV0X/OZa9ew/fUX7N+P7a+/sO3fj+2PP8jUW+etW3M7OrHCkiWWXt5IL9lL9pqRWiJ4/bH/PL+hbRrXMNJ7/bp9KeJML+bcTEZDQvDNxPBXe0gIlC+fOzFIrtP/q57Hne5pVmLI14kbwIMPPsjWf968vPDCCzRt2pQWLVqkaJOYmMiECRM4ePBgiv1JSRRAZGSkczsgICDD6yYvRhIREZFmuzfeeCPVypNr166lcOHCGV4nL6xbt87qECQX6L66F+/YWIqeOUPRv/8m6NQpip4+TdDff1Po8uVsn/Ng//7ElC7twiglNxW6dImbv/oqw3YnOnbEHhyMLSEBL4cD2z9fXsmeeyUkpNhvczjwSr6dkIAtMdF5jrT221KZcmBLSICEBPhnDnla8lO/XKKPD4ne3hje3iT6+JiP1z/Pxn7fiAgqZ+L6v/7wA1dOnsTh56cezXxM/696Hne4p9HR0Zlum+8Tt0GDBjFv3jzWrFlDZGQkrVu3vmEB7i+//JK9e/dSunRpYmNjnYmWl1fuT/F77rnnePLJJ53Pw8PDqVy5Mp06dSIoKCjXr58eu93OunXr6NixI76+vpbGIq6j+2qxuDg4ePDf3rP9+7EdOAAnTqT6JhnAqFgRo359jHr1MOrXB29vfEaNyvBSNZ96Cm691dXfgeSW33+HTCRulSZPztX7avzzlQjgcIDdbn4lJPy7nfzrn0TOltr+VPbZMnGupOe21M6VyrYtnXMk/7Kl8cm1V0ICXgkJufYzzcidr74KgOHvD8WLQ/HiGP88prldogRGsWJQooQ5JFp/zy2j/1c9jzvd0+QdSRnJ94mbl5cXX3/9NcOHD2fZsmUkJCTw5Zdf8uWXX6ZoV758eZYvX06nTp2c+5LPhwsMDHRux2Y07AGc1SzBrDKZFn9/f/z9/W/Y7+vra/kvShJ3ikVcR/c1l9ntcPQo/Pmn+bV/v/l49Kj5Zjg1ZcpA/frQoEGKR1uxYil7L3bvzlQIvknDwyR/8Mncf7l5el99fc05WJ7CMCAxMe2k0dX7T5+GWbMyjsvbGxwObHFxEBICISFZ77EMDDSTuH+SuhSPqe1LegwKgjz4oLog0P+rnscd7mlWrp/vEzcwE6elS5eyfv16Zs+ezU8//URISAje3t5Ur16dXr168dhjj1G0aFFnb5vNZqNs2bLOcxRLNsE/NBOlmC8nG95UTMUBRDyXwwEnTqRMzvbvh4MHzTduqSlW7IbkjPr1zcQtM0qVMt9Mp/chUkCA2U7yD93X3GezmUmSt3feJKS7d2cucduxA266Ca5ehStXzMfk29c/Jt9OmkcfGWl+ZbVKpZeX+Tcpqwlf8eJQuLCGdoq4EY9I3JJ06NCBDh06pPn6vn37cPzzSfhNN91EcHCw87Xka7ydOHEiw2slb5P8WBHJpwzD/PT8+h60AwfSnutTpMiNPWgNGphFCHLyZqdKFbMKXWgoDgf89lsC27cf5q67buL2233w9sZ8c68qdfmL7mvBZbNB0aLmV1bvr8MB166ln9yl9RgdbfZAXrlifh07lrVr+/llL+ErXtw81gM4HLBli42tWytSpIiNu+82PxeQ/Cs/31OPStwysnnzZud269atU7zWsGFD5/avv/6a4bmSt2nQoEHOgxORvGEY5lClpMQsKUnbvx/SKjTk7w/16qVMzurXN9+A5dYQpCpVWPpbFR57DM6cAWgGX0KlSvDBB9D7tty5rOQy3VfJKm9vMykqUSLrx8bFpZ3kZZT4JSRAfDxcuGB+ZVWRItkb2hkc7B5DO0+
"text/plain": [
"<Figure size 1000x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# 데이터\n",
"C_values = [0.1, 1, 10, 100, 1000, 10000, 100000]\n",
"BoW_scores = [84.74, 89.12, 89.36, 89.36, 89.36, 89.36, 89.36]\n",
"TFIDF_scores = [69.54, 88.67, 90.02, 90.02, 89.87, 89.33, 89.18]\n",
"Word2Vec_scores = [29.1, 51.54, 71.46, 79, 77.62, 87.28, 87.12]\n",
"\n",
"# 플로팅\n",
"plt.figure(figsize=(10, 5)) # 가로 8, 세로 4로 상하 크기를 줄임\n",
"plt.rcParams.update({'font.size': 22})\n",
"\n",
"plt.plot(C_values, BoW_scores, label='SVM BoW', marker='o', color='blue')\n",
"plt.plot(C_values, TFIDF_scores, label='SVM TF-IDF', marker='s', color='red')\n",
"plt.plot(C_values, Word2Vec_scores, label='SVM Word2Vec', marker='^', color='green')\n",
"\n",
"# 로그 스케일로 C 값 표현\n",
"plt.xscale('log')\n",
"\n",
"# y축 범위 설정\n",
"plt.ylim(67, 93)\n",
"\n",
"# 제목과 레이블\n",
"\n",
"plt.xlabel('C')\n",
"plt.ylabel('Recall')\n",
"\n",
"# 범례와 그리드\n",
"plt.legend()\n",
"plt.grid(True)\n",
"\n",
"# 글자 크기 설정\n",
"plt.xticks()\n",
"plt.yticks()\n",
"\n",
"# 그래프 출력\n",
"plt.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "torch",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}