notebook/traceability.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "PKPL.ipynb",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "XE2oNhk3bDQy"
},
"source": [
"# Preprocessing"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "m0oXXtkjWcnQ"
},
"source": [
"## Data Reading & Inspection"
]
},
{
"cell_type": "code",
"metadata": {
"id": "IGxjT-sOHPL0",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 176
},
"outputId": "72f812a6-012f-43ce-aa23-01a703d98faa"
},
"source": [
"!git clone -l -s https://github.com/asyrofist/tugasPKPL.git cloned_pkpl\n",
"%cd cloned_pkpl\n",
"!ls"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Cloning into 'cloned_pkpl'...\n",
"warning: --local is ignored\n",
"remote: Enumerating objects: 54, done.\u001b[K\n",
"remote: Counting objects: 100% (54/54), done.\u001b[K\n",
"remote: Compressing objects: 100% (50/50), done.\u001b[K\n",
"remote: Total 54 (delta 23), reused 12 (delta 3), pack-reused 0\u001b[K\n",
"Unpacking objects: 100% (54/54), done.\n",
"/content/cloned_pkpl\n",
"nltk PKPL.ipynb README.md sklearn srs2.csv\tsrs3.csv TPOT.ipynb\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "DJueAR_wIByc",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 863
},
"outputId": "d41b373b-4a75-4f8f-b309-0c6c18f5cff9"
},
"source": [
"import pandas as pd\n",
"from time import time\n",
"\n",
"print(\"Loading Dataset...\")\n",
"t0 = time()\n",
"dataset2 = pd.read_csv('/content/cloned_pkpl/srs2.csv')\n",
"print(\"done in %0.3fs.\" % (time() - t0))\n",
"dataset2.head(n = 100)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Dataset...\n",
"done in 0.016s.\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Requirement Statement</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>F01</td>\n",
" <td>Users can create a new diary.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>F02</td>\n",
" <td>Users can add titles to the diary.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>F03</td>\n",
" <td>Users can add weather to the diary.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>F04</td>\n",
" <td>Users can add dates to the diary.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>F06</td>\n",
" <td>Users can add hours to the diary.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>F07</td>\n",
" <td>Users can add seconds to the diary.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>F08</td>\n",
" <td>Users can save diaries.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>F09</td>\n",
" <td>Users can add photos to the diary.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>F10</td>\n",
" <td>Users can read the diary that has been created.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>F11</td>\n",
" <td>Users can share diaries in the form of postcards.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>F12</td>\n",
" <td>Users can save postcards.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>F13</td>\n",
" <td>Users can change the postcard background color.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>F14</td>\n",
" <td>Users can change the color of posts on the pos...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>F15</td>\n",
" <td>Users can delete the diary.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>F16</td>\n",
" <td>Users can edit the diary that has been created.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>F17</td>\n",
" <td>Users can open a calendar that contains a diary.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>F18</td>\n",
" <td>The system can open a diary editor.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>NF01</td>\n",
" <td>The application has a high level of availabili...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>NF02</td>\n",
" <td>Applications must have a high degree of flexib...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>NF03</td>\n",
" <td>This application must have a high level of int...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>NF04</td>\n",
" <td>This application has a high usability aspect a...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>NF05</td>\n",
" <td>This application must have a response time val...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>NF06</td>\n",
" <td>Applications must have a high level of interop...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>NF07</td>\n",
" <td>Documentation must be included in the system s...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>NF08</td>\n",
" <td>The application must have a high degree of fle...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>NF09</td>\n",
" <td>The modules in the system will be designed and...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Requirement Statement\n",
"0 F01 Users can create a new diary.\n",
"1 F02 Users can add titles to the diary.\n",
"2 F03 Users can add weather to the diary.\n",
"3 F04 Users can add dates to the diary.\n",
"4 F06 Users can add hours to the diary.\n",
"5 F07 Users can add seconds to the diary.\n",
"6 F08 Users can save diaries.\n",
"7 F09 Users can add photos to the diary.\n",
"8 F10 Users can read the diary that has been created.\n",
"9 F11 Users can share diaries in the form of postcards.\n",
"10 F12 Users can save postcards.\n",
"11 F13 Users can change the postcard background color.\n",
"12 F14 Users can change the color of posts on the pos...\n",
"13 F15 Users can delete the diary.\n",
"14 F16 Users can edit the diary that has been created.\n",
"15 F17 Users can open a calendar that contains a diary.\n",
"16 F18 The system can open a diary editor.\n",
"17 NF01 The application has a high level of availabili...\n",
"18 NF02 Applications must have a high degree of flexib...\n",
"19 NF03 This application must have a high level of int...\n",
"20 NF04 This application has a high usability aspect a...\n",
"21 NF05 This application must have a response time val...\n",
"22 NF06 Applications must have a high level of interop...\n",
"23 NF07 Documentation must be included in the system s...\n",
"24 NF08 The application must have a high degree of fle...\n",
"25 NF09 The modules in the system will be designed and..."
]
},
"metadata": {
"tags": []
},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "2NWKG3eRm51O",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 434
},
"outputId": "f3203767-10e4-4556-c125-593482985af6"
},
"source": [
"import nltk\n",
"import string\n",
"from nltk.tokenize import word_tokenize \n",
"from nltk.probability import FreqDist\n",
"from nltk.tokenize import sent_tokenize, word_tokenize\n",
"from nltk.corpus import stopwords\n",
"from nltk.stem import PorterStemmer\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import re \n",
"\n",
"nltk.download('stopwords')\n",
"nltk.download('punkt')\n",
"\n",
"stemming = PorterStemmer()\n",
"stops = set(stopwords.words(\"english\"))\n",
"\n",
"# cleaning text\n",
"def text_cleaning(raw):\n",
" \n",
" # lowering word\n",
" lower_case = raw.lower()\n",
"\n",
" # hapus punctuatioation & lower\n",
" hasil_punctuation = lower_case.translate(str.maketrans(\"\",\"\",string.punctuation))\n",
" \n",
" # hapus whitespace\n",
" hasil_whitespace = hasil_punctuation.strip()\n",
"\n",
" # hapus angka\n",
" hasil_hapusangka = re.sub(r\"\\d+\", \"\", hasil_whitespace)\n",
" \n",
" # tokenisasi \n",
" tokens = nltk.tokenize.word_tokenize(hasil_hapusangka)\n",
" \n",
" # Stemming\n",
" stemmed_words = [stemming.stem(w) for w in tokens]\n",
" \n",
" # Remove stop words\n",
" meaningful_words = [w for w in stemmed_words if not w in stops]\n",
" \n",
" # Rejoin meaningful stemmed words\n",
" joined_words = ( \" \".join(meaningful_words))\n",
" \n",
" # Return cleaned data\n",
" return joined_words \n",
"\n",
"# applying\n",
"def apply_cleaning(hasil):\n",
" cleaned_hasil = []\n",
" for element in hasil:\n",
" cleaned_hasil.append(text_cleaning(element))\n",
" return cleaned_hasil\n",
"\n",
"# Load data example (dari functional maupun nonfunctional)\n",
"statement = dataset2\n",
"\n",
"# Truncate data for example\n",
"statement = statement.head(100)\n",
"\n",
"# Get text to clean (dari row yang diinginkan)\n",
"text_to_clean = list(statement['Requirement Statement'])\n",
"\n",
"# Clean text\n",
"print(\"Loading Original & Cleaned Text...\")\n",
"t0 = time()\n",
"cleaned_text = apply_cleaning(text_to_clean)\n",
"\n",
"# Show first example\n",
"print(\"done in %0.3fs.\" % (time() - t0))\n",
"pd.DataFrame([text_to_clean, cleaned_text],index=['ORIGINAL','CLEANED'], columns= statement['ID'])"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
"[nltk_data] Unzipping corpora/stopwords.zip.\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Unzipping tokenizers/punkt.zip.\n",
"Loading Original & Cleaned Text...\n",
"done in 0.018s.\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>ID</th>\n",
" <th>F01</th>\n",
" <th>F02</th>\n",
" <th>F03</th>\n",
" <th>F04</th>\n",
" <th>F06</th>\n",
" <th>F07</th>\n",
" <th>F08</th>\n",
" <th>F09</th>\n",
" <th>F10</th>\n",
" <th>F11</th>\n",
" <th>F12</th>\n",
" <th>F13</th>\n",
" <th>F14</th>\n",
" <th>F15</th>\n",
" <th>F16</th>\n",
" <th>F17</th>\n",
" <th>F18</th>\n",
" <th>NF01</th>\n",
" <th>NF02</th>\n",
" <th>NF03</th>\n",
" <th>NF04</th>\n",
" <th>NF05</th>\n",
" <th>NF06</th>\n",
" <th>NF07</th>\n",
" <th>NF08</th>\n",
" <th>NF09</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ORIGINAL</th>\n",
" <td>Users can create a new diary.</td>\n",
" <td>Users can add titles to the diary.</td>\n",
" <td>Users can add weather to the diary.</td>\n",
" <td>Users can add dates to the diary.</td>\n",
" <td>Users can add hours to the diary.</td>\n",
" <td>Users can add seconds to the diary.</td>\n",
" <td>Users can save diaries.</td>\n",
" <td>Users can add photos to the diary.</td>\n",
" <td>Users can read the diary that has been created.</td>\n",
" <td>Users can share diaries in the form of postcards.</td>\n",
" <td>Users can save postcards.</td>\n",
" <td>Users can change the postcard background color.</td>\n",
" <td>Users can change the color of posts on the pos...</td>\n",
" <td>Users can delete the diary.</td>\n",
" <td>Users can edit the diary that has been created.</td>\n",
" <td>Users can open a calendar that contains a diary.</td>\n",
" <td>The system can open a diary editor.</td>\n",
" <td>The application has a high level of availabili...</td>\n",
" <td>Applications must have a high degree of flexib...</td>\n",
" <td>This application must have a high level of int...</td>\n",
" <td>This application has a high usability aspect a...</td>\n",
" <td>This application must have a response time val...</td>\n",
" <td>Applications must have a high level of interop...</td>\n",
" <td>Documentation must be included in the system s...</td>\n",
" <td>The application must have a high degree of fle...</td>\n",
" <td>The modules in the system will be designed and...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CLEANED</th>\n",
" <td>user creat new diari</td>\n",
" <td>user add titl diari</td>\n",
" <td>user add weather diari</td>\n",
" <td>user add date diari</td>\n",
" <td>user add hour diari</td>\n",
" <td>user add second diari</td>\n",
" <td>user save diari</td>\n",
" <td>user add photo diari</td>\n",
" <td>user read diari ha creat</td>\n",
" <td>user share diari form postcard</td>\n",
" <td>user save postcard</td>\n",
" <td>user chang postcard background color</td>\n",
" <td>user chang color post postcard</td>\n",
" <td>user delet diari</td>\n",
" <td>user edit diari ha creat</td>\n",
" <td>user open calendar contain diari</td>\n",
" <td>system open diari editor</td>\n",
" <td>applic ha high level avail oper continu seven ...</td>\n",
" <td>applic must high degre flexibilitywhich must a...</td>\n",
" <td>thi applic must high level integr data secur i...</td>\n",
" <td>thi applic ha high usabl aspect easi use inter...</td>\n",
" <td>thi applic must respons time valu quit good fa...</td>\n",
" <td>applic must high level interoperabilitybecaus ...</td>\n",
" <td>document must includ system sourc code develop...</td>\n",
" <td>applic must high degre flexibilityand use user...</td>\n",
" <td>modul system design program structur way conti...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"ID F01 ... NF09\n",
"ORIGINAL Users can create a new diary. ... The modules in the system will be designed and...\n",
"CLEANED user creat new diari ... modul system design program structur way conti...\n",
"\n",
"[2 rows x 26 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 3
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "xImrib3hVC5-"
},
"source": [
"# Bag of Words"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "SDaKVClvVHfO"
},
"source": [
"### Implementing Bag of Words in scikit-learn"
]
},
{
"cell_type": "code",
"metadata": {
"id": "dYwTMw6CVUXv",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 372
},
"outputId": "68c1a3ef-24ef-4c84-d8f4-ed00fadf1009"
},
"source": [
"import pandas as pd\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"count_vector = CountVectorizer(cleaned_text)\n",
"\n",
"print(\"Loading Counting Vector...\")\n",
"t0 = time()\n",
"\n",
"print(count_vector)\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Counting Vector...\n",
"CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n",
" dtype=<class 'numpy.int64'>, encoding='utf-8',\n",
" input=['user creat new diari', 'user add titl diari',\n",
" 'user add weather diari', 'user add date diari',\n",
" 'user add hour diari', 'user add second diari',\n",
" 'user save diari', 'user add photo diari',\n",
" 'user read diari ha creat',\n",
" 'user share diari form postcard', 'user save postcard',\n",
" 'user...\n",
" 'chang addit sourc code made high level maintain',\n",
" 'applic must high degre flexibilityand use user age '\n",
" 'seven year abov',\n",
" 'modul system design program structur way continu use '\n",
" 'system developmentso reusabl level high'],\n",
" lowercase=True, max_df=1.0, max_features=None, min_df=1,\n",
" ngram_range=(1, 1), preprocessor=None, stop_words=None,\n",
" strip_accents=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n",
" tokenizer=None, vocabulary=None)\n",
"done in 0.003s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "j7Z3nUFTVYQx",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 90
},
"outputId": "55ae4c0b-bae7-49bd-e0c5-9af600a3b465"
},
"source": [
"print(\"Loading feature names...\")\n",
"t0 = time()\n",
"\n",
"count_vector.fit(cleaned_text)\n",
"print(count_vector.get_feature_names())\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading feature names...\n",
"['abl', 'abov', 'access', 'add', 'addit', 'age', 'android', 'applic', 'aspect', 'avail', 'background', 'calendar', 'chang', 'code', 'color', 'connect', 'contain', 'continu', 'creat', 'data', 'date', 'day', 'degre', 'delet', 'design', 'develop', 'developmentso', 'devic', 'diari', 'document', 'easi', 'edit', 'editor', 'extend', 'facebook', 'fast', 'flexibilityand', 'flexibilitywhich', 'form', 'good', 'ha', 'high', 'hour', 'hoursth', 'includ', 'integr', 'interfac', 'interoperabilitybecaus', 'introduc', 'kitkat', 'less', 'level', 'lineinstagramand', 'made', 'maintain', 'media', 'minut', 'modul', 'must', 'need', 'new', 'one', 'open', 'oper', 'password', 'per', 'perform', 'photo', 'post', 'postcard', 'program', 'quit', 'read', 'reread', 'respons', 'result', 'reusabl', 'run', 'save', 'second', 'secur', 'seven', 'share', 'social', 'sourc', 'stop', 'structur', 'system', 'thi', 'three', 'time', 'titl', 'toler', 'two', 'usabl', 'use', 'user', 'valu', 'version', 'way', 'weather', 'week', 'without', 'year']\n",
"done in 0.008s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "gJI7AdnDVfFb",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 176
},
"outputId": "5e8f82ee-cdd8-48c6-afd0-0b200eedb71e"
},
"source": [
"print(\"Loading Document array...\")\n",
"t0 = time()\n",
"\n",
"doc_array = count_vector.transform(cleaned_text).toarray()\n",
"print(doc_array)\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Document array...\n",
"[[0 0 0 ... 0 0 0]\n",
" [0 0 0 ... 0 0 0]\n",
" [0 0 0 ... 0 0 0]\n",
" ...\n",
" [0 0 0 ... 0 0 0]\n",
" [0 1 0 ... 0 0 1]\n",
" [0 0 0 ... 0 0 0]]\n",
"done in 0.001s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "gLFj_IHX-jr8",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"outputId": "df691933-cabe-4d64-e600-c9d391207025"
},
"source": [
"plt.hist(doc_array)\n",
"plt.xlabel('doc_array')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 7
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEHCAYAAACp9y31AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAUNElEQVR4nO3df7AlZX3n8feHHxFRgrAzgSlCGCAUiEERR01UjGhckYBoshEpZdElQTaQlUqylYmkktlU7cY/FslmY1SMrOCq8ecYXHETmGFR4wIO48gPR0QREmACo7IOagJCvvvH6TscLvfOnHvn9Dkz87xfVaem++nu09/bt+dz+3T3eTpVhSSpHXtMuwBJ0mQZ/JLUGINfkhpj8EtSYwx+SWrMXtMuYBRLliyp5cuXT7sMSdql3HTTTd+pqqWz23eJ4F++fDnr1q2bdhmStEtJcvdc7Z7qkaTGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY3pLfiTHJrk2iRfS3Jbkrd17auS3JtkQ/c6pa8aJElP1ud9/I8Cv1NV65PsB9yU5Opu2iVV9V97XLckaR69BX9VbQI2dcMPJdkIHNLX+iRJo5nIOf4ky4HnAjd0TRckuTnJZUkOmGeZc5OsS7Ju8+bNO7T+i884lYOv3cDGY565te2elV9g+crP8q7z1m6dtmrVqifM/67z1rJm7ZEcd/lxW+e/+IxTAR6ff9X+O1SbJE1a78Gf5OnAJ4ELq2oL8G7gSOB4Bp8ILp5ruaq6tKpWVNWKpUuf1NWEJGmReg3+JHszCP0PVdWnAKrq/qp6rKr+BXgf8II+a5AkPVGfd/UEeD+wsareOdS+bGi21wG39lWDJOnJ+ryr58XAWcAtSTZ0bW8HzkxyPFDAXcBbe6xBkjRLn3f1fBHIHJOu6mudkqTt85u7ktQYg1+SGrP7B/8c99kfd/lxO/SWB1+7YfszSdJOavcPfknSExj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNaa54F+z9shplyBJU9Vc8EtS6wx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDf7ZV+0+7AknqlcEvSY0x+CWpMQa/JDXG4JekxvQW/EkOTXJtkq8luS3J27r2A5NcneSO7t8D+qpBkvRkfR7xPwr8TlUdC/w8cH6SY4GVwJqqOgpY041Lkiakt+Cvqk1Vtb4bfgjYCBwCnA5c3s12OfDavmqQJD3ZRM7xJ1kOPBe4ATioqjZ1k/4ROGieZc5Nsi7Jus2bN0+iTElqQu/Bn+TpwCeBC6tqy/C0qiqg5lquqi6tqhVVtWLp0qV9lylJzeg1+JPszSD0P1RVn+qa70+yrJu+DHigzxokSU/U5109Ad4PbKyqdw5NuhI4uxs+G/jrvmqQJD3ZXj2+94uBs4Bbkmzo2t4OvAP4WJJzgLuB1/dYgyRplt6Cv6q+CGSeya/oa72SpG3zm7uS1BiDX5IaY/BLUmMMfmkXcM/KL0y7BO1GDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjRgr+JMf1XYgkaTJGPeL/iyQ3JvnNJPv3WpEkqVcjBX9VnQi8ETgUuCnJh5O8stfKJEm9GPkcf1XdAfwB8HvALwJ/luTrSX6lr+IkSeM36jn+Zye5BNgIvBw4raqe2Q1fMs8ylyV5IMmtQ22rktybZEP3OmUMP4MkaQFGPeL/78B64DlVdX5VrQeoqvsYfAqYyweAk+dov6Sqju9eVy20YEnSjtlrxPl+GfinqnoMIMkewD5V9aOq+uBcC1TV55MsH0uVkqSxGfWI/xrgqUPj+3Zti3FBkpu7U0EHLPI9JEmLNGrw71NVP5gZ6Yb3XcT63g0cCRwPbAIunm/GJOcmWZdk3ebNmxexKknSXEYN/h8mOWFmJMnzgH9a6Mqq6v6qeqyq/gV4H/CCbcx7aVWtqKoVS5cuXeiqJEnzGPUc/4XAx5PcBwQ4GDhjoStLsqyqNnWjrwNu3db8kqTxGyn4q+rLSY4Bju6abq+qH29rmSQfAV4GLElyD/BHwMuSHA8UcBfw1kXWLUlapFGP+AGeDyzvljkhCVV1xXwzV9WZczS/f2HlSZLGbaTgT/JBBhdlNwCPdc0FzBv8kqSd06hH/CuAY6uq+ixGktS/Ue/quZXBBV1J0i5u1CP+JcDXktwIPDzTWFWv6aUqSVJvRg3+VX0WIUmanFFv57wuyWHAUVV1TZJ9gT37LU2S1IdRu2X+DeATwHu7pkOAT/dVlCSpP6Ne3D0feDGwBbY+lOWn+ipKktSfUYP/4ap6ZGYkyV4M7uOXJO1iRg3+65K8HXhq96zdjwOf6a8sSVJfRg3+lcBm4BYG/etcxfxP3pIk7cRGvatnphvl9/VbjiSpb6P21fNt5jinX1VHjL0iSVKvFtJXz4x9gF8DDhx/OZKkvo10jr+qvjv0ureq/pTBA9glSbuYUU/1nDA0ugeDTwAL6ctfkrSTGDW8hx+K/iiDp2e9fuzVSJJ6N+pdPSf1XYgkaTJGPdXz29uaXlXvHE85kqS+LeSunucDV3bjpwE3Anf0UZQkqT+jBv9PAydU1UMASVYBn62qN/VVmCSpH6N22XAQ8MjQ+CNdmyRpFzPqEf8VwI1JVnfjrwUu76ckSVKfRr2r5z8n+RxwYtf0lqr6Sn9lSZL6MuqpHoB9gS1V9d+Ae5Ic3lNNkqQejfroxT8Cfg/4/a5pb+B/9lWUJKk/ox7xvw54DfBDgKq6D9ivr6IkSf0ZNfgfqaqi65o5ydP6K0mS1KdRg/9jSd4LPCPJbwDX4ENZJGmXtN27epIE+ChwDLAFOBr4w6q6uufaJEk92G7wV1UluaqqjgMMe0naxY16qmd9kuf3WokkaSJG/ebuC4E3JbmLwZ09YfBh4Nl9FSZJ6sc2gz/Jz1TV3wOvWugbJ7kMOBV4oKp+rms7kMH1guV0D3OpqgcX+t6SpMXb3qmeTwNU1d3AO6vq7uHXdpb9AHDyrLaVwJqqOgpY041LkiZoe8GfoeEjFvLGVfV54Huzmk/n8c7dLmfQ2ZskaYK2F/w1z/BiHVRVm7rhf2QbXTsnOTfJuiTrNm/ePIZVS5Jg+8H/nCRbkjwEPLsb3pLkoSRbdmTFw98Enmf6pVW1oqpWLF26dEdWJUkass2Lu1W155jXd3+SZVW1Kcky4IExv78kaTsW0i3zOFwJnN0Nnw389YTXL0nN6y34k3wE+L/A0UnuSXIO8A7glUnuAH6pG5ckTdCoX+BasKo6c55Jr+hrnZKk7Zv0qR5J0pQZ/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNWavaaw0yV3AQ8BjwKNVtWIadUhSi6YS/J2Tquo7U1y/JDXJUz2S1JhpBX8Bf5vkpiTnzjVDknOTrEuybvPmzRMuT5qcNWuPnHYJasy0gv8lVXUC8Grg/CQvnT1DVV1aVSuqasXSpUsnX6Ek7aamEvxVdW/37wPAauAF06hDklo08eBP8rQk+80MA/8auHXSdUhSq6ZxV89BwOokM+v/cFX97ynUIUlNmnjwV9WdwHMmvV5J0oC3c0pSYwx+SWqMwS9JjTH4pZ2YX+5SHwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLPTju8uOmXYI0L4Nfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPzShM11j//stoOv3TCpcrSDVq1aNe0SFszgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4JfGZPnKz/Ku89ayZu2RANyz8gsAXHzGqRx87QY2HvPMrV/2uWflF7bOP2PVqlWwan8uPuNUADYe80zg8S93LV/52Qn9JJqWSX0ZzOCXpMYY/JLUGINfkhpj8EtSY6YS/ElOTnJ7km8mWTmNGiSpVRMP/iR7Au8CXg0cC5yZ5NhJ1yFJrZrGEf8LgG9W1Z1V9QjwV8DpU6hDkpqUqprsCpN/A5xcVb/ejZ8FvLCqLpg137nAud3o0cDtC1jNEuA7Yyi3D9a2ODtrbTtrXWBti7U71XZYVS2d3bjX+OoZr6q6FLh0McsmWVdVK8Zc0lhY2+LsrLXtrHWBtS1WC7VN41TPvcChQ+M/3bVJkiZgGsH/ZeCoJIcn+QngDcCVU6hDkpo08VM9VfVokguAvwH2BC6rqtvGvJpFnSKaEGtbnJ21tp21LrC2xdrta5v4xV1J0nT5zV1JaozBL0mN2eWCf3vdPSR5SpKPdtNvSLJ8aNrvd+23J3nVhOv67SRfS3JzkjVJDhua9liSDd1r7Be6R6jtzUk2D9Xw60PTzk5yR/c6ewq1XTJU1zeS/L+hab1ttySXJXkgya3zTE+SP+vqvjnJCUPT+t5m26vtjV1NtyT5UpLnDE27q2vfkGTdFGp7WZLvD/3e/nBoWq9duYxQ238cquvWbv86sJvW23ZLcmiSa7t8uC3J2+aYZ7z7W1XtMi8GF4O/BRwB/ATwVeDYWfP8JvCebvgNwEe74WO7+Z8CHN69z54TrOskYN9u+N/P1NWN/2DK2+zNwJ/PseyBwJ3dvwd0wwdMsrZZ8/8Wg5sBJrHdXgqcANw6z/RTgM8BAX4euGES22zE2l40s04GXaPcMDTtLmDJFLfby4D/taP7Qh+1zZr3NGDtJLYbsAw4oRveD/jGHP9Hx7q/7WpH/KN093A6cHk3/AngFUnStf9VVT1cVd8Gvtm930Tqqqprq+pH3ej1DL6/MAk70kXGq4Crq+p7VfUgcDVw8hRrOxP4yBjXP6+q+jzwvW3McjpwRQ1cDzwjyTL632bbra2qvtStGya7r42y3ebTe1cuC6xtkvvapqpa3w0/BGwEDpk121j3t10t+A8B/mFo/B6evIG2zlNVjwLfB/7ViMv2Wdewcxj89Z6xT5J1Sa5P8tox1bTQ2n61+wj5iSQzX7Drc5st6P27U2OHA2uHmvvcbtszX+19b7OFmr2vFfC3SW7KoFuUafiFJF9N8rkkz+radprtlmRfBuH5yaHmiWy3DE5NPxe4Ydakse5vO22XDburJG8CVgC/ONR8WFXdm+QIYG2SW6rqWxMs6zPAR6rq4SRvZfCJ6eUTXP8o3gB8oqoeG2qb9nbbqSU5iUHwv2So+SXdNvsp4OokX++OhCdlPYPf2w+SnAJ8GjhqgusfxWnA31XV8KeD3rdbkqcz+GNzYVVtGed7z7arHfGP0t3D1nmS7AXsD3x3xGX7rIskvwRcBLymqh6eaa+qe7t/7wT+D4O/+OOy3dqq6rtD9fwl8LxRl+27tiFvYNZH75632/bMV/tO0SVJkmcz+F2eXlXfnWkf2mYPAKsZ3+nOkVTVlqr6QTd8FbB3kiXsJNuts619rZftlmRvBqH/oar61ByzjHd/6+NiRV8vBp9Q7mTwkX/mAtCzZs1zPk+8uPuxbvhZPPHi7p2M7+LuKHU9l8HFq6NmtR8APKUbXgLcwRgvao1Y27Kh4dcB19fjF46+3dV4QDd84CRr6+Y7hsHFtUxqu3Xvu5z5L1L+Mk+82HbjJLbZiLX9DINrWC+a1f40YL+h4S8x6Cl3krUdPPN7ZBCef99tw5H2hT5r66bvz+A6wNMmtd26n/8K4E+3Mc9Y97exbtRJvBhc3f4GgxC9qGv7YwZH0QD7AB/vdvwbgSOGlr2oW+524NUTrusa4H5gQ/e6smt/EXBLt6PfApwzhW32J8BtXQ3XAscMLfvvum35TeAtk66tG18FvGPWcr1uNwZHfJuAHzM4b3oOcB5wXjc9DB4o9K1u/SsmuM22V9tfAg8O7WvruvYjuu311e73fdEUartgaF+7nqE/TnPtC5OsrZvnzQxuAhlertftxuBUXAE3D/3OTulzf7PLBklqzK52jl+StIMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8akqSVUl+d9p1SNNk8Es96fpQ32O+cWla3Am120tyUQYPcfkicHTXdnzXq+fNSVYnOaBr/9kk13S9R65PcuQ87/n0DB6os757QMfpXfvy7mEiVwC3AifOGj80ybu7XkVvS/KfuuVenuTTQ+//yiSre90wapbf3NVuLcnzgA8AL2TQH8x64D3AvwV+q6quS/LHwE9W1YVJbmDQPcTqJPsAe9Tjz1EYft+9GDxYZ0vXydj1DHqZPIxBnzMvqqrru252t453yx5YVd9LsiewBvgPDL6GvxE4sao2J/kwgx5TP9PTplHDPOLX7u5EYHVV/agGXd1eyaCjrWdU1XXdPJcDL02yH3BIVa0GqKp/niv0OwH+S5KbGfTDdAhwUDft7pmQn2f89UnWA19h0HngsTU4Avsg8KYkzwB+gSf2oy+Njf3xS4vzRmAp8Lyq+nGSuxh0EAjww1nzbh1Pcjjwu8Dzq+rBJB8YWu5/MHg2wj8DH6/Bg4SksfOIX7u7zwOvTfLU7oj+NAZB/GCSE7t5zgKuq8Fj7+6ZeZpXkqd0T2Oay/7AA13on8TgFM8ofrJb//eTHMTgmbgAVNV9wH3AHzD4IyD1wiN+7daqan2SjzLoUvcB4MvdpLOB93TBfifwlq79LOC93Xn/HwO/1k2f7UPAZ5LcAqwDvj5iPV9N8pVu/n8A/m6O911aVRtH/BGlBfPirrQTSfLnwFeq6v3TrkW7L4Nf2kkkuYnBaaBX1tCjOaVxM/ilbUhyHIO7bYY9XFUvnEY90jgY/JLUGO/qkaTGGPyS1BiDX5IaY/BLUmP+Pz+3kJCoLpJOAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "ELJunumwVrYw",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "1501b6c1-3072-46fc-e80c-708f41d0f830"
},
"source": [
"print(\"Loading frequency matrix...\")\n",
"t0 = time()\n",
"\n",
"frequency_matrix = pd.DataFrame(doc_array,index=cleaned_text,columns=count_vector.get_feature_names())\n",
"print(\"done in %0.3fs.\" % (time() - t0))\n",
"frequency_matrix"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading frequency matrix...\n",
"done in 0.001s.\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>abl</th>\n",
" <th>abov</th>\n",
" <th>access</th>\n",
" <th>add</th>\n",
" <th>addit</th>\n",
" <th>age</th>\n",
" <th>android</th>\n",
" <th>applic</th>\n",
" <th>aspect</th>\n",
" <th>avail</th>\n",
" <th>background</th>\n",
" <th>calendar</th>\n",
" <th>chang</th>\n",
" <th>code</th>\n",
" <th>color</th>\n",
" <th>connect</th>\n",
" <th>contain</th>\n",
" <th>continu</th>\n",
" <th>creat</th>\n",
" <th>data</th>\n",
" <th>date</th>\n",
" <th>day</th>\n",
" <th>degre</th>\n",
" <th>delet</th>\n",
" <th>design</th>\n",
" <th>develop</th>\n",
" <th>developmentso</th>\n",
" <th>devic</th>\n",
" <th>diari</th>\n",
" <th>document</th>\n",
" <th>easi</th>\n",
" <th>edit</th>\n",
" <th>editor</th>\n",
" <th>extend</th>\n",
" <th>facebook</th>\n",
" <th>fast</th>\n",
" <th>flexibilityand</th>\n",
" <th>flexibilitywhich</th>\n",
" <th>form</th>\n",
" <th>good</th>\n",
" <th>...</th>\n",
" <th>password</th>\n",
" <th>per</th>\n",
" <th>perform</th>\n",
" <th>photo</th>\n",
" <th>post</th>\n",
" <th>postcard</th>\n",
" <th>program</th>\n",
" <th>quit</th>\n",
" <th>read</th>\n",
" <th>reread</th>\n",
" <th>respons</th>\n",
" <th>result</th>\n",
" <th>reusabl</th>\n",
" <th>run</th>\n",
" <th>save</th>\n",
" <th>second</th>\n",
" <th>secur</th>\n",
" <th>seven</th>\n",
" <th>share</th>\n",
" <th>social</th>\n",
" <th>sourc</th>\n",
" <th>stop</th>\n",
" <th>structur</th>\n",
" <th>system</th>\n",
" <th>thi</th>\n",
" <th>three</th>\n",
" <th>time</th>\n",
" <th>titl</th>\n",
" <th>toler</th>\n",
" <th>two</th>\n",
" <th>usabl</th>\n",
" <th>use</th>\n",
" <th>user</th>\n",
" <th>valu</th>\n",
" <th>version</th>\n",
" <th>way</th>\n",
" <th>weather</th>\n",
" <th>week</th>\n",
" <th>without</th>\n",
" <th>year</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>user creat new diari</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add titl diari</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add weather diari</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add date diari</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add hour diari</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add second diari</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user save diari</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add photo diari</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user read diari ha creat</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user share diari form postcard</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user save postcard</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user chang postcard background color</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user chang color post postcard</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user delet diari</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user edit diari ha creat</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user open calendar contain diari</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>system open diari editor</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>applic ha high level avail oper continu seven day per week hour per day without stop</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>applic must high degre flexibilitywhich must abl run devic use kitkat version android oper system abov</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thi applic must high level integr data secur includ secur form password user</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thi applic ha high usabl aspect easi use interfac applic introduc two hoursth new user creat diari less three minut</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thi applic must respons time valu quit good fast access extend perform result toler three second</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>applic must high level interoperabilitybecaus one need thi applic connect social media lineinstagramand facebook</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>document must includ system sourc code develop reread chang addit sourc code made high level maintain</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>applic must high degre flexibilityand use user age seven year abov</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>modul system design program structur way continu use system developmentso reusabl level high</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26 rows × 104 columns</p>\n",
"</div>"
],
"text/plain": [
" abl abov ... without year\n",
"user creat new diari 0 0 ... 0 0\n",
"user add titl diari 0 0 ... 0 0\n",
"user add weather diari 0 0 ... 0 0\n",
"user add date diari 0 0 ... 0 0\n",
"user add hour diari 0 0 ... 0 0\n",
"user add second diari 0 0 ... 0 0\n",
"user save diari 0 0 ... 0 0\n",
"user add photo diari 0 0 ... 0 0\n",
"user read diari ha creat 0 0 ... 0 0\n",
"user share diari form postcard 0 0 ... 0 0\n",
"user save postcard 0 0 ... 0 0\n",
"user chang postcard background color 0 0 ... 0 0\n",
"user chang color post postcard 0 0 ... 0 0\n",
"user delet diari 0 0 ... 0 0\n",
"user edit diari ha creat 0 0 ... 0 0\n",
"user open calendar contain diari 0 0 ... 0 0\n",
"system open diari editor 0 0 ... 0 0\n",
"applic ha high level avail oper continu seven d... 0 0 ... 1 0\n",
"applic must high degre flexibilitywhich must ab... 1 1 ... 0 0\n",
"thi applic must high level integr data secur in... 0 0 ... 0 0\n",
"thi applic ha high usabl aspect easi use interf... 0 0 ... 0 0\n",
"thi applic must respons time valu quit good fas... 0 0 ... 0 0\n",
"applic must high level interoperabilitybecaus o... 0 0 ... 0 0\n",
"document must includ system sourc code develop ... 0 0 ... 0 0\n",
"applic must high degre flexibilityand use user ... 0 1 ... 0 1\n",
"modul system design program structur way contin... 0 0 ... 0 0\n",
"\n",
"[26 rows x 104 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 8
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ngzf208WglCO"
},
"source": [
"### Visualisasi"
]
},
{
"cell_type": "code",
"metadata": {
"id": "69tGYAWpgjU_",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 370
},
"outputId": "a99d3806-435d-481f-b8f8-0e9d70e873ff"
},
"source": [
"import umap\n",
"\n",
"print(\"Loading Visualisation...\")\n",
"t0 = time()\n",
"\n",
"visualisasi_bow = doc_array\n",
"embedding = umap.UMAP(n_neighbors=3, min_dist=0.1, random_state=4).fit_transform(visualisasi_bow)\n",
"plt.figure(figsize=(7,5))\n",
"plt.scatter(embedding[:, 0], embedding[:, 1], c = None, s = 50 ,edgecolors=None)\n",
"plt.xlabel('widht')\n",
"plt.ylabel('height')\n",
"plt.show()\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Visualisation...\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcUAAAE9CAYAAAB6AHnwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAXs0lEQVR4nO3df5DcdX3H8dcre1zkLoBAAiZAPIYiEhEQLsFftIKxRRoaqVpg0AKDQ8tI/VFtC6jFgiJSf9TpOFSqEFoRdFAGGhAUxEJH6uWCEEICSuGA/IAkZASS6CV39+4f+7lwd9nb283t7vd7u8/HzM3u97u7333fJrev/fz6riNCAABAmpZ1AQAA5AWhCABAQigCAJAQigAAJIQiAAAJoQgAQNKWdQG1MHPmzOjq6sq6DABAjixfvnxTRMyq5jFNEYpdXV3q7e3NugwAQI7Yfqbax9B9CgBAQigCAJBkGoq2r7O9wfbKEfs+b3ut7YfTz6lZ1ggAaB1ZtxSXSDqlxP6vR8Sx6efOBtcEAGhRmYZiRNwvaXOWNQAAMCzrluJ4LrK9InWv7pt1MQCA1pDHULxG0mGSjpW0XtJXS93J9gW2e233bty4sZH17ZYt/QO6uedZXfXj1bq551lt6R/IuiQAwBjO+vsUbXdJWhoRR1Vz20jd3d2R53WKy/o269zrexQhbds+qI72gmxpyXkLNL9rv6zLA4CmZHt5RHRX85jctRRtzx6xebqklePddyrY0j+gc6/v0db+QW3bPiipGIxb+wfTflqMAJAXWS/JuEnSg5KOsL3G9vmSrrb9qO0Vkk6S9Mksa5yspY+s03iN8Qhp6Yp1jS0IADCuTE/zFhFnldj9nYYXUkNb+ge09JF16ntxq7r279SvX3hlZwtxrG3bB9W3aVuDKwQAjKcpzn2aF6XGDgeHQtPbpql/YGiX+3e0F9Q1s2OXIF10zBzNmM4/DQA0Gu+8NTJy7HDYeC3EYbY0e589dcKV94wK0ivuWMUkHADIQO4m2kxV5cYOp7dNU3ub1dFekFRsIXZOL+ias4/XhTcuZxIOAOQELcUa6Xtx67gtw/6BIX3knYfq8ANnqG/TNnXN7NCio+fovyqYhHPG/Ll1rBoAMBKhuBtKjQF27d+pjvZCyWDsaC/o8ANn7BJw5YJ0dybh1GJskvFNAK2Md7sqlZpMc8Udq3TN2cfLLv0YW1p09Jxd9r9unz3VXrC2D+7aXByehDPZuqoZm6zFMQBgKmNMcYSJTsVWbiH+hTcu1zVnH6/O6YVdxg6XnLdAnWNaW8v6Nuvqu1aXDERp/CAdr+7JniCAkwwAAC3FnSppJU20EH/9S79Tz6ULtXTFulFjh2MDcTiAtm3fdZmGJHW2lw7S8VRygoCJxiZrcYypgi5iAOPhnUDll1Oce32Pei5dqM7pbRWNAXZOb5tUALUXrH947xFVdVfWYmyy1uObjVJtwNFFDKAcQlHlQ2rHwJB+uPw5/eXbD51wMk2lY4DlAmj7YGj9b/tL3jZeAFRaV7kAqdXv1kjVBlylH34AtC7GFDVxSF1xx2ot69usRcfMqXoyTSnDAVTKeAG0rG+zTrjyHl2+dJX+7b+f0uVLV+mEK++puK5yj5dUs9+tUXZnDJTz0AKYCKGo8iElSTsGQ+de3yNLaayvssk046k2gCYKgInqCmnCAJkxva0mv1uj7E7ATdUuYgCNk693uowsOmaOrrhjVdn7jJxsUslkmnKGA2i871gce6xKJ8GMV9fNPc9W9Pj5XftN+ndrlN0JuHJdxG3TrA2v/F5b0gcEAK2Jv369GlJnXfugSpy3W9LoN9pKJtNMZGwAzd5nukLWvatf0P9t2DJqvK/SABivrmoCpBa/WyPszhhouQ8/A0OhOx9dr7see55JN0ALo/s0md+1nz636E1qL5Tu16zHZJPhADr5yAP05buf0JfverzkeN/ujEGONNnH59HujoF++ITXa4+CtUeJf+ff7RhiXSbQ4gjFEd5//MHao630SzIwNKSTjjig5s9ZyYSRyU6CmWqTaCpR7Rjo8ESj//jfZ7RjMDQ0NE5/sph0A7QyQnGEkW+008eEo2Wd9NWf72y91Uol44WTnQQz1SbRVGq4C/qy0+bpwj86TJedNk89ly7cpeuz1AePcU4kJIlJN0Arm5rvhjU2dv3e0oveqVO+8cCo+/QPDKl/oPbr2Sod75vsJJipNImmGpM9WUIpU7VLGcDkTe13xBootQB8YGhIVun+xlqf8qyaCSOTnQQzVSbR1Fq5Dx6lTNUuZQCT19Ldp+ON520fCPWPMw211l1rzTjelzcTrUMdnlzVDF3KACanpf/yq+1Wk2rftVbtmkVUr9xSjI72abr4lCO1/qXfN02XMoDd19J//dV2q0n1ab0163hfXkz0wYM1iQCGtfS7brnxvOHZp4VpbkjrrVXH+xqFDx4AKtHS7wjlutXaCtZ9n3qX7ntiA2+iTYIPHgAm0tLv8BN1qx2w92t4EwWAFtLSoSjRrQYAeBXv/KJbDQBQ1NLrFAEAGIlQBAAgIRQBAEgIRQAAEkIRAICEUAQAICEUAQBICEUAABJCEQCAJNNQtH2d7Q22V47Yt5/tn9r+TbrcN8saAQCtI+uW4hJJp4zZd7GkeyPicEn3pm0AAOou01CMiPslbR6ze7GkG9L1GyS9r6FFAQBaVtYtxVIOjIj16frzkg7MshgAQOvIYyjuFBEhKUrdZvsC2722ezdu3NjgygAAzSiPofiC7dmSlC43lLpTRFwbEd0R0T1r1qyGFggAaE55DMXbJZ2Trp8j6bYMawEAtJCsl2TcJOlBSUfYXmP7fElXSXqP7d9IWpi2AQCou7Ysnzwizhrnpnc3tBAAAJTP7lMAADJBKAIAkBCKAAAkhCIAAAmhCABAQigCAJAQigAAJIQiAAAJoQgAQEIoAgCQEIoAACSEIgAACaEIAEBCKAIAkBCKAAAkhCIAAAmhCABAQigCAJAQigAAJIQiAAAJoQgAQEIoAgCQEIoAACSEIgAACaEIAEBCKAIAkBCKAAAkhCIAAAmhCABAQigCAJAQigAAJIQiAAAJoQgAQEIoAgCQEIoAACRtWRcwHtt9kl6RNChpICK6s60IANDschuKyUkRsSnrIgAArYHuUwAAkjyHYkj6ie3lti/IuhgAQPPLc/fpOyNire0DJP3U9uMRcf/wjSkoL5CkuXPnZlUjAKCJ5LalGBFr0+UGSbdKWjDm9msjojsiumfNmpVFiQCAJpPLULTdaXuv4euS/ljSymyrAgA0u7x2nx4o6VbbUrHG70XEXdmWBABodrkMxYh4StIxWdcBAGgtuew+BQAgC4QiAAAJoQgAQEIoAgCQEIoAACSEIgAACaEIAEBCKAIAkBCKAAAkhCIAAAmhCABAQigCAJAQigAAJIQiAAAJoQgAQEIoAgCQEIoAACSEIgAACaEIAEBCKAIAkFQUirb/s5J9AABMZZW2FN80csN2QdLxtS8HAIDslA1F25fYfkXS0bZfTj+vSNog6baGVAgAQIOUDcWI+FJE7CXpnyNi7/SzV0TsHxGXNKhGAAAaoq2SO0XEJbYPkvT6kY+JiPvrVRgAAI1WUSjavkrSmZJWSRpMu0MSoQgAaBoVhaKk0yUdERH99SwGAIAsVTr79ClJe9SzEAAAsla2pWj7X1XsJt0m6WHb90ra2VqMiI/VtzwAABpnou7T3nS5XNLtda4FAIBMlQ3FiLihUYUAAJC1SmefPqpiN+pIL6nYkvxCRLxY68IAAGi0Smef/ljFpRjfS9tnSuqQ9LykJZJOq3llAAA0WKWhuDAijhux/ajthyLiONsfqkdhAAA0WqVLMgq2Fwxv2J4vqZA2B2peFQAAGai0pfgRSdfZniHJkl6W9BHbnZK+VK/iAABopErPfbpM0ptt75O2Xxpx8w/qUZjtUyR9Q8UW6bcj4qp6PA8AAMMmWrz/oYj4ru2/HbNfkhQRX6tHUen7Gr8p6T2S1khaZvv2iFhVj+cDAECauKXYmS73qnchYyyQ9GREPCVJtm+WtFjFE5IDAFAXEy3e/1a6/KfGlLPTQZKeG7G9RtIJI+9g+wJJF0jS3LlzG1cZAKBpVTT71PYbbN9re2XaPtr2Z+tbWnkRcW1EdEdE96xZs7IsBQDQJCpdkvHvki6RtEOSImKFigv462WtpENGbB+c9gEAUDeVhmJHRPSM2VfP9YnLJB1u+1Db7SoGMCckBwDUVaXrFDfZPkzp/Ke2PyBpfb2KiogB2xdJulvFJRnXRcRj9Xo+AACkykPxo5KulfRG22slPS3p7LpVJSki7pR0Zz2fAwCAkSoNxbWSrpd0n6T9VDyjzTmSLq9TXQAANFyloXibpN9KekjSuvqVAwBAdioNxYMj4pS6VgIAQMYqnX36C9tvrmslAABkbKJznz6q4ozTNknn2X5KUr+K35QREXF0/UsEAKAxJuo+XdSQKgAAyIGJzn36TKMKAQAga5WOKQIA0PQIRQAAEkIRAICEUAQAICEUAQBICEUAABJCEQCAhFAEACAhFAEASAhFAAASQhEAgIRQBAAgIRQBAEgIRQAAEkIRAICEUAQAICEUAQBICEUAABJCEQCAhFAEACAhFAEASAhFAAASQhEAgIRQBAAgIRQBAEgIRQAAEkIRAICEUAQAIMldKNr+vO21th9OP6dmXRMAoDW0ZV3AOL4eEV/JuggAQGvJXUsRAICs5DUUL7K9wvZ1tvfNuhgAQGvIJBRt32N7ZYmfxZKukXSYpGMlrZf01XGOcYHtXtu9GzdubGD1AIBm5YjIuoZx2e6StDQijip3v+7u7ujt7W1ITQCAqcH28ojoruYxues+tT17xObpklZmVQsAoLXkcfbp1baPlRSS+iT9VbblAABaRe5CMSI+nHUNAIDWlLvuUwAAskIoAgCQEIoAACSEIgAACaEIAEBCKAIAkBCKAAAkhCIAAAmhCABAQigCAJAQigAAJIQiAAAJoQgAQEIoAgCQEIoAACSEIgAACaEIAEBCKAIAkBCKAAAkhCIAAAmhCABAQigCAJAQigAAJIQiAAAJoQgAQEIoAgCQEIoAACSEIgAACaEIAEBCKAIAkBCKAAAkhCIAAAmhCABAQigCAJAQigAAJIQiAABJJqFo+4O2H7M9ZLt7zG2X2H7S9hO2/ySL+gAArakto+ddKenPJX1r5E7b8ySdKelNkuZIusf2GyJisPElAgBaTSYtxYhYHRFPlLhpsaSbI6I/Ip6W9KSkBY2tDgDQqvI2pniQpOdGbK9J+3Zh+wLbvbZ7N27c2JDiAADNrW7dp7bvkfS6Ejd9JiJum+zxI+JaSddKUnd3d0z2eAAA1C0UI2LhbjxsraRDRmwfnPYBAFB3ees+vV3Smban2z5U0uGSejKuCQDQIrJaknG67TWS3ibpDtt3S1JEPCbpB5JWSbpL0keZeQoAaJRMlmRExK2Sbh3nti9K+mJjKwIAIH/dpwAAZIZQBAAgIRQBAEgIRQAAEkIRAICEUAQAICEUAQBICEUAABJCEQCAJKsvGQYAtKAt/QNa+sg69b24VV37d2rRMXM0Y3p+oig/lQAAmtqyvs069/oeRUjbtg+qo72gK+5YpSXnLdD8rv2yLk8S3acAgAbY0j+gc6/v0db+QW3bXvyeh23bB7W1fzDtH8i4wiJCEQBQd7csX6MdA0Mlb4uQlq5Y1+CKSqP7FABQtWrGBpf1bdYXlj6mcTJR27YPqm/TtjpWWzlCEQBQlWrGBoe7TccLREnqaC+oa2ZHnauuDN2nAICKVTs2uPSRdYoof0xbWnT0nHqVXBVCEQBQsXIhV2pssO/FrTvDs5Q9CtaS8xaoMyfLMghFAEDFyoXc2LHBLf0D2vByv9qmueT92wvW5xYdmZvlGBJjigAAVT5xpmv/TnW0F0oG48ixweFxx6Gh0MBQ6ablHm3T9P7jDqntLzJJhCIAtIByoVfNxJlFx8zRFXesKvkcA0NDeuL5V7TkF326+q7V2ra99OyaPfcoaNo07dJtmoez3TgmGgGdArq7u6O3tzfrMgAgl0qFnl0MpSNn760TrrxHW/t3bfl1Ti+o59KFu4z3jT3e9LZp6h8Y2nnZXrC2D5bOlrZp0uJjD9Lli48addxyNe5u96rt5RHRXc1jGFMEgCY20WzRHy5/rqqJM5I0v2s/9Vy6UJedNk/nv+PQnfv707qL8QJRkgaGpAP2es0uLcS8nO2GUASAJjbRbNGfPb6h4okzI3VOb9MZ8+fq8ANnqDDORJpSSq1JrHZGaz0RigDQxCaaLSpZHe2FkrdXsqh+oiUXY5Vak1jNjNZ6IxQBoIkNzxYtpaO9oJPfeIA8TkOvkkX15Y4vFZddDD9X5/RCyTWJE9XYyLPdEIoA0MQWHTOnbOh94PiDU1AVdgZTuQCr5vid7QV9dtGRuvCPDtNlp81Tz6ULS06amajGRp7thtmnANDkKpnZubV/QEtXrFPfpm3qmtmhRUfPqfgsM7WYOZqX2aeEIgC0gMmEXqOOX+saCUUAABLWKQIAMAmEIgAACaEIAEBCKAIAkBCKAAAkhCIAAAmhCABA0hTrFG1vlPRM1nXU0ExJm7IuIkd4PUbj9XgVr8VovB6jHRERe1XzgMZ+pXGdRMSsrGuoJdu91S44bWa8HqPxeryK12I0Xo/RbFd9Vhe6TwEASAhFAAASQjGfrs26gJzh9RiN1+NVvBaj8XqMVvXr0RQTbQAAqAVaigAAJIRiztg+xfYTtp+0fXHW9WTJ9iG277O9yvZjtj+edU1Zs12w/SvbS7OuJWu2X2v7FtuP215t+21Z15QV259MfyMrbd9k+zVZ19RItq+zvcH2yhH79rP9U9u/SZf7VnIsQjFHbBckfVPSeyXNk3SW7XnZVpWpAUmfioh5kt4q6aMt/npI0sclrc66iJz4hqS7IuKNko5Ri74utg+S9DFJ3RFxlKSCpDOzrarhlkg6Zcy+iyXdGxGHS7o3bU+IUMyXBZKejIinImK7pJslLc64psxExPqIeChdf0XFN72Dsq0qO7YPlvSnkr6ddS1Zs72PpD+U9B1JiojtEfHbbKvKVJukPW23SeqQtC7jehoqIu6XtHnM7sWSbkjXb5D0vkqORSjmy0GSnhuxvUYtHAIj2e6S9BZJv8y2kkz9i6S/lzSUdSE5cKikjZKuT93J37bdmXVRWYiItZK+IulZSeslvRQRP8m2qlw4MCLWp+vPSzqwkgcRisg92zMk/VDSJyLi5azryYLtRZI2RMTyrGvJiTZJx0m6JiLeImmrKuweazZprGyxih8U5kjqtP2hbKvKlygus6hoqQWhmC9rJR0yYvvgtK9l2d5DxUC8MSJ+lHU9GXqHpD+z3adit/rJtr+bbUmZWiNpTUQM9xzcomJItqKFkp6OiI0RsUPSjyS9PeOa8uAF27MlKV1uqORBhGK+LJN0uO1DbberOFh+e8Y1Zca2VRwzWh0RX8u6nixFxCURcXBEdKn4/+JnEdGyrYGIeF7Sc7aPSLveLWlVhiVl6VlJb7Xdkf5m3q0WnXQ0xu2SzknXz5F0WyUPaooTgjeLiBiwfZGku1WcQXZdRDyWcVlZeoekD0t61PbDad+lEXFnhjUhP/5G0o3pA+RTks7LuJ5MRMQvbd8i6SEVZ2z/Si12ZhvbN0l6l6SZttdIukzSVZJ+YPt8Fb9F6S8qOhZntAEAoIjuUwAAEkIRAICEUAQAICEUAQBICEUAABJCEZiibN9p+7Ul9n/e9qfT9Z/b7i5xn2Ntn9qIOoGphFAEpqiIOHUSJ8E+VhKhCIxBKAI5ZfvvbH8sXf+67Z+l6yfbvtF2n+2Zad9nbP/a9v9IOmLMoT5ouyfdfmJa7H65pDNsP2z7jEb+XkCeEYpAfj0g6cR0vVvSjHQu2BMl3T98J9vHq3jqt+HW3/wxx2mLiAWSPiHpsvS1ZP8o6fsRcWxEfL++vwYwdRCKQH4tl3S87b0l9Ut6UMVwPFHFwBx2oqRbI2Jb+haRsefLHT6R+nJJXXWtGJjiOPcpkFMRscP205LOlfQLSSsknSTpD1TdCZ/70+Wg+JsHyqKlCOTbA5I+rWJ36QOS/lrSr2L0SYvvl/Q+23va3kvSaRUc9xVJe9W6WGCqIxSBfHtA0mxJD0bEC5J+r9Fdp4qIhyR9X9Ijkn6s4leQTeQ+SfOYaAOMxrdkAACQ0FIEACAhFAEASAhFAAASQhEAgIRQBAAgIRQBAEgIRQAAEkIRAIDk/wHfwQX8OSDNgAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 504x360 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
},
{
"output_type": "stream",
"text": [
"done in 5.967s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "tapavf6vNh3o"
},
"source": [
"# Term Frequency - Inverse Document Frequency"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XUU4UEbHNwCM"
},
"source": [
"### Feature Generation using TF-IDF"
]
},
{
"cell_type": "code",
"metadata": {
"id": "8QcgR58qNrey",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 318
},
"outputId": "6ed476ae-5fea-47a1-d84c-109522910687"
},
"source": [
"import numpy as np\n",
"import math\n",
"\n",
"def l2_normalizer(vec):\n",
" denom = np.sum([el**2 for el in vec])\n",
" return [(el / math.sqrt(denom)) for el in vec]\n",
"\n",
"doc_term_matrix_l2 = []\n",
"for vec in doc_array:\n",
" doc_term_matrix_l2.append(l2_normalizer(vec))\n",
"\n",
"print ('A regular old document term matrix: ') \n",
"print (np.matrix(doc_array))\n",
"print ('\\nA document term matrix with row-wise L2 norms of 1:')\n",
"print (np.matrix(doc_term_matrix_l2))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"A regular old document term matrix: \n",
"[[0 0 0 ... 0 0 0]\n",
" [0 0 0 ... 0 0 0]\n",
" [0 0 0 ... 0 0 0]\n",
" ...\n",
" [0 0 0 ... 0 0 0]\n",
" [0 1 0 ... 0 0 1]\n",
" [0 0 0 ... 0 0 0]]\n",
"\n",
"A document term matrix with row-wise L2 norms of 1:\n",
"[[0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0.30151134 0. ... 0. 0. 0.30151134]\n",
" [0. 0. 0. ... 0. 0. 0. ]]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "7aYrvfjm-2Xg",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "99d6bf2a-1a6b-414d-e971-26ef64b7db1e"
},
"source": [
"plt.hist(doc_term_matrix_l2)\n",
"plt.xlabel('term matrix l2')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 11
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAATsUlEQVR4nO3dfbRddX3n8fdHIgUUQSRFCujFmGWgghWDtXV0rNiWqoXUMq2OD9FBU0ad2to6pNYp6cy41LH40BYfqFqD1YpSR2ixOphAfZgCBonyEJWooKEoqa3AYEfK+J0/zs72Em+SnZNzzr733PdrrbPu3r+zz9nfX06Sz92/vc9vp6qQJAngfn0XIEmaPwwFSVLLUJAktQwFSVLLUJAktZb0XcC+OPzww2tmZqbvMiRpQbnmmmv+saqWzvXcgg6FmZkZNm3a1HcZkrSgJLllV885fCRJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqTW2EIhyXuS3J7k+llthyW5LMlNzc8HN+1J8sdJtib5YpKTxlWXJGnXxnmk8F7g1J3a1gIbqmo5sKFZB/glYHnzWAO8fYx17da2tZ++z/rM2kvb5S0rjpt0OZI0UWMLhar6FPBPOzWfDqxvltcDq2a1X1ADVwKHJjlyXLVJkuY26XMKR1TVbc3yt4AjmuWjgG/O2m5b0/YjkqxJsinJpu3btw9dyMzaS2HdIZyw/gS2rDiO887ayLm//kwA1q1bt8fX73xEIUnToLcTzTW4OfRe3yC6qs6vqpVVtXLp0jkn+ZMkDWnSofDtHcNCzc/bm/ZbgWNmbXd009abDRuX8dDLNw9WmiMK4D5HFJI0bSYdCpcAq5vl1cDFs9pf0FyF9ATgjlnDTJKkCRnb/RSS/CXwFODwJNuAc4DXAx9KciZwC/BrzeYfA54ObAW+B7xoXHVJknZtbKFQVc/ZxVOnzLFtAS8bVy2SpG78RrMkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahsIYnHfWxr5LkKShGAqSpJahsI/amVR34kyqkhYiQ0GS1DIURmjHPRckaaEyFCRJLUNhxLasOK7vEiRpaIaCJKllKOyDDRuX9V2CJI2UoSBJahkKo7LukL4rkKR9ZihIklqGgiSpZShIklqGgiSpZShIklqGgiSpZSiMwMzaS/suQZJGwlCQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSq5dQSPLbSW5Icn2Sv0xyQJJjk1yVZGuSC5Ps30dtkrSYTTwUkhwF/CawsqoeDewHPBt4A/Dmqnok8M/AmZOuTZIWu76Gj5YAByZZAhwE3AY8FbioeX49sKqn2iRp0Zp4KFTVrcAfAd9gEAZ3ANcA362qe5vNtgFHzfX6JGuSbEqyafv27ZMoWZIWjT6Gjx4MnA4cC/wE8ADg1K6vr6rzq2plVa1cunTpmKqUpMWpj+GjpwFfr6rtVfWvwEeAJwKHNsNJAEcDt/ZQmyQtan2EwjeAJyQ5KEmAU4AbgcuBM5ptVgMX91CbJC1qfZxTuIrBCeXPA9c1NZwPnA28MslW4CHAuyddmyQtdkv2vMnoVdU5wDk7NX8NeHwP5UiSGn6jWZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSa1OoZDkhHEXIknqX9cjhbcluTrJS5McMtaKJEm96RQKVfUk4LnAMcA1ST6Q5OfHWpkkaeI6n1OoqpuA1wBnA/8W+OMkX0ryrHEVJ0marK7nFE5M8mZgC/BU4Jer6rhm+c1jrE+SNEFLOm73J8C7gFdX1b/saKyqf0jymrFUJkmauK7DR88APrAjEJLcL8lBAFX1vr3daZJDk1zUDD9tSfIzSQ5LclmSm5qfD97b95Uk7ZuuofBJ4MBZ6wc1bcN6K/DxqloBPIbBsNRaYENVLQc2NOuSpAnqGgoHVNX/2bHSLB80zA6bS1qfDLy7ea97quq7wOnA+maz9cCqYd5fkjS8rqFwd5KTdqwkeRzwL7vZfneOBbYDf57k2iTvSvIA4Iiquq3Z5lvAEXO9OMmaJJuSbNq+ffuQJUiS5tI1FH4L+HCSTyf5DHAh8PIh97kEOAl4e1U9FribnYaKqqqAmuvFVXV+Va2sqpVLly4dsgRJ0lw6XX1UVZ9LsgJ4VNP05ar61yH3uQ3YVlVXNesXMQiFbyc5sqpuS3IkcPuQ7y9JGtLeTIh3MnAig9/yn5PkBcPssKq+BXwzyY6AOQW4EbgEWN20rQYuHub9JUnD63SkkOR9wDJgM/D/muYCLhhyv/8JeH+S/YGvAS9iEFAfSnImcAvwa0O+tyRpSF2/vLYSOL4Z699nVbW5ec+dnTKK95ckDafr8NH1wEPHWYgkqX9djxQOB25McjXw/R2NVXXaWKqSJPWiayisG2cRkqT5oeslqX+X5OHA8qr6ZDPv0X7jLU2SNGldp85+CYPvE7yzaToK+Oi4ipIk9aPrieaXAU8E7oT2hjs/Pq6iJEn96BoK36+qe3asJFnCLqahkCQtXF1D4e+SvBo4sLk384eBvx5fWZKkPnQNhbUMZja9DvgN4GMM7tcsSZoiXa8++gHwZ81DkjSlus599HXmOIdQVY8YeUWSpN7szdxHOxwA/DvgsNGXI0nqU6dzClX1nVmPW6vqLcAzxlybJGnCug4fnTRr9X4Mjhy6HmVIkhaIrv+xnztr+V7gZrzfgSRNna5XH/3cuAuRJPWv6/DRK3f3fFW9aTTlSJL6tDdXH53M4D7KAL8MXA3cNI6iJEn96BoKRwMnVdVdAEnWAZdW1fPGVZgkafK6TnNxBHDPrPV7mjZJ0hTpeqRwAXB1kv/ZrK8C1o+nJElSX7peffTaJH8LPKlpelFVXTu+siRJfeg6fARwEHBnVb0V2Jbk2DHVJEnqSdfbcZ4DnA38XtN0f+AvxlWUJKkfXY8UfgU4DbgboKr+ATh4XEVJkvrRNRTuqaqimT47yQPGV5IkqS9dQ+FDSd4JHJrkJcAn8YY7kjR19nj1UZIAFwIrgDuBRwF/UFWXjbk2SdKE7TEUqqqSfKyqTgAMAkmaYl2Hjz6f5OSxViJJ6l3XbzT/NPC8JDczuAIpDA4iThxXYZKkydttKCR5WFV9A/jFCdUjSerRnoaPPgpQVbcAb6qqW2Y/9mXHSfZLcm2Sv2nWj01yVZKtSS5Msv++vL8kae/tKRQya/kRI973K4Ats9bfALy5qh4J/DNw5oj3J0nagz2FQu1ieZ8kORp4BvCuZj3AU4GLmk3WM5iJVZI0QXs60fyYJHcyOGI4sFmGH55oftCQ+30L8J/54VQZDwG+W1X3NuvbgKPmemGSNcAagIc97GFD7l6SNJfdHilU1X5V9aCqOriqljTLO9aHCoQkzwRur6prhnl9VZ1fVSurauXSpUuHeQtJ0i50vSR1lJ4InJbk6cABwIOAtzKYQmNJc7RwNHBrD7VJ0qK2N/dTGImq+r2qOrqqZoBnAxur6rnA5cAZzWargYsnXZskLXYTD4XdOBt4ZZKtDM4xvLvneiRp0elj+KhVVVcAVzTLXwMe32c9krTYzacjBUlSzwwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVJr4qGQ5Jgklye5MckNSV7RtB+W5LIkNzU/Hzzp2iRpsevjSOFe4Heq6njgCcDLkhwPrAU2VNVyYEOzLkmaoImHQlXdVlWfb5bvArYARwGnA+ubzdYDqyZdmyQtdr2eU0gyAzwWuAo4oqpua576FnDELl6zJsmmJJu2b98+kTolabHoLRSSPBD4K+C3qurO2c9VVQE11+uq6vyqWllVK5cuXTqBSiVp8eglFJLcn0EgvL+qPtI0fzvJkc3zRwK391GbJC1mfVx9FODdwJaqetOspy4BVjfLq4GLJ12bJC12fRwpPBF4PvDUJJubx9OB1wM/n+Qm4GnNurRLM2sv7bsEaeosmfQOq+ozQHbx9CmTrEWSdF9+o1mS1DIUJEktQ0Ea1rpD+q5AGjlDQZLUMhQkSS1DQZLUMhSkvbBh47K+S5DGylCQJLUMBUlSy1CQJLUMBS0o69at+5G2LSuOm3whjRPWn9DbvqVxMBQkSS1DQdpLD71884+0nfvrz+yhEmn0DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJvdiwcdkPJxdcd0g7Dfl5Z210gsEeGQqSpJahIGkqzay9tD0C2bLiOM47ayMw942adrZt7afHXN38ZShIklqGgiRNyOwjkJm1l7bLfd5SdmeGgiSpZShImrd2Nf4/n36znjbzKhSSnJrky0m2Jlnbdz2StNjMm1BIsh9wHvBLwPHAc5Ic329VkuaL2WPwk3Kf71JA+10KYGq/SzFvQgF4PLC1qr5WVfcAHwRO77kmSVpUUlV91wBAkjOAU6vqxc3684GfrqqX77TdGmBNs/oo4Mt7sZvDgX8cQbnzjf1aWKaxX9PYJ5jefj28qpbO9cSSSVeyr6rqfOD8YV6bZFNVrRxxSb2zXwvLNPZrGvsE09uv3ZlPw0e3AsfMWj+6aZMkTch8CoXPAcuTHJtkf+DZwCU91yRJi8q8GT6qqnuTvBz4BLAf8J6qumHEuxlq2GkBsF8LyzT2axr7BNPbr12aNyeaJUn9m0/DR5KknhkKkqTWVIbCnqbLSPJjSS5snr8qyczkq9x7Hfr15CSfT3Jv872Pea9Dn16Z5MYkX0yyIcnD+6hzb3Xo11lJrkuyOclnFsq397tORZPkV5NUkgVxOWeHz+uFSbY3n9fmJC/uo86JqKqpejA4Sf1V4BHA/sAXgON32ualwDua5WcDF/Zd94j6NQOcCFwAnNF3zSPq088BBzXL/3GKPqsHzVo+Dfh433WPol/NdgcDnwKuBFb2XfeIPq8XAn/ad62TeEzjkUKX6TJOB9Y3yxcBpyTJBGscxh77VVU3V9UXgR/0UeAQuvTp8qr6XrN6JYPvr8x3Xfp156zVBwAL4YqPrlPR/DfgDcD/nWRx+8ApdmaZxlA4CvjmrPVtTduc21TVvcAdwEMmUt3wuvRrodnbPp0J/O1YKxqNTv1K8rIkXwX+B/CbE6ptX+yxX0lOAo6pqsnPXje8rn8Pf7UZxrwoyTFzPD8VpjEUNIWSPA9YCbyx71pGparOq6plwNnAa/quZ18luR/wJuB3+q5lDP4amKmqE4HL+OFIw9SZxlDoMl1Gu02SJcAhwHcmUt3wpnEakE59SvI04PeB06rq+xOqbV/s7Wf1QWDVWCsajT3162Dg0cAVSW4GngBcsgBONu/x86qq78z6u/cu4HETqm3ipjEUukyXcQmwulk+A9hYzdmkeWwapwHZY5+SPBZ4J4NAuL2HGofRpV/LZ60+A7hpgvUNa7f9qqo7qurwqpqpqhkG54BOq6pN/ZTbWZfP68hZq6cBWyZY32T1faZ7HA/g6cBXGFxR8PtN239l8BcU4ADgw8BW4GrgEX3XPKJ+ncxgPPRuBkc+N/Rd8wj69Eng28Dm5nFJ3zWPqF9vBW5o+nQ58JN91zyKfu207RUsgKuPOn5er2s+ry80n9eKvmse18NpLiRJrWkcPpIkDclQkCS1DAVJUstQkCS1DAVJUstQ0NRIcmiSl/ZdRxdJVu1uZtRmFtUX7MX7vXfHzLhJ3t/M+Hl9kvckuf8oatbiYChomhzKYAbczjLQx7+DVcCcoZBkSVW9o6ouGPK93w+sAE4ADgSmd5pnjZyhoGnyemBZM9/9GwGSvCrJ55qJzP6waZtpfpO+ALgeeFKSLzW/bX+l+U37aUk+m+SmJI/feUfN/PofTXJZkpuTvLy598O1Sa5Mcliz3Uua/X8hyV8lOSjJzzL4Vuwbm1qXJbkiyVuSbAJekWRdkt9NsqR5/VOa93tdktfu7g+hqj5WDQZfzlwIM8tqnjAUNE3WAl+tqp+qqlcl+QVgOYOpkX8KeFySJzfbLgfeVlU/CdwCPBI4l8Fv2CuAfw/8G+B3gVfvYn+PBp7F4JvkrwW+V1WPBf4e2DH085GqOrmqHsNgaoQzq+p/M5hG4VVNrV9ttt2/qlZW1bk7dlCDWXxfCLy9mQPqVOAPu/xhNMNGzwc+3mV7CWBJ3wVIY/QLzePaZv2BDMLgG8AtVXXlrG2/XlXXASS5AdhQVZXkOgY3L5rL5VV1F3BXkjsYzKQJcB2Dmx0BPDrJf2cwtPVA4BO7qffCuRqr6oYk7wP+BviZGsz538XbgE9V1ac7bi8ZCppqAV5XVe+8T+Pg9qt377Tt7NlXfzBr/Qfs+t9Jl9e8F1hVVV9I8kLgKbupd+eaZjsB+C7w47vZppXkHGAp8Btdtpd2cPhI0+QuBtM37/AJ4D8keSBAkqOSdPpPdYQOBm5rhnKeO6t951p3KcmzgMOAJwN/kuTQPWz/YuAXgedU1UK5C5/mCUNBU6OqvgN8trkU841V9b+ADwB/3wwDXUTH/4hH6L8AVwGfBb40q/2DwKuaE9PLdvXiJIczOIH+4qr6CvCnDGZY3Z13AEcw6PfmJH+wLx3Q4uIsqZKklkcKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqTW/wfW3BGdMiI26gAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "ICub0BiC28b_",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 72
},
"outputId": "4ec9d1a7-bef1-44b0-cff2-1826318ef657"
},
"source": [
"import string #allows for format()\n",
" \n",
"def build_lexicon(corpus):\n",
" lexicon = set()\n",
" for doc in corpus:\n",
" lexicon.update([word for word in doc.split()])\n",
" return lexicon\n",
"\n",
"def freq(term, document):\n",
" return document.split().count(term)\n",
"\n",
"def numDocsContaining(word, doclist):\n",
" doccount = 0\n",
" for doc in doclist:\n",
" if freq(word, doc) > 0:\n",
" doccount +=1\n",
" return doccount \n",
"\n",
"def idf(word, doclist):\n",
" n_samples = len(doclist)\n",
" df = numDocsContaining(word, doclist)\n",
" return np.log(n_samples / 1+df)\n",
"\n",
"vocabulary = build_lexicon(cleaned_text)\n",
"mydoclist = cleaned_text\n",
"\n",
"my_idf_vector = [idf(word, mydoclist) for word in vocabulary]\n",
"\n",
"print ('Our vocabulary vector is [' + ', '.join(list(vocabulary)) + ']')\n",
"print ('The inverse document frequency vector is [' + ', '.join(format(freq, 'f') for freq in my_idf_vector) + ']')"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Our vocabulary vector is [document, maintain, program, facebook, color, reusabl, reread, valu, abov, per, made, develop, postcard, without, extend, second, perform, two, quit, background, read, fast, user, date, weather, applic, high, addit, design, thi, abl, seven, devic, diari, year, connect, sourc, good, save, social, less, stop, level, flexibilitywhich, way, post, use, three, secur, includ, minut, system, android, result, add, week, delet, contain, time, titl, modul, day, password, developmentso, form, code, media, photo, new, hoursth, one, creat, access, ha, need, editor, avail, continu, must, edit, interfac, interoperabilitybecaus, hour, degre, data, oper, calendar, structur, version, usabl, kitkat, flexibilityand, toler, introduc, respons, run, chang, share, open, aspect, lineinstagramand, integr, easi, age]\n",
"The inverse document frequency vector is [3.295837, 3.295837, 3.295837, 3.295837, 3.332205, 3.295837, 3.295837, 3.295837, 3.332205, 3.295837, 3.295837, 3.295837, 3.401197, 3.295837, 3.295837, 3.332205, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.806662, 3.295837, 3.295837, 3.496508, 3.526361, 3.295837, 3.295837, 3.401197, 3.295837, 3.332205, 3.295837, 3.713572, 3.295837, 3.295837, 3.295837, 3.295837, 3.332205, 3.295837, 3.295837, 3.295837, 3.433987, 3.295837, 3.295837, 3.295837, 3.401197, 3.332205, 3.295837, 3.332205, 3.295837, 3.401197, 3.295837, 3.295837, 3.465736, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.332205, 3.295837, 3.295837, 3.295837, 3.332205, 3.295837, 3.295837, 3.401197, 3.295837, 3.401197, 3.295837, 3.295837, 3.295837, 3.332205, 3.465736, 3.295837, 3.295837, 3.295837, 3.332205, 3.332205, 3.295837, 3.332205, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837, 3.367296, 3.295837, 3.332205, 3.295837, 3.295837, 3.295837, 3.295837, 3.295837]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "RfsovBpnSWqf",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 247
},
"outputId": "46a31ce8-f3e6-4f51-e3f8-310175425b50"
},
"source": [
"import numpy as np\n",
"\n",
"def build_idf_matrix(idf_vector):\n",
" idf_mat = np.zeros((len(idf_vector), len(idf_vector)))\n",
" np.fill_diagonal(idf_mat, idf_vector)\n",
" return idf_mat\n",
"\n",
"my_idf_matrix = build_idf_matrix(my_idf_vector)\n",
"my_idf_matrix"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[3.29583687, 0. , 0. , ..., 0. , 0. ,\n",
" 0. ],\n",
" [0. , 3.29583687, 0. , ..., 0. , 0. ,\n",
" 0. ],\n",
" [0. , 0. , 3.29583687, ..., 0. , 0. ,\n",
" 0. ],\n",
" ...,\n",
" [0. , 0. , 0. , ..., 3.29583687, 0. ,\n",
" 0. ],\n",
" [0. , 0. , 0. , ..., 0. , 3.29583687,\n",
" 0. ],\n",
" [0. , 0. , 0. , ..., 0. , 0. ,\n",
" 3.29583687]])"
]
},
"metadata": {
"tags": []
},
"execution_count": 13
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Pk2SruEu3Fk_",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 178
},
"outputId": "9a60de5f-bf49-4823-a3ac-e63804c3c6f7"
},
"source": [
"doc_term_matrix_tfidf = []\n",
"\n",
"#performing tf-idf matrix multiplication\n",
"for tf_vector in doc_array:\n",
" doc_term_matrix_tfidf.append(np.dot(tf_vector, my_idf_matrix))\n",
"\n",
"#normalizing\n",
"doc_term_matrix_tfidf_l2 = []\n",
"for tf_vector in doc_term_matrix_tfidf:\n",
" doc_term_matrix_tfidf_l2.append(l2_normalizer(tf_vector))\n",
" \n",
"print (vocabulary)\n",
"print (np.matrix(doc_term_matrix_tfidf_l2))\n",
"\n",
"# np.matrix() just to make it easier to look at"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"{'document', 'maintain', 'program', 'facebook', 'color', 'reusabl', 'reread', 'valu', 'abov', 'per', 'made', 'develop', 'postcard', 'without', 'extend', 'second', 'perform', 'two', 'quit', 'background', 'read', 'fast', 'user', 'date', 'weather', 'applic', 'high', 'addit', 'design', 'thi', 'abl', 'seven', 'devic', 'diari', 'year', 'connect', 'sourc', 'good', 'save', 'social', 'less', 'stop', 'level', 'flexibilitywhich', 'way', 'post', 'use', 'three', 'secur', 'includ', 'minut', 'system', 'android', 'result', 'add', 'week', 'delet', 'contain', 'time', 'titl', 'modul', 'day', 'password', 'developmentso', 'form', 'code', 'media', 'photo', 'new', 'hoursth', 'one', 'creat', 'access', 'ha', 'need', 'editor', 'avail', 'continu', 'must', 'edit', 'interfac', 'interoperabilitybecaus', 'hour', 'degre', 'data', 'oper', 'calendar', 'structur', 'version', 'usabl', 'kitkat', 'flexibilityand', 'toler', 'introduc', 'respons', 'run', 'chang', 'share', 'open', 'aspect', 'lineinstagramand', 'integr', 'easi', 'age'}\n",
"[[0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0.29646275 0. ... 0. 0. 0.29646275]\n",
" [0. 0. 0. ... 0. 0. 0. ]]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "drZ6NEQuSatQ",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 141
},
"outputId": "f166d2ad-1db9-42df-89f5-7ad578860737"
},
"source": [
"hasil_tfidf = np.matrix(doc_term_matrix_tfidf_l2)\n",
"print (hasil_tfidf)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"[[0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0.29646275 0. ... 0. 0. 0.29646275]\n",
" [0. 0. 0. ... 0. 0. 0. ]]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "BEyaSuM2_gKr",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "1ad03890-5d49-4b39-fd9f-79cb603447eb"
},
"source": [
"plt.hist(hasil_tfidf)\n",
"plt.xlabel('tfidf')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 17
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAARWklEQVR4nO3deZBlZX3G8e8DY6IoAZQRKCS2TlAgsoiDWuUSjZqgQdBoREqNWupIxESjfzhRK04lldJEgZgEjRiJo3HfsUSjzqBgKooDjmwTgygmIEJrNINLRPCXP/oMNEPP9Onl3Nvd7/dTdWvOee9Zfi+3efr2e7ZUFZKkduwx7gIkSaNl8EtSYwx+SWqMwS9JjTH4Jakxq8ZdQB/7779/TUxMjLsMSVpWLr744u9X1eqd25dF8E9MTLBly5ZxlyFJy0qS78zU7lCPJDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JasxgwZ/kkCTnJ7kyyRVJXta1b0hyXZKt3etJQ9UgSbqzIc/jvwV4ZVVdkmRv4OIkn+veO7Oq3jTgviVJuzBY8FfV9cD13fRNSbYBBw+1P0lSPyMZ408yATwY+ErX9NIklyY5J8l+u1hnXZItSbZMTk4uaP+nn3wCB56/lW2HHX5b27XrL2Ri/ac469TNt723YcOGOyx/1qmb2bR5DUduPPK25U8/+QSA25ffsM+CapOkURs8+JPcA/gI8PKq2g68FVgDHMPUXwSnz7ReVZ1dVWurau3q1Xe61YQkaZ4GDf4kd2Eq9N9TVR8FqKobqurWqvol8HbgoUPWIEm6oyHP6gnwDmBbVZ0xrf2gaYs9Fbh8qBokSXc25Fk9jwCeA1yWZGvX9mrglCTHAAVcA7x4wBokSTsZ8qyeLwGZ4a3zhtqnJGl2XrkrSY0x+CWpMSs/+Gc4z/7IjUcuaJMHnr919oUkaYla+cEvSboDg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGNBf8mzavGXcJkjRWzQW/JLXO4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfh3tmGfcVcgSYMy+CWpMQa/JDXG4Jekxhj8ktSYwYI/ySFJzk9yZZIrkrysa79nks8luar7d7+hapAk3dmQ3/hvAV5ZVUcADwdOS3IEsB7YVFWHApu6eUnSiAwW/FV1fVVd0k3fBGwDDgZOAjZ2i20EnjJUDZKkOxvJGH+SCeDBwFeAA6rq+u6t7wEH7GKddUm2JNkyOTk5ijIlqQmDB3+SewAfAV5eVdunv1dVBdRM61XV2VW1tqrWrl69eugyJakZgwZ/krswFfrvqaqPds03JDmoe/8g4MYha5Ak3dGQZ/UEeAewrarOmPbWucBzu+nnAp8YqgZJ0p2tGnDbjwCeA1yWZGvX9mrgDcAHk7wA+A7wjAFrkCTtZLDgr6ovAdnF248bar+SpN3zyl1JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1plfwJzly6EIkSaPR9xv/W5JclOQlSfYZtCJJ0qB6BX9VPQp4FnAIcHGS9yZ5wqCVSZIG0XuMv6quAl4LvAr4LeDvkvxHkt8fqjhJ0uLrO8Z/VJIzgW3AbwNPrqrDu+kzd7HOOUluTHL5tLYNSa5LsrV7PWkR+iBJmoO+3/j/HrgEOLqqTquqSwCq6rtM/RUwk3cCx8/QfmZVHdO9zptrwZKkhVnVc7nfA35WVbcCJNkDuGtV/bSq3j3TClV1QZKJRalSkrRo+n7j/zxwt2nze3Vt8/HSJJd2Q0H7zXMbkqR56hv8d62qH++Y6ab3msf+3gqsAY4BrgdO39WCSdYl2ZJky+Tk5Dx2JUmaSd/g/0mSY3fMJHkI8LO57qyqbqiqW6vql8DbgYfuZtmzq2ptVa1dvXr1XHclSdqFvmP8Lwc+lOS7QIADgZPnurMkB1XV9d3sU4HLd7e8JGnx9Qr+qvpqksOAB3ZN36iqX+xunSTvAx4D7J/kWuB1wGOSHAMUcA3w4nnWLUmap77f+AGOAya6dY5NQlW9a1cLV9UpMzS/Y27lSZIWW6/gT/Jupg7KbgVu7ZoL2GXwS5KWpr7f+NcCR1RVDVmMJGl4fc/quZypA7qSpGWu7zf+/YErk1wE/HxHY1WdOEhVkqTB9A3+DUMWIUkanb6nc34xyX2BQ6vq80n2AvYctjRJ0hD63pb5RcCHgbd1TQcDHx+qKEnScPoe3D0NeASwHW57KMu9hypKkjScvsH/86q6ecdMklVMnccvSVpm+gb/F5O8Grhb96zdDwGfHK4sSdJQ+gb/emASuIyp++ucx66fvCVJWsL6ntWz4zbKbx+2HEnS0Preq+fbzDCmX1X3X/SKJEmDmsu9ena4K/AHwD0XvxxJ0tB6jfFX1Q+mva6rqr9l6gHskqRlpu9Qz7HTZvdg6i+AudzLX5K0RPQN7+kPRb+FqadnPWPRq5EkDa7vWT2PHboQSdJo9B3qecXu3q+qMxanHEnS0OZyVs9xwLnd/JOBi4CrhihKkjScvsF/H+DYqroJIMkG4FNV9eyhCpMkDaPvLRsOAG6eNn9z1yZJWmb6fuN/F3BRko91808BNg5TkiRpSH3P6vmrJJ8GHtU1Pb+qvjZcWZKkofQd6gHYC9heVW8Grk1yv4FqkiQNqO+jF18HvAr4s67pLsC/DFWUJGk4fb/xPxU4EfgJQFV9F9h7qKIkScPpG/w3V1XR3Zo5yd2HK0mSNKS+wf/BJG8D9k3yIuDz+FAWSVqWZj2rJ0mADwCHAduBBwJ/XlWfG7g2SdIAZg3+qqok51XVkYBhL0nLXN+hnkuSHDdoJZKkkeh75e7DgGcnuYapM3vC1B8DRw1VmCRpGLsN/iS/XlX/BfzuXDec5BzgBODGqnpQ13ZPpo4XTNA9zKWqfjjXbUuS5m+2oZ6PA1TVd4Azquo701+zrPtO4Pid2tYDm6rqUGBTNy9JGqHZgj/Tpu8/lw1X1QXA/+zUfBK339xtI1M3e5MkjdBswV+7mJ6vA6rq+m76e+zm1s5J1iXZkmTL5OTkIuxakgSzB//RSbYnuQk4qpvenuSmJNsXsuPpVwLv4v2zq2ptVa1dvXr1QnYlSZpmtwd3q2rPRd7fDUkOqqrrkxwE3LjI25ckzWIut2VeDOcCz+2mnwt8YsT7l6TmDRb8Sd4H/DvwwCTXJnkB8AbgCUmuAh7fzUuSRqjvBVxzVlWn7OKtxw21T0nS7EY91CNJGjODX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JK9KmzWvGXcKSZfBLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINf0rK27bDDx13CsmPwS1JjDH5JaozBL0mNMfglqTGrxrHTJNcANwG3ArdU1dpx1CFJLRpL8HceW1XfH+P+JalJDvVIUmPGFfwFfDbJxUnWzbRAknVJtiTZMjk5OeLyJGnlGlfwP7KqjgWeCJyW5NE7L1BVZ1fV2qpau3r16tFXKEkr1FiCv6qu6/69EfgY8NBx1CFJLRp58Ce5e5K9d0wDvwNcPuo6JKlV4zir5wDgY0l27P+9VfWZMdQhSU0aefBX1beAo0e9X0nSFE/nlKTGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS7PYtHnNuEuQFpXBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+LSnbDjt83CVoAY7ceOS4S1iwDRs2LHgbc732Y9TXihj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/Fp0E+s/xVmnbubA87cCt18Qc/rJJ9zWBlMX+1y7/kIm1n9q1m3uuMDl2vUXwoZ9etey88U42w47fMaLZabXxYZ9dlnrEPpcvDPUvofQt9ZNm9cs2gVf0z/n6T9Pu9v+bvc9h5+x5cjgl6TGGPyS1BiDX5IaY/BLUmPGEvxJjk/yjSTfTLJ+HDVIUqtGHvxJ9gTOAp4IHAGckuSIUdchSa0axzf+hwLfrKpvVdXNwPuBk8ZQhyQ1KVU12h0mTweOr6oXdvPPAR5WVS/dabl1wLpu9oHAN+awm/2B7y9CuUvFSusPrLw+rbT+wMrr00rrD8zep/tW1eqdG1cNV8/CVNXZwNnzWTfJlqpau8gljc1K6w+svD6ttP7AyuvTSusPzL9P4xjquQ44ZNr8fbo2SdIIjCP4vwocmuR+SX4FeCZw7hjqkKQmjXyop6puSfJS4F+BPYFzquqKRd7NvIaIlrCV1h9YeX1aaf2BldenldYfmO9w+KgP7kqSxssrdyWpMQa/JDVmWQf/bLd+SPKrST7Qvf+VJBOjr7K/Hv15dJJLktzSXQ+x5PXo0yuSXJnk0iSbktx3HHX21aM/pya5LMnWJF9a6lel9719SpKnJakkS/50yB6f0fOSTHaf0dYkLxxHnX31+YySPKP7/+iKJO+ddaNVtSxfTB0Yvhq4P/ArwNeBI3Za5iXAP3bTzwQ+MO66F9ifCeAo4F3A08dd8yL16bHAXt30H62Az+jXpk2fCHxm3HUvpD/dcnsDFwBfBtaOu+5F+IyeB/zDuGtdxP4cCnwN2K+bv/ds213O3/j73PrhJGBjN/1h4HFJMsIa52LW/lTVNVV1KfDLcRQ4D336dH5V/bSb/TJT13UsVX36s33a7N2BpXz2RN/bp/wl8NfA/42yuHlaabeE6dOfFwFnVdUPAarqxtk2upyD/2Dgv6fNX9u1zbhMVd0C/C9wr5FUN3d9+rPczLVPLwA+PWhFC9OrP0lOS3I18DfAn4yotvmYtT9JjgUOqarZn4+5NPT9mXtaN7z44SSHzPD+UtGnPw8AHpDk35J8Ocnxs210OQe/VpAkzwbWAm8cdy0LVVVnVdUa4FXAa8ddz3wl2QM4A3jluGtZZJ8EJqrqKOBz3D4qsFytYmq45zHAKcDbk+y7uxWWc/D3ufXDbcskWQXsA/xgJNXN3Uq8lUWvPiV5PPAa4MSq+vmIapuPuX5G7weeMmhFCzNbf/YGHgR8Ick1wMOBc5f4Ad5ZP6Oq+sG0n7N/Ah4yotrmo8/P3LXAuVX1i6r6NvCfTP0i2LVxH7xYwEGPVcC3gPtx+0GP39xpmdO448HdD4677oX0Z9qy72R5HNzt8xk9mKmDV4eOu95F6s+h06afDGwZd90L6c9Oy3+BpX9wt89ndNC06acCXx533Qvsz/HAxm56f6aGhu612+2Ou2ML/I/ypO6329XAa7q2v2DqmyPAXYEPAd8ELgLuP+6aF9if45j67f4Tpv5yuWLcNS9Cnz4P3ABs7V7njrvmBfbnzcAVXV/O312QLoXXbP3ZadklH/w9P6PXd5/R17vP6LBx17zA/oSpIbkrgcuAZ862TW/ZIEmNWc5j/JKkeTD4JakxBr8kNcbgl6TGGPyS1BiDX5pBkn2TvGTa/Bu7Ox++sbsD5x/OsM5Eksunzb+vuy3An46qbqmPkT96UVom9mXq7q5v6ebXAfesqlv7rJzkQOC4qvqNgeqT5s3gl2b2BmBNkq3AJHAP4OIkrwcOB35cVW9K8hDgnG6dz05b/7PAwd36f1xVF46wdmm3HOqRZrYeuLqqjqmqJwA/66Y/sNNy/8xUsB+9U/uJ09Y39LWkGPzSPHV3QNy3qi7omt49znqkvgx+SWqMwS/N7Cambku8S1X1I+BHSR7ZNT1r8KqkReDBXWkGVfWD7olGl7P7p4I9HzgnSXHHg7vSkuXdOSWpMQ71SFJjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUmP8HcdfH+899eq8AAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "qPK6pLMOgN0H",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "c7cafd48-8059-4df5-f21e-abed52403bb2"
},
"source": [
"print(\"Loading frequency matrix...\")\n",
"t0 = time()\n",
"\n",
"frequency_TFIDF = pd.DataFrame(hasil_tfidf,index=cleaned_text,columns=count_vector.get_feature_names())\n",
"print(\"done in %0.3fs.\" % (time() - t0))\n",
"frequency_TFIDF"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading frequency matrix...\n",
"done in 0.001s.\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>abl</th>\n",
" <th>abov</th>\n",
" <th>access</th>\n",
" <th>add</th>\n",
" <th>addit</th>\n",
" <th>age</th>\n",
" <th>android</th>\n",
" <th>applic</th>\n",
" <th>aspect</th>\n",
" <th>avail</th>\n",
" <th>background</th>\n",
" <th>calendar</th>\n",
" <th>chang</th>\n",
" <th>code</th>\n",
" <th>color</th>\n",
" <th>connect</th>\n",
" <th>contain</th>\n",
" <th>continu</th>\n",
" <th>creat</th>\n",
" <th>data</th>\n",
" <th>date</th>\n",
" <th>day</th>\n",
" <th>degre</th>\n",
" <th>delet</th>\n",
" <th>design</th>\n",
" <th>develop</th>\n",
" <th>developmentso</th>\n",
" <th>devic</th>\n",
" <th>diari</th>\n",
" <th>document</th>\n",
" <th>easi</th>\n",
" <th>edit</th>\n",
" <th>editor</th>\n",
" <th>extend</th>\n",
" <th>facebook</th>\n",
" <th>fast</th>\n",
" <th>flexibilityand</th>\n",
" <th>flexibilitywhich</th>\n",
" <th>form</th>\n",
" <th>good</th>\n",
" <th>...</th>\n",
" <th>password</th>\n",
" <th>per</th>\n",
" <th>perform</th>\n",
" <th>photo</th>\n",
" <th>post</th>\n",
" <th>postcard</th>\n",
" <th>program</th>\n",
" <th>quit</th>\n",
" <th>read</th>\n",
" <th>reread</th>\n",
" <th>respons</th>\n",
" <th>result</th>\n",
" <th>reusabl</th>\n",
" <th>run</th>\n",
" <th>save</th>\n",
" <th>second</th>\n",
" <th>secur</th>\n",
" <th>seven</th>\n",
" <th>share</th>\n",
" <th>social</th>\n",
" <th>sourc</th>\n",
" <th>stop</th>\n",
" <th>structur</th>\n",
" <th>system</th>\n",
" <th>thi</th>\n",
" <th>three</th>\n",
" <th>time</th>\n",
" <th>titl</th>\n",
" <th>toler</th>\n",
" <th>two</th>\n",
" <th>usabl</th>\n",
" <th>use</th>\n",
" <th>user</th>\n",
" <th>valu</th>\n",
" <th>version</th>\n",
" <th>way</th>\n",
" <th>weather</th>\n",
" <th>week</th>\n",
" <th>without</th>\n",
" <th>year</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>user creat new diari</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.508065</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add titl diari</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.508065</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add weather diari</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.508065</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add date diari</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.508065</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add hour diari</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.492102</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.492102</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.502772</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add second diari</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.497283</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.508065</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user save diari</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.563473</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.59252</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.575690</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user add photo diari</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497283</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.508065</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user read diari ha creat</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.445266</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.445266</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.445266</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.454920</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user share diari form postcard</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.443320</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.448212</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.443320</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.448212</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.452932</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user save postcard</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.563473</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.59252</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.575690</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user chang postcard background color</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.442426</td>\n",
" <td>0.000000</td>\n",
" <td>0.456570</td>\n",
" <td>0.000000</td>\n",
" <td>0.442426</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.442426</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.452019</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user chang color post postcard</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.455581</td>\n",
" <td>0.000000</td>\n",
" <td>0.441469</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.44634</td>\n",
" <td>0.441469</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.451040</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user delet diari</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.573178</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.573178</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.585605</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user edit diari ha creat</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.444290</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.444290</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.449192</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.453923</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>user open calendar contain diari</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.445266</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.445266</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.445266</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.454920</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>system open diari editor</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.500000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.5</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.500000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>applic ha high level avail oper continu seven day per week hour per day without stop</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.222647</td>\n",
" <td>0.000000</td>\n",
" <td>0.222647</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.222647</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.445295</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.445295</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.222647</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.225104</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.222647</td>\n",
" <td>0.222647</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>applic must high degre flexibilitywhich must abl run devic use kitkat version android oper system abov</th>\n",
" <td>0.233123</td>\n",
" <td>0.233123</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.233123</td>\n",
" <td>0.233123</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.269254</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.233123</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.233123</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.235695</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.233123</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.233123</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.235695</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thi applic must high level integr data secur includ secur form password user</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.256891</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.256891</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.259725</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.259725</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.513781</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.256891</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.262460</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thi applic ha high usabl aspect easi use interfac applic introduc two hoursth new user creat diari less three minut</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.425138</td>\n",
" <td>0.214914</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.212569</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.212569</td>\n",
" <td>0.00000</td>\n",
" <td>0.212569</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.212569</td>\n",
" <td>0.212569</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.212569</td>\n",
" <td>0.212569</td>\n",
" <td>0.212569</td>\n",
" <td>0.217178</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thi applic must respons time valu quit good fast access extend perform result toler three second</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.247427</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.247427</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.278787</td>\n",
" <td>0.000000</td>\n",
" <td>0.247427</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.247427</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.247427</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.255337</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.247427</td>\n",
" <td>0.247427</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.247427</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.247427</td>\n",
" <td>0.247427</td>\n",
" <td>0.247427</td>\n",
" <td>0.000000</td>\n",
" <td>0.247427</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.247427</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>applic must high level interoperabilitybecaus one need thi applic connect social media lineinstagramand facebook</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.497957</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.251726</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.248979</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.251726</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.248979</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>document must includ system sourc code develop reread chang addit sourc code made high level maintain</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.223228</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.227850</td>\n",
" <td>0.441583</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.234235</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.22785</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.22785</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.441583</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.220792</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>applic must high degre flexibilityand use user age seven year abov</th>\n",
" <td>0.000000</td>\n",
" <td>0.296463</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.296463</td>\n",
" <td>0.000000</td>\n",
" <td>0.296463</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.342412</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.296463</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.296463</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.296463</td>\n",
" <td>0.302891</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.296463</td>\n",
" </tr>\n",
" <tr>\n",
" <th>modul system design program structur way continu use system developmentso reusabl level high</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.256412</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.256412</td>\n",
" <td>0.000000</td>\n",
" <td>0.274347</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.256412</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.256412</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.256412</td>\n",
" <td>0.512825</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.256412</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.256412</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26 rows × 104 columns</p>\n",
"</div>"
],
"text/plain": [
" abl ... year\n",
"user creat new diari 0.000000 ... 0.000000\n",
"user add titl diari 0.000000 ... 0.000000\n",
"user add weather diari 0.000000 ... 0.000000\n",
"user add date diari 0.000000 ... 0.000000\n",
"user add hour diari 0.000000 ... 0.000000\n",
"user add second diari 0.000000 ... 0.000000\n",
"user save diari 0.000000 ... 0.000000\n",
"user add photo diari 0.000000 ... 0.000000\n",
"user read diari ha creat 0.000000 ... 0.000000\n",
"user share diari form postcard 0.000000 ... 0.000000\n",
"user save postcard 0.000000 ... 0.000000\n",
"user chang postcard background color 0.000000 ... 0.000000\n",
"user chang color post postcard 0.000000 ... 0.000000\n",
"user delet diari 0.000000 ... 0.000000\n",
"user edit diari ha creat 0.000000 ... 0.000000\n",
"user open calendar contain diari 0.000000 ... 0.000000\n",
"system open diari editor 0.000000 ... 0.000000\n",
"applic ha high level avail oper continu seven d... 0.000000 ... 0.000000\n",
"applic must high degre flexibilitywhich must ab... 0.233123 ... 0.000000\n",
"thi applic must high level integr data secur in... 0.000000 ... 0.000000\n",
"thi applic ha high usabl aspect easi use interf... 0.000000 ... 0.000000\n",
"thi applic must respons time valu quit good fas... 0.000000 ... 0.000000\n",
"applic must high level interoperabilitybecaus o... 0.000000 ... 0.000000\n",
"document must includ system sourc code develop ... 0.000000 ... 0.000000\n",
"applic must high degre flexibilityand use user ... 0.000000 ... 0.296463\n",
"modul system design program structur way contin... 0.000000 ... 0.000000\n",
"\n",
"[26 rows x 104 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 18
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "CdxnABIQgPfZ"
},
"source": [
"### Visualisation"
]
},
{
"cell_type": "code",
"metadata": {
"id": "6ZLOPMXGePau",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 370
},
"outputId": "e8c4b0ac-5254-43aa-b297-de53f4c52c46"
},
"source": [
"import umap\n",
"\n",
"print(\"Loading Topic Visualisation...\")\n",
"t0 = time()\n",
"\n",
"visual_tfidf = hasil_tfidf\n",
"embedding = umap.UMAP(n_neighbors=3, min_dist=0.1, random_state=4).fit_transform(visual_tfidf)\n",
"plt.figure(figsize=(7,5))\n",
"plt.scatter(embedding[:, 0], embedding[:, 1], c = None, s = 50 ,edgecolors=None)\n",
"plt.xlabel('widht')\n",
"plt.ylabel('height')\n",
"plt.show()\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Topic Visualisation...\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAbYAAAE9CAYAAABnfkdrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAbC0lEQVR4nO3df3Bd5X3n8c8nUqStZJpgW0lwEkdJl6WbEpslsiGZstMWN3UYJySddIBJWuyQ9WQ2acLupB1+7CYdaClpdttpt91m6QbMpgTSpmHDmNDyI9mSnQJCprZxDAmECmrMDwEbiKVWRs53/7hH5vr6Xuno6p4f99z3a0aje885uuerK+l8dJ7znOdxRAgAgKp4VdEFAADQSQQbAKBSCDYAQKUQbACASiHYAACVQrABACqlv+gC0li9enWMjo4WXQYAoER27dr1XESMNC7vimAbHR3VxMRE0WUAAErE9uPNltMUCQCoFIINAFApmQWb7WttP2t7X92yK23vtb3b9u2212S1fwBAb8ryjG2HpM0Ny74QEesi4jRJOyV9NsP9AwB6UGbBFhF3S3qhYdlLdU+HJTECMwCgo3LvFWn7dyT9mqQXJf183vsHAFRb7p1HIuLyiHizpBskfbLVdra3256wPTE1NZVfgQCATByandNN40/o6tse0k3jT+jQ7Fwm+3GW87HZHpW0MyJObbJuraRvNlvXaGxsLLiPDQC61/2TL2jrdeOKkGYOH9HQQJ9sace2jdowurKt17S9KyLGGpfnesZm++S6p+dKejjP/QMA8ndodk5brxvX9OwRzRw+IqkWbtOzR5LlnT1zy7K7/42S7pF0iu0Dti+SdLXtfbb3SnqPpE9ntX8AQDns3HNQrRoHI6Sdew92dH+ZdR6JiAuaLP5SVvsDABTr0Oycdu45qMnnpzW6alhb1q/RisF+TT4/ffRMrdHM4SOafG6mo3V0xViRAIBya3YN7cpb92vHto0aXTWsoYG+puE2NNCn0dVDHa2FIbUAAMuy2DW0X/jp18lu/rW2tGVdZwehItgAAMuy2DW0b3/vWe3YtlHDg30aGuiTVDtTGx7sS5Z3tvGQpkgAwLKkuYZ23oa1Gr9sk3buPajJ52Y0unpIW9at6XioSQQbAGCZ0l5DGx7s13kb1mZeD02RAIBl2bJ+Ta7X0BZDsAEAlmXFYH+u19AWQ1MkAGBRre5Rm7dhdGVu19AWk+lYkZ3CWJEAUJwsxnnshFKMFQkA6C6dHOcxr9H9aYoEALSUZpzHND0dFxqZpNNnfZyxAQBa6sQ4j5UZ3R8A0P3m71FrJu04j3mP7k+wAQBa6sQ9anmP7k+wAQBaSnuP2kIdQzpx1rcUdPcHACxqenau5T1qi90OcGh2TmdcdaemZ48/axse7NP4ZZvaut+tVXd/gg0A0La0oZXFvXCtgo3u/gCAtqW9HSDPkUkINgBA25bSMYTR/QEApZd3x5A0CDYAQNvKNmWNRLABAJahbFPWSFxjAwAsU5mmrJEINgBAB+TVMSSNzJoibV9r+1nb++qWfcH2w7b32r7Z9muz2j8AoDdleY1th6TNDcvukHRqRKyT9H1Jl2a4fwBAAfKad62VzJoiI+Ju26MNy26ve3qvpA9ltX8AQP7ynHetlSJ7RX5U0m0F7h8A0EF5z7vWSiHBZvtySXOSblhgm+22J2xPTE1N5VccAKAtec+71kruwWZ7q6Qtkj4cC4zAHBHXRMRYRIyNjIzkVh8AoD15z7vWSq7BZnuzpN+U9P6IyOc7BADkoizDa2XZ3f9GSfdIOsX2AdsXSfpjSSdIusP2bttfzGr/AIB8lWV4rSx7RV7QZPGXstofAKBY88NrtZp3La+RSBh5BADQMWUYXotgAwB0VNHDazG6PwCgUgg2AEClEGwAgEoh2AAAlUKwAQAqhWADAFQKwQYAqBSCDQBQKQQbAKBSCDYAQKUQbACASiHYAACVQrABACqFYAMAVArBBgCoFIINAFApBBsAoFIINgBApRBsAIBKIdgAAJVCsAEAKoVgAwBUSmbBZvta28/a3le37Fdsf9f2j22PZbVvAEDvyvKMbYekzQ3L9kn6ZUl3Z7hfAEAP68/qhSPibtujDcsekiTbWe0WANDjuMYGAKiU0gab7e22J2xPTE1NFV0OAKBLlDbYIuKaiBiLiLGRkZGiywEAdInSBhsAAO3Isrv/jZLukXSK7QO2L7L9QdsHJL1L0q22/yar/QMAelOWvSIvaLHq5qz2CQAATZEAgEoh2AAAlUKwAQAqhWADAFQKwQYAqBSCDQBQKQQbAKBSCDYAQKUQbACASiHYAACVQrABACqFYAMAVArBBgCoFIINAFApBBsAoFIINgBApRBsAIBKIdgAAJVCsAEAKoVgAwBUCsEGAKgUgg0AUCkEGwCgUgg2AEClZBZstq+1/aztfXXLVtq+w/YjyecTs9o/AKA3ZXnGtkPS5oZll0i6KyJOlnRX8hwAgI7JLNgi4m5JLzQsPlfS9cnj6yV9IKv9AwB6U97X2F4fEU8lj5+W9Pqc9w8AqLjCOo9EREiKVuttb7c9YXtiamoqx8oAAN0s72B7xvZJkpR8frbVhhFxTUSMRcTYyMhIbgUCALpb3sF2i6QLk8cXSvpGzvsHAFRclt39b5R0j6RTbB+wfZGkqyX9ou1HJG1KngMA0DH9Wb1wRFzQYtXZWe0TAABGHgEAVArBBgCoFIINAFApBBsAoFIINgBApRBsAIBKIdgAAJVCsAEAKoVgAwBUCsEGAKiUVMFm+8tplgEAULS0Z2w/U//Edp+kd3a+HAAAlmfBYLN9qe0fSVpn+6Xk40eqzaPGlDMAgNJZMNgi4ncj4gRJX4iIn0w+ToiIVRFxaU41AgCQWqppayLiUttvlPSW+q+JiLuzKgwAgHakCjbbV0s6X9J+SUeSxSGJYAMAlEraiUY/KOmUiJjNshgAAJYrba/IxyS9OstCAADohAXP2Gz/N9WaHGck7bZ9l6SjZ20R8alsywMAYGkWa4qcSD7vknRLxrUAALBsCwZbRFyfVyEAAHRC2l6RD6rWJFnvRdXO6H47Ip7vdGEAALQjba/I21Tr5v+V5Pn5koYkPS1ph6T3dbwyAADakDbYNkXE6XXPH7T9QEScbvsjWRQGAEA70nb377O9cf6J7Q2S+pKnc0vdqe1P295n+7u2L17q1wMA0EraM7aPSbrW9gpJlvSSpI/ZHpb0u0vZoe1TJf07SRslHZb017Z3RsSjS3kdAACaSTtW5P2S3mH7NcnzF+tW/8US9/mvJd0XETOSZPtvJf2ypN9b4usAAHCcxW7Q/khE/Lnt/9iwXJIUEb/fxj73Sfod26sk/ZOkc/TK/XIAACzLYmdsw8nnEzq1w4h4yPbnJd0uaVrSbr0ysPJRtrdL2i5Ja9eu7dTuAQAV54jG29NyLsC+StKBiPjvrbYZGxuLiQlO6gAAr7C9KyLGGpen6hVp+1/Zvsv2vuT5Otv/aRnFvC75vFa162tfWfgrAABIJ22vyD+T9BuS/ockRcRe21+R9Ntt7vevkmtsL0v6RET8sM3XAVo6NDunnXsOavL5aY2uGtaW9Wu0YjDtrzyAbpX2r3woIsbnO40klnz/2ryIOKvdrwXSuH/yBW29blwR0szhIxoa6NOVt+7Xjm0btWF0ZdHlAchQ2hu0n7P9U0rGi7T9IUlPZVYVsAyHZue09bpxTc8e0czhWr+kmcNHND17JFne9v9kALpA2mD7hGrNkD9t+0lJF0v6eGZVAcuwc89BteoTFSHt3Hsw34IA5CptU+STkq6T9G1JK1UbeeRCSVdkVBfQtsnnp4+eqTWaOXxEk8/N5FwRgDylDbZvSPqhpAck8e8ucrXUTiCjq4Y1NNDXNNyGBvo0unooy3IBFCxtsL0pIjZnWgnQRDudQLasX6Mrb93fdJ0tbVm3JsuSARQs7TW2v7P9jkwrQU84NDunm8af0NW3PaSbxp/QoQU6ciy1E8j8a//xtx7Rr575Fg0NvEpDA7VJKIYG+jQ82Kcd2zZqmC7/QKUtNlbk/MzZ/ZK22X5M0qxqI/xHRKzLvkRUxVLPvtJ0Ajlvw9qWr21Zv/qut8iyRlcPacu6NYQa0AMW+yvfkksVqLz6s69582dhW68b1/hlm44LnbSdQBZ67S/f+3jT1wZQXQs2RUbE4wt95FUkul87XfDnO4E0U98JhO79AOqlvcYGLEs7XfC3rF+jYwe7eUV9JxC69wOoR7AhF2nPvuqtGOxPOnv0LdgJpJ3XBlBdXHhALtrtgr9hdKXGL9uknXsPavK5maadQOjeD6AewYZczJ99Hddz0Vq0C/7wYP/R3o+dfm0A1VP4RKNpMNFodUzPzi149lXW1wZQPq0mGuWvHrla7OyrrK8NoHvQeQQAUCmcsaFwzHQNoJM4eqBQzHQNoNNoikRhmOkaQBYINhSGobAAZIFgQ2EYCgtAFgg2FIahsABkgWBDYdIOcgwAS0GwITOLzZaddpBjAFgKhtRCJprOaJ2M3djYjZ+hsAC0o9WQWoUEm+3/IOljkkLSg5K2RcQ/t9qeYOsuh2bndMZVdx4zo/W84cE+ZrQG0BGtgi33pkjbb5T0KUljEXGqpD5J5+ddB7JDN34ARSrqGlu/pJ+w3S9pSBJHugqhGz+AIuUebBHxpKT/IukJSU9JejEibm/czvZ22xO2J6ampvIuE8tAN34ARSqiKfJESedKequkNZKGbX+kcbuIuCYixiJibGRkJO8ysQx04wdQpCKaIjdJ+oeImIqIlyV9XdK7C6gDGaEbP4AiFXGEeULSmbaHJP2TpLMl0eWxYjaMrtT4ZZvoxg8gd7kfZSLiPttfk/SApDlJfy/pmrzrQPaY0RpAEQr59zkiPifpc0XsG9lhwlAAZcBRBx3BhKEAyoKxIrFsTBgKoEwINiwbI40AKBOCDcvGSCMAyoRgw7Ix0giAMiHYsGyMNAKgTAg2LBsjjQAoE4446AhGGgFQFhx10DGMNAKgDGiKBABUCsEGAKgUgg0AUCkEGwCgUgg2AECl0CsSS8LUNADKjiMSUmNqGgDdgKZIpMLUNAC6BcGGVJiaBkC3INiQClPTAOgWBBtSYWoaAN2CYEMqTE0DoFsQbEiFqWkAdAuORkiNqWkAdAOOSFgSpqYBUHa5N0XaPsX27rqPl2xfnHcdAIBqyv2MLSK+J+k0SbLdJ+lJSTfnXQcAoJqK7jxytqQfRMTjBdcBAKiIooPtfEk3Nlthe7vtCdsTU1NTOZcFAOhWhQWb7QFJ75f0l83WR8Q1ETEWEWMjIyP5FgcA6FpFnrG9V9IDEfFMgTUAACqmyGC7QC2aIQEAaFchwWZ7WNIvSvp6EfsHAFRXITdoR8S0pFVF7BsAUG1F94oEAKCjCDYAQKUQbACASiHYAACVQrABACqFaWt63KHZOe3cc1CTz09rdNWwtqxfoxXMrwagi3EE62H3T76grdeNK0KaOXxEQwN9uvLW/dqxbaM2jK4sujwAaAtNkT3q0Oyctl43runZI5o5fERSLdymZ48ky+cKrhAA2kOw9aidew4qovm6CGnn3oP5FgQAHUKw9ajJ56ePnqk1mjl8RJPPzeRcEQB0BsHWo0ZXDWtooK/puqGBPo2uHsq5IgDoDIKtR21Zv0Z283W2tGXdmnwLAoAOIdh61IrBfu3YtlHDg31Hz9yGBvo0PNiXLKfDLIDuxNGrh20YXanxyzZp596DmnxuRqOrh7Rl3RpCDUBX4wjW44YH+3XehrVFlwEAHUNTJACgUgg2AEClEGwAgErhGlvJMUgxACwNR8gSY5BiAFg6miJLikGKAaA9BFtJMUgxALSHYCspBikGgPYQbCXFIMUA0J5Cgs32a21/zfbDth+y/a4i6iiLQ7Nzumn8CV1920O6afwJHZqdY5BiAGhTUb0i/1DSX0fEh2wPSOrZ04+Fej7u2LbxuHW2GKQYABbgaNVDIasd2q+RtFvS2yLlzsfGxmJiYiLbwgpwaHZOZ1x1p6Znj7+WNjzYp/HLNkkSgxQDQBO2d0XEWOPyIo6Qb5U0Jek62+sl7ZL06YiYLqCWY+R9M3Sano/nbVi7rEGKucEbQK8p4gjXL+l0Sb8eEffZ/kNJl0j6z/Ub2d4uabskrV2b/ejzS7kZulNhkXXPR27wBtCLiug8ckDSgYi4L3n+NdWC7hgRcU1EjEXE2MjISKYFLeVm6PsnX9AZV92pK3bu1xf/9jFdsXO/zrjqTt0/+cKS95tlz0du8AbQq3IPtoh4WtI/2j4lWXS2pP1511Ev7c3QnQ6LLHs+coM3gF5V1H1svy7pBtt7JZ0m6aqC6pCUvkmw02GxYrA/6eHYd/TMbWigT8ODfcvu+cgN3gB6VSG9CCJit6TjerJkZbFrYvNNgs2CoL5JMIuw2DC6UuOXbep4z8e03xMAVE3lu8el6UCxZf0aXXlr89bQ+ibBrMJieLB/WT0fm0n7PQFA1VR6SK2018TSNgl202ggWTZzAkCZVfrolvY+MSldk+B8WHTLaCBZNXMCQJlV+gi32DWx7z9z6JhlaZoEuy0ssmjmBIAyK+fRuEMWuiYmSV++d1KbT33Dkm9WThsWeYz6wcgiAHCs3MeKbEe7Y0UuNBbjvPkxGTt9xtWs08p8k2WnRv3IYx8AUFatxoqsdOeR+Wtig/2tv80sblbOY9QPRhYBgOYqHWxS7ZrYR85s3WyYxc3Ky72Ru9n8bJ3eBwBUVU9cjDn5dSfkerPycm7kTjtwMSOLAEBzlT9jk/K//6zdwY2X0ryY5QDKANDNeiLY8r5ZealBOt/0+IkbdunluR83/brG5sVuulkcAPLUE02RUr73ny3lRu7GpsdWGpsXu+1mcQDIS6W7+xdtenZuwSBNczvCvKGBPn3ufW8/7v65xfYBAFXVqrs/R8AMLXYj90I9Gxu1al5kZBEAOBbBVqCFejbOo3kRAJaGI2WBFhrya6DPevdPrdZ73/EGmhcBYAl6oldkWS3Us/HV/a/Sn3z4dJ23YS2hBgBLQLAViDnTAKDzOHIWrNumwQGAsuPoWQL0bASAzqEpEgBQKQQbAKBSCDYAQKUQbACASiHYAACVQrABACqFYAMAVEpXTFtje0rS4xnvZrWk5zLeR1a6tfZurVui9iJ0a91S99Ze9rrfEhEjjQu7ItjyYHui2bw+3aBba+/WuiVqL0K31i11b+3dWjdNkQCASiHYAACVQrC94pqiC1iGbq29W+uWqL0I3Vq31L21d2XdXGMDAFQKZ2wAgErp2WCz/QXbD9vea/tm269tsd2k7Qdt77Y9kXedDbVstv0924/avqTJ+kHbX03W32d7NP8qj6vpzba/bXu/7e/a/nSTbX7O9ovJe7zb9meLqLWZxX7+rvmj5D3fa/v0IupsqOmUuvdyt+2XbF/csE1p3nPb19p+1va+umUrbd9h+5Hk84ktvvbCZJtHbF+YX9VH99+s9tIfW1rU/Vu2n6z7nTinxdcueBwqhYjoyQ9J75HUnzz+vKTPt9huUtLqEtTbJ+kHkt4maUDSHklvb9jm30v6YvL4fElfLUHdJ0k6PXl8gqTvN6n75yTtLLrWdn7+ks6RdJskSzpT0n1F19zk9+Zp1e73KeV7LunfSjpd0r66Zb8n6ZLk8SXN/j4lrZT0WPL5xOTxiSWovfTHlhZ1/5akz6T4fVrwOFSGj549Y4uI2yNiLnl6r6Q3FVlPChslPRoRj0XEYUk3STq3YZtzJV2fPP6apLNtO8cajxMRT0XEA8njH0l6SNIbi6ypw86V9L+i5l5Jr7V9UtFF1Tlb0g8iIusBDtoWEXdLeqFhcf3v8vWSPtDkS39J0h0R8UJE/D9Jd0janFmhTTSrvRuOLS3e8zTSHIcK17PB1uCjqv3X3UxIut32Ltvbc6yp0Rsl/WPd8wM6PiCObpP8Yb0oaVUu1aWQNI3+G0n3NVn9Ltt7bN9m+2dyLWxhi/380/xcinS+pBtbrCvrey5Jr4+Ip5LHT0t6fZNtyv7eS91xbKn3yaQJ9doWzb/d8J6rv+gCsmT7TklvaLLq8oj4RrLN5ZLmJN3Q4mV+NiKetP06SXfYfjj5bwdLYHuFpL+SdHFEvNSw+gHVmsoOJe36/1vSyXnX2ELX/vxtD0h6v6RLm6wu83t+jIgI213XfbsLjy1/KulK1QL3Skn/VbVg7jqVPmOLiE0RcWqTj/lQ2yppi6QPR9KA3OQ1nkw+PyvpZtVOxYvwpKQ31z1/U7Ks6Ta2+yW9RtLzuVS3ANuvVi3UboiIrzeuj4iXIuJQ8vibkl5te3XOZTaV4uef5udSlPdKeiAinmlcUeb3PPHMfJNu8vnZJtuU9r3vsmPLfD3PRMSRiPixpD9rUU9p3/N6lQ62hdjeLOk3Jb0/ImZabDNs+4T5x6pdFN7XbNsc3C/pZNtvTf4TP1/SLQ3b3CJpvmfYhyR9q9UfVV6Sa3xfkvRQRPx+i23eMH8t0PZG1X4vyxDIaX7+t0j6taR35JmSXqxrQivaBWrRDFnW97xO/e/yhZK+0WSbv5H0HtsnJs1m70mWFaoLjy3zNdVfG/6gmteT5jhUvKJ7rxT1IelR1dqKdycf870J10j6ZvL4bar1+tkj6buqNWEWWfM5qvUq/MF8LZKuUO0PSJL+haS/TL63cUlvK8H7/LOqNW3srXuvz5H0cUkfT7b5ZPL+7lHtYvu7i657oZ9/Q+2W9CfJz+RBSWNF153UNaxaUL2mblkp33PVwvcpSS+rds3mItWuDd8l6RFJd0pamWw7Jul/1n3tR5Pf90clbStJ7aU/trSo+8vJ7/Be1cLqpMa6k+fHHYfK9sHIIwCASunZpkgAQDURbACASiHYAACVQrABACqFYAMAVArBBpSU7W82Gxk+GYX9M8nj/2N7rMk2p7UanR2oOoINKKmIOCciftjml5+m2v1GQM8h2ICC2P4N259KHv+B7W8lj3/B9g3JfF2rk2WX2/6+7f8r6ZSGl/oV2+PJ+rOSESGukHReMq/WeXl+X0DRCDagON+RdFbyeEzSimRczbMkHR0M1/Y7VRu6aP4sbEPD6/RHxEZJF0v6XNSmE/msavPxnRYRX8322wDKhWADirNL0jtt/6SkWUn3qBZwZ6kWevPOknRzRMxEbWaExrH55geW3iVpNNOKgS5Q6WlrgDKLiJdt/4OkrZL+TrUx+n5e0r9UbULWtGaTz0fE3zTAGRtQsO9I+oxqTY/fUW2g4r+PYwdxvVvSB2z/RDIi/PtSvO6PJJ3Q6WKBbkCwAcX6jqSTJN0TtXnT/lnHNkMqIh6Q9FXVRoK/TbWpQxbzbUlvp/MIehGj+wMAKoUzNgBApRBsAIBKIdgAAJVCsAEAKoVgAwBUCsEGAKgUgg0AUCkEGwCgUv4/wmFCkhAvLE0AAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 504x360 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
},
{
"output_type": "stream",
"text": [
"done in 1.022s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "kreXI6-1eYAK"
},
"source": [
"# Cosine SImilarity"
]
},
{
"cell_type": "code",
"metadata": {
"id": "QozqHPJHeW6U",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 265
},
"outputId": "8dbf69c9-97df-4334-ef7f-ea05c8723e35"
},
"source": [
"import numpy as np\n",
"from sklearn.metrics import pairwise_distances\n",
"from sklearn.metrics.pairwise import pairwise_kernels\n",
"X = np.array(visual_tfidf[0:])\n",
"Y = np.array(visual_tfidf)\n",
"print(\"X\",X)\n",
"print(\"y\",Y)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"X [[0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0.29646275 0. ... 0. 0. 0.29646275]\n",
" [0. 0. 0. ... 0. 0. 0. ]]\n",
"y [[0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0.29646275 0. ... 0. 0. 0.29646275]\n",
" [0. 0. 0. ... 0. 0. 0. ]]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "lHg2zERJebCx",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "729a6e6f-dcdf-4f79-f3de-df59326dfdf1"
},
"source": [
"cosine_similaritas = pairwise_kernels(X, Y, metric='linear')\n",
"cosine_similaritas"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[1. , 0.50541977, 0.50541977, 0.50541977, 0.50015444,\n",
" 0.50541977, 0.57269296, 0.50541977, 0.67397514, 0.45057401,\n",
" 0.29248764, 0.22965476, 0.22915762, 0.58255698, 0.67249742,\n",
" 0.45255199, 0.24864137, 0. , 0. , 0.13334684,\n",
" 0.42746079, 0. , 0. , 0. , 0.15388796,\n",
" 0. ],\n",
" [0.50541977, 1. , 0.75270988, 0.75270988, 0.74486836,\n",
" 0.75270988, 0.57269296, 0.75270988, 0.45255199, 0.45057401,\n",
" 0.29248764, 0.22965476, 0.22915762, 0.58255698, 0.45155975,\n",
" 0.45255199, 0.24864137, 0. , 0. , 0.13334684,\n",
" 0.21604713, 0. , 0. , 0. , 0.15388796,\n",
" 0. ],\n",
" [0.50541977, 0.75270988, 1. , 0.75270988, 0.74486836,\n",
" 0.75270988, 0.57269296, 0.75270988, 0.45255199, 0.45057401,\n",
" 0.29248764, 0.22965476, 0.22915762, 0.58255698, 0.45155975,\n",
" 0.45255199, 0.24864137, 0. , 0. , 0.13334684,\n",
" 0.21604713, 0. , 0. , 0. , 0.15388796,\n",
" 0. ],\n",
" [0.50541977, 0.75270988, 0.75270988, 1. , 0.74486836,\n",
" 0.75270988, 0.57269296, 0.75270988, 0.45255199, 0.45057401,\n",
" 0.29248764, 0.22965476, 0.22915762, 0.58255698, 0.45155975,\n",
" 0.45255199, 0.24864137, 0. , 0. , 0.13334684,\n",
" 0.21604713, 0. , 0. , 0. , 0.15388796,\n",
" 0. ],\n",
" [0.50015444, 0.74486836, 0.74486836, 0.74486836, 1. ,\n",
" 0.74486836, 0.5667268 , 0.74486836, 0.44783743, 0.44588006,\n",
" 0.28944058, 0.22726228, 0.22677031, 0.57648806, 0.44685553,\n",
" 0.44783743, 0.24605109, 0.11894297, 0. , 0.13195767,\n",
" 0.21379641, 0. , 0. , 0. , 0.1522848 ,\n",
" 0. ],\n",
" [0.50541977, 0.75270988, 0.75270988, 0.75270988, 0.74486836,\n",
" 1. , 0.57269296, 0.75270988, 0.45255199, 0.45057401,\n",
" 0.29248764, 0.22965476, 0.22915762, 0.58255698, 0.45155975,\n",
" 0.45255199, 0.24864137, 0. , 0. , 0.13334684,\n",
" 0.21604713, 0.12304112, 0. , 0. , 0.15388796,\n",
" 0. ],\n",
" [0.57269296, 0.57269296, 0.57269296, 0.57269296, 0.5667268 ,\n",
" 0.57269296, 1. , 0.57269296, 0.5127883 , 0.51054704,\n",
" 0.68249834, 0.26022264, 0.25965932, 0.66009742, 0.51166399,\n",
" 0.5127883 , 0.28173643, 0. , 0. , 0.15109579,\n",
" 0.24480379, 0. , 0. , 0. , 0.174371 ,\n",
" 0. ],\n",
" [0.50541977, 0.75270988, 0.75270988, 0.75270988, 0.74486836,\n",
" 0.75270988, 0.57269296, 1. , 0.45255199, 0.45057401,\n",
" 0.29248764, 0.22965476, 0.22915762, 0.58255698, 0.45155975,\n",
" 0.45255199, 0.24864137, 0. , 0. , 0.13334684,\n",
" 0.21604713, 0. , 0. , 0. , 0.15388796,\n",
" 0. ],\n",
" [0.67397514, 0.45255199, 0.45255199, 0.45255199, 0.44783743,\n",
" 0.45255199, 0.5127883 , 0.45255199, 1. , 0.4034432 ,\n",
" 0.26189293, 0.20563248, 0.20518734, 0.52162053, 0.79998025,\n",
" 0.40521428, 0.22263305, 0.09913733, 0. , 0.11939853,\n",
" 0.38274766, 0. , 0. , 0. , 0.13779102,\n",
" 0. ],\n",
" [0.45057401, 0.45057401, 0.45057401, 0.45057401, 0.44588006,\n",
" 0.45057401, 0.51054704, 0.45057401, 0.4034432 , 1. ,\n",
" 0.51054704, 0.40087014, 0.40000235, 0.51934066, 0.40255863,\n",
" 0.4034432 , 0.22165999, 0. , 0. , 0.23528858,\n",
" 0.19260272, 0. , 0. , 0. , 0.13718877,\n",
" 0. ],\n",
" [0.29248764, 0.29248764, 0.29248764, 0.29248764, 0.28944058,\n",
" 0.29248764, 0.68249834, 0.29248764, 0.26189293, 0.51054704,\n",
" 1. , 0.50951786, 0.50841487, 0.33712713, 0.26131872,\n",
" 0.26189293, 0. , 0. , 0. , 0.15109579,\n",
" 0.125027 , 0. , 0. , 0. , 0.174371 ,\n",
" 0. ],\n",
" [0.22965476, 0.22965476, 0.22965476, 0.22965476, 0.22726228,\n",
" 0.22965476, 0.26022264, 0.22965476, 0.20563248, 0.40087014,\n",
" 0.50951786, 1. , 0.80251794, 0.2647047 , 0.20518162,\n",
" 0.20563248, 0. , 0. , 0. , 0.11863704,\n",
" 0.09816841, 0. , 0. , 0.10402935, 0.13691222,\n",
" 0. ],\n",
" [0.22915762, 0.22915762, 0.22915762, 0.22915762, 0.22677031,\n",
" 0.22915762, 0.25965932, 0.22915762, 0.20518734, 0.40000235,\n",
" 0.50841487, 0.80251794, 1. , 0.26413167, 0.20473745,\n",
" 0.20518734, 0. , 0. , 0. , 0.11838022,\n",
" 0.0979559 , 0. , 0. , 0.10380416, 0.13661584,\n",
" 0. ],\n",
" [0.58255698, 0.58255698, 0.58255698, 0.58255698, 0.57648806,\n",
" 0.58255698, 0.66009742, 0.58255698, 0.52162053, 0.51934066,\n",
" 0.33712713, 0.2647047 , 0.26413167, 1. , 0.52047685,\n",
" 0.52162053, 0.28658904, 0. , 0. , 0.15369825,\n",
" 0.24902027, 0. , 0. , 0. , 0.17737436,\n",
" 0. ],\n",
" [0.67249742, 0.45155975, 0.45155975, 0.45155975, 0.44685553,\n",
" 0.45155975, 0.51166399, 0.45155975, 0.79998025, 0.40255863,\n",
" 0.26131872, 0.20518162, 0.20473745, 0.52047685, 1. ,\n",
" 0.40432583, 0.22214492, 0.09891997, 0. , 0.11913675,\n",
" 0.38190847, 0. , 0. , 0. , 0.1374889 ,\n",
" 0. ],\n",
" [0.45255199, 0.45255199, 0.45255199, 0.45255199, 0.44783743,\n",
" 0.45255199, 0.5127883 , 0.45255199, 0.40521428, 0.4034432 ,\n",
" 0.26189293, 0.20563248, 0.20518734, 0.52162053, 0.40432583,\n",
" 1. , 0.44526611, 0. , 0. , 0.11939853,\n",
" 0.19344823, 0. , 0. , 0. , 0.13779102,\n",
" 0. ],\n",
" [0.24864137, 0.24864137, 0.24864137, 0.24864137, 0.24605109,\n",
" 0.24864137, 0.28173643, 0.24864137, 0.22263305, 0.22165999,\n",
" 0. , 0. , 0. , 0.28658904, 0.22214492,\n",
" 0.44526611, 1. , 0. , 0.11656128, 0. ,\n",
" 0.10628444, 0. , 0. , 0.11039584, 0. ,\n",
" 0.25641247],\n",
" [0. , 0. , 0. , 0. , 0.11894297,\n",
" 0. , 0. , 0. , 0.09913733, 0. ,\n",
" 0. , 0. , 0. , 0. , 0.09891997,\n",
" 0. , 0. , 1. , 0.15571237, 0.17530336,\n",
" 0.1893116 , 0.05508894, 0.22533871, 0.1015106 , 0.19801995,\n",
" 0.17497708],\n",
" [0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0.11656128, 0.15571237, 1. , 0.23954797,\n",
" 0.19821839, 0.17304236, 0.29021277, 0.20588608, 0.50686884,\n",
" 0.23910212],\n",
" [0.13334684, 0.13334684, 0.13334684, 0.13334684, 0.13195767,\n",
" 0.13334684, 0.15109579, 0.13334684, 0.11939853, 0.23528858,\n",
" 0.15109579, 0.11863704, 0.11838022, 0.15369825, 0.11913675,\n",
" 0.11939853, 0. , 0.17530336, 0.23954797, 1. ,\n",
" 0.27542833, 0.19068493, 0.38791647, 0.23056157, 0.30797226,\n",
" 0.13601865],\n",
" [0.42746079, 0.21604713, 0.21604713, 0.21604713, 0.21379641,\n",
" 0.21604713, 0.24480379, 0.21604713, 0.38274766, 0.19260272,\n",
" 0.125027 , 0.09816841, 0.0979559 , 0.24902027, 0.38190847,\n",
" 0.19344823, 0.10628444, 0.1893116 , 0.19821839, 0.27542833,\n",
" 1. , 0.21038101, 0.31755075, 0.04693344, 0.31785607,\n",
" 0.10901062],\n",
" [0. , 0. , 0. , 0. , 0. ,\n",
" 0.12304112, 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0.05508894, 0.17304236, 0.19068493,\n",
" 0.21038101, 1. , 0.24641611, 0.0546298 , 0.14670571,\n",
" 0. ],\n",
" [0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0.22533871, 0.29021277, 0.38791647,\n",
" 0.31755075, 0.24641611, 1. , 0.16848816, 0.29525166,\n",
" 0.13182946],\n",
" [0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0.10402935, 0.10380416, 0. , 0. ,\n",
" 0. , 0.11039584, 0.1015106 , 0.20588608, 0.23056157,\n",
" 0.04693344, 0.0546298 , 0.16848816, 1. , 0.13091301,\n",
" 0.23013244],\n",
" [0.15388796, 0.15388796, 0.15388796, 0.15388796, 0.1522848 ,\n",
" 0.15388796, 0.174371 , 0.15388796, 0.13779102, 0.13718877,\n",
" 0.174371 , 0.13691222, 0.13661584, 0.17737436, 0.1374889 ,\n",
" 0.13779102, 0. , 0.19801995, 0.50686884, 0.30797226,\n",
" 0.31785607, 0.14670571, 0.29525166, 0.13091301, 1. ,\n",
" 0.15203349],\n",
" [0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0.25641247, 0.17497708, 0.23910212, 0.13601865,\n",
" 0.10901062, 0. , 0.13182946, 0.23013244, 0.15203349,\n",
" 1. ]])"
]
},
"metadata": {
"tags": []
},
"execution_count": 21
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "RwjUVrb3_u2Y",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "15d5923d-26c9-4110-a75f-a40edf018e68"
},
"source": [
"plt.hist(cosine_similaritas)\n",
"plt.xlabel('cosine')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 22
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEGCAYAAAB/+QKOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAXfklEQVR4nO3de5RlZX3m8e8jYLwhXroE5GIbhnBRBLEEHC/BG2LLgDFGYHlBR20xmNGJK5M2cUktXcnKjAudUZlgqz2go0SNYpgBlU43CkZRCmy1BQ1IULshdikGvI3Y+Js/zi44FLu6TlfXPqe76vtZ66zal3ef/Xurm37Yt3enqpAkaab7jboASdLOyYCQJLUyICRJrQwISVIrA0KS1Gr3URewkJYtW1bLly8fdRmStMu45pprflxVY23rFlVALF++nMnJyVGXIUm7jCTfn22dp5gkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUqvOAiLJAUkuT3Jdkm8neWOz/BFJ1ia5ofn58Fm2P6Npc0OSM7qqU5LUrssjiK3Am6vqcOA44KwkhwOrgHVVdTCwrpm/lySPAM4GjgWOAc6eLUgkSd3oLCCq6taquraZ/hlwPbAfcApwQdPsAuCFLZs/D1hbVbdV1U+BtcCJXdUqSbqvoVyDSLIceCLwVWDvqrq1WfWvwN4tm+wH/LBvflOzrO27VyaZTDI5NTW1YDUvFueeuX7UJUjaRXUeEEkeAnwKeFNV3dG/rnqvs9uhV9pV1eqqGq+q8bGx1uFEJEnz0GlAJNmDXjh8tKo+3Sz+UZJ9m/X7AltaNt0MHNA3v3+zTJI0JF3exRTgQ8D1VfWuvlUXA9N3JZ0B/EPL5p8HTkjy8Obi9AnNMknSkHR5BPFU4OXAs5JsaD4rgL8BnpvkBuA5zTxJxpN8EKCqbgPeAVzdfN7eLJMkDUlnw31X1ZeAzLL62S3tJ4HX9M2vAdZ0U50kaS4+SS1JamVASJJaGRCSpFYGhCSplQHRlYm9Rl2BJO0QA0KS1MqAkCS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUisDQpLUyoBYRPa5fMOoS5C0iBgQkqRWnb0wKMka4CRgS1U9vln2ceCQpsnDgH+rqqNatr0Z+BlwF7C1qsa7qlOS1K6zgADOB94HfHh6QVWdOj2d5Bzg9m1s/8yq+nFn1UmStqnLV45ekWR527okAV4CPKur/UuSdsyorkE8HfhRVd0wy/oCLktyTZKVQ6xLktTo8hTTtpwOXLiN9U+rqs1JHgWsTfKdqrqirWETICsBDjzwwIWvVJKWqKEfQSTZHXgR8PHZ2lTV5ubnFuAi4JhttF1dVeNVNT42NrbQ5UrSkjWKU0zPAb5TVZvaViZ5cJI9p6eBE4CNQ6xPkkSHAZHkQuArwCFJNiV5dbPqNGacXkry6CSXNrN7A19K8g3ga8AlVfW5ruqUJLXr8i6m02dZ/sqWZbcAK5rpm4Aju6pLkjQYn6ReIBMTE/dZdsQFR9w9fc6pJw2xGknacQaEJKmVASFJamVASJJaGRCSpFYGhCSplQEhSWplQEiSWhkQkqRWBoQkqZUBsYDWrT/oPsuuP/SwEVQiSTvOgJAktTIgJEmtDAhJUisDQpLUyoCQJLUyICRJrbp85eiaJFuSbOxbNpFkc5INzWfFLNuemOS7SW5MsqqrGiVJs+vyCOJ84MSW5e+uqqOaz6UzVybZDTgXeD5wOHB6ksM7rFOS1KKzgKiqK4Db5rHpMcCNVXVTVd0J/B1wyoIWJ0ma0yiuQbwhyTebU1APb1m/H/DDvvlNzbJWSVYmmUwyOTU1tdC1StKSNeyA+FvgIOAo4FbgnB39wqpaXVXjVTU+Nja2o18nSWoMNSCq6kdVdVdV/Rb4AL3TSTNtBg7om9+/WSZJGqKhBkSSfftm/wDY2NLsauDgJI9Ncn/gNODiYdQnSbrH7l19cZILgeOBZUk2AWcDxyc5CijgZuB1TdtHAx+sqhVVtTXJG4DPA7sBa6rq213VKUlq11lAVNXpLYs/NEvbW4AVffOXAve5BVaSNDw+SS1JamVASJJaGRCSpFYGhCSplQEhSWplQEiSWhkQkqRWBoQkqZUBIUlqZUBIkloZEJKkVgaEJKmVASFJamVASJJaGRCSpFYGhOZt06orR12CpA4ZEJKkVp0FRJI1SbYk2di37J1JvpPkm0kuSvKwWba9Ocm3kmxIMtlVjZKk2Q0UEEmOmMd3nw+cOGPZWuDxVfUE4J+Bt2xj+2dW1VFVNT6PfUuSdtCgRxD/M8nXkvxxkr0G2aCqrgBum7Hssqra2sxeBew/eKmSpGEaKCCq6unAS4EDgGuSfCzJc3dw3/8R+OxsuwQuS3JNkpXb+pIkK5NMJpmcmprawZIkSdMGvgZRVTcAbwX+HPh94D3N9YQXbe9Ok/wlsBX46CxNnlZVRwPPB85K8oxt1LW6qsaranxsbGx7S5EkzWLQaxBPSPJu4HrgWcB/qKrDmul3b88Ok7wSOAl4aVVVW5uq2tz83AJcBByzPfuQJO24QY8g3gtcCxxZVWdV1bUAVXULvaOKgSQ5EfgvwMlV9ctZ2jw4yZ7T08AJwMa2tpKk7uw+YLsXAL+qqrsAktwPeEBV/bKqPtK2QZILgeOBZUk2AWfTu2vpd4C1SQCuqqozkzwa+GBVrQD2Bi5q1u8OfKyqPjffDkqS5mfQgPhH4DnAz5v5BwGXAf9+tg2q6vSWxR+ape0twIpm+ibgyAHrkiR1ZNBTTA+oqulwoJl+UDclaac0sRdHXDCfx2F2XftcvmHUJUgjNWhA/CLJ0dMzSZ4E/KqbkiRJO4NBTzG9CfhkkluAAPsAp3ZWlSRp5AYKiKq6OsmhwCHNou9W1W+6K0uSNGqDHkEAPBlY3mxzdBKq6sOdVCVJGrmBAiLJR4CDgA3AXc3iAgwISVqkBj2CGAcOn+3JZ0nS4jPoXUwb6V2YliQtEYMeQSwDrkvyNeDX0wur6uROqpIkjdygATHRZRGSpJ3PoO+D+CJwM7BHM301vcH7pCXlnFNP6uy7N626srPvluZj0OG+Xwv8PfD+ZtF+wGe6KkqSNHqDXqQ+C3gqcAfc/fKgR3VVlCRp9AYNiF9X1Z3TM0l2p/cchCRpkRo0IL6Y5C+ABzbvov4k8H+6K0uSNGqDBsQqYAr4FvA64FK2401ykqRdz6CD9f0W+EDzkSQtAYPexfQvSW6a+RlguzVJtiTZ2LfsEUnWJrmh+fnwWbY9o2lzQ5IzBu+SJGkhDHqKaZzeaK5PBp4OvAf43wNsdz5w4oxlq4B1VXUwsK6Zv5ckj6D3DutjgWOAs2cLEklSNwZ9UO4nfZ/NVfXfgRcMsN0VwG0zFp8CXNBMXwC8sGXT5wFrq+q2qvopsJb7Bo0kqUODnmI6uu8znuRMtu9dEv32rqpbm+l/BfZuabMf8MO++U3NsrbaViaZTDI5NTU1z5K0LctXXbKg7STtGgb9R/6cvumt9IbdeMmO7ryqKskOPU9RVauB1QDj4+M+myFJC2TQu5ieuYD7/FGSfavq1iT7Alta2mwGju+b3x/4wgLWIEmaw6BvlPvTba2vqndtxz4vBs4A/qb5+Q8tbT4P/HXfhekTgLdsxz4kSTtoe+5iej296wD7AWcCRwN7Np9WSS4EvgIckmRTklfTC4bnJrkBeE4zT3Nt44MAVXUb8A56o8ZeDby9WSZJGpJBr0HsDxxdVT8DSDIBXFJVL9vWRlV1+iyrnt3SdhJ4Td/8GmDNgPVJkhbYoEcQewN39s3fSfvdR5KkRWLQI4gPA19LclEz/0LueZZBkrQIDXoX018l+Sy9p6gBXlVVX++uLEnSqA16igngQcAdVfU/gE1JHttRTZKkncCgT1KfDfw599xqugeDjcUkSdpFDXoE8QfAycAvAKrqFrZxe6skadc3aEDcWVVF85rRJA/uriRJ0s5g0ID4RJL3Aw9L8lrgH/HlQZK0qM15F1OSAB8HDgXuAA4B3lZVazuuTZI0QnMGRDPi6qVVdQS99zJIkpaAQU8xXZvkyZ1WIknaqQz6JPWxwMuS3EzvTqbQO7h4QleFSZJGa5sBkeTAqvoBvVeASpKWkLmOID5DbxTX7yf5VFX94TCKkiSN3lzXINI3/btdFiJJ2rnMFRA1y7T6bFp15ahL0AJbt/6g+yw798z1C/Ldy1ddsiDfI3VtrlNMRya5g96RxAObabjnIvVDO61OkjQy2zyCqKrdquqhVbVnVe3eTE/PzysckhySZEPf544kb5rR5vgkt/e1edt89iVJmr9Bb3NdMFX1XeAogCS7AZuBi1qaXllVJw2zNknSPbbnfRBdeDbwvar6/ojrkCTNMOqAOA24cJZ1T0nyjSSfTfK42b4gycokk0kmp6amuqlSkpagkQVEkvvTe8fEJ1tWXws8pqqOBN5L73mMVlW1uqrGq2p8bGysm2IlaQka5RHE84Frq+pHM1dU1R1V9fNm+lJgjyTLhl2gJC1lowyI05nl9FKSfZphxklyDL06fzLE2iRpyRv6XUxw9xvpngu8rm/ZmQBVdR7wYuD1SbYCvwJOa95oJ0kakpEcQVTVL6rqkVV1e9+y85pwoKreV1WPq6ojq+q4qvryKOqcafmqS2BiL4644IhRl3IvExMTd0/7lK6khTLqu5gkSTspA0KS1MqAkCS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUisDQpLUyoBo9A+jcf2hh929fGJignXrD2Kfyzfcq/31hx7GuWeu55xTd96X3vX3Q5K2lwEhSWplQEiSWhkQkqRWBoQkqZUBIUlqZUBIklqNLCCS3JzkW0k2JJlsWZ8k70lyY5JvJjl6FHVK0lI1kndS93lmVf14lnXPBw5uPscCf9v8lCQNwc58iukU4MPVcxXwsCT7jrooSVoqRhkQBVyW5JokK1vW7wf8sG9+U7PsXpKsTDKZZHJqamrBitvZn5LeHnP2o+8J8v5+T0xMdF+clhb/ru1SRhkQT6uqo+mdSjoryTPm8yVVtbqqxqtqfGxsbGErlKQlbGQBUVWbm59bgIuAY2Y02Qwc0De/f7NMkjQEIwmIJA9Osuf0NHACsHFGs4uBVzR3Mx0H3F5Vtw65VElaskZ1F9PewEVJpmv4WFV9LsmZAFV1HnApsAK4Efgl8KoR1SpJS9JIAqKqbgKObFl+Xt90AWcNsy5J0j125ttcJUkjZEBIkloZEJKkVgaEJKmVAbFIrFt/0D0zE3sNvN3yVZe0Lt+06sr7fi/3vIt75r7739l9xAVHDLz/hTJbP7bJp3qlbTIgJEmtDAhJUisDQpLUyoCQJLUyICRJrQwISVIrA0KS1MqAkCS1MiAkSa0MCElSKwOiA/Ma9qFP/7AP9/rOZmiIaeecetLdQ2IM28z6Zrr+0MMGajcqO/pntMPmGOZjn8s3jL5GLXkGhCSp1dADIskBSS5Pcl2Sbyd5Y0ub45PcnmRD83nbsOuUpKVuFK8c3Qq8uaquTbIncE2StVV13Yx2V1bVznl+QpKWgKEfQVTVrVV1bTP9M+B6YL9h1yFJ2raRXoNIshx4IvDVltVPSfKNJJ9N8rhtfMfKJJNJJqempjqqVJKWnpEFRJKHAJ8C3lRVd8xYfS3wmKo6Engv8JnZvqeqVlfVeFWNj42NdVewJC0xIwmIJHvQC4ePVtWnZ66vqjuq6ufN9KXAHkmWDblMSVrSRnEXU4APAddX1btmabNP044kx9Cr8yfDq1KSNIq7mJ4KvBz4VpLpFxn/BXAgQFWdB7wYeH2SrcCvgNOqqkZQqyQtWaO4i+lLVZWqekJVHdV8Lq2q85pwoKreV1WPq6ojq+q4qvrysOscpv6npKefQAbYtOpK1q0/iH0u33Cv9tNP325L/xPXQzWx19B32f/7me73XL+fndm69QfdZ1nb0/XzMdvT2aN6Il87N5+kliS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUisDQpLUyoCQJLUyIBbYzKeeYdd+qncu0+9QbrOj/Z75RPEgT5B3YeZT6TOfOu703dEz3kM+re1pa1i433l/n/qf7u9SV33abrP8zudr5kgJ00/Fb1p1JRMTE3ePljBXu2216YoBIUlqZUBIkloZEJKkVgaEJKmVASFJamVASJJaGRCSpFYjCYgkJyb5bpIbk6xqWf87ST7erP9qkuXDr1KSlrahB0SS3YBzgecDhwOnJzl8RrNXAz+tqn8HvBv4r8OtUpI0iiOIY4Abq+qmqroT+DvglBltTgEuaKb/Hnh2kgyxRkla8lJVw91h8mLgxKp6TTP/cuDYqnpDX5uNTZtNzfz3mjY/bvm+lcDKZvYQ4LvbUc4y4D7fuQTY76XFfi8t29vvx1TVWNuK3RemntGpqtXA6vlsm2SyqsYXuKSdnv1eWuz30rKQ/R7FKabNwAF98/s3y1rbJNkd2Av4yVCqkyQBowmIq4GDkzw2yf2B04CLZ7S5GDijmX4xsL6GfS5Mkpa4oZ9iqqqtSd4AfB7YDVhTVd9O8nZgsqouBj4EfCTJjcBt9EKkC/M6NbUI2O+lxX4vLQvW76FfpJYk7Rp8klqS1MqAkCS1WvQBsVSH9Rig33+a5Lok30yyLsljRlFnF+bqe1+7P0xSSRbFrZCD9DvJS5o/928n+diwa+zCAH/XD0xyeZKvN3/fV4yizoWUZE2SLc0zY23rk+Q9ze/km0mOnteOqmrRfuhdBP8e8LvA/YFvAIfPaPPHwHnN9GnAx0dd95D6/UzgQc306xdDvwfte9NuT+AK4CpgfNR1D+nP/GDg68DDm/lHjbruIfV7NfD6Zvpw4OZR170A/X4GcDSwcZb1K4DPAgGOA746n/0s9iOIpTqsx5z9rqrLq+qXzexV9J5HWQwG+TMHeAe9Mb7+3zCL69Ag/X4tcG5V/RSgqrYMucYuDNLvAh7aTO8F3DLE+jpRVVfQu8NzNqcAH66eq4CHJdl3e/ez2ANiP+CHffObmmWtbapqK3A78MihVNedQfrd79X0/m9jMZiz783h9gFVdckwC+vYIH/mvwf8XpJ/SnJVkhOHVl13Bun3BPCyJJuAS4E/GU5pI7W9/wa02uWH2tCOSfIyYBz4/VHXMgxJ7ge8C3jliEsZhd3pnWY6nt4R4xVJjqiqfxtpVd07HTi/qs5J8hR6z1g9vqp+O+rCdnaL/QhiqQ7rMUi/SfIc4C+Bk6vq10OqrWtz9X1P4PHAF5LcTO/87MWL4EL1IH/mm4CLq+o3VfUvwD/TC4xd2SD9fjXwCYCq+grwAHoD2i1mA/0bMJfFHhBLdViPOfud5InA++mFw2I4Fz1tm32vqturallVLa+q5fSuv5xcVZOjKXfBDPJ3/TP0jh5IsozeKaebhllkBwbp9w+AZwMkOYxeQEwNtcrhuxh4RXM303HA7VV16/Z+yaI+xVQ717AeQzNgv98JPAT4ZHNN/gdVdfLIil4gA/Z90Rmw358HTkhyHXAX8GdVtUsfLQ/Y7zcDH0jyn+ldsH7lrv4/gUkupBf2y5prK2cDewBU1Xn0rrWsAG4Efgm8al772cV/T5Kkjiz2U0ySpHkyICRJrQwISVIrA0KS1MqAkCS1MiCkIUgynuQ9o65D2h7e5ipJauURhDSgJK9oxtb/RpKPJFmeZH3fOzUObNr9UZKNTbsrmmXHJ/m/zfREM57/F5LclOQ/9e3jZUm+lmRDkvcn2W00vZUMCGkgSR4HvBV4VlUdCbwReC9wQVU9AfgoMH0K6W3A85p2sz2dfijwPHrDVZ+dZI9mGIhTgadW1VH0nnZ+aVd9kuayqIfakBbQs4BPVtWPAarqtmZk0Bc16z8C/Ldm+p+A85N8Avj0LN93STNA4q+TbAH2pjde0JOAq5vhTx4ILKZxsrSLMSCkBVZVZyY5FngBcE2SJ7U06x899y56/y2G3hHJW4ZQpjQnTzFJg1kP/FGSRwIkeQTwZe4Z3PGlwJXNuoOq6qtV9TZ6o4Ye0PJ9bdYBL07yqOl9ZBG9K1y7Ho8gpAE0I4T+FfDFJHfRe7fznwD/K8mf0QuC6REz35nkYHpHBOvovSd5zhcyVdV1Sd4KXNa82Og3wFnA9xe8Q9IAvM1VktTKU0ySpFYGhCSplQEhSWplQEiSWhkQkqRWBoQkqZUBIUlq9f8BWqOtpv2x3xEAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "CuVBmyFpemq0",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 913
},
"outputId": "3825012b-ed55-4ac7-d462-7053e64c1c9d"
},
"source": [
"print(\"Loading frequency matrix...\")\n",
"t0 = time()\n",
"\n",
"frequency_cosine = pd.DataFrame(cosine_similaritas, index=dataset2['ID'],columns=dataset2['ID'])\n",
"print(\"done in %0.3fs.\" % (time() - t0))\n",
"frequency_cosine"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading frequency matrix...\n",
"done in 0.001s.\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>ID</th>\n",
" <th>F01</th>\n",
" <th>F02</th>\n",
" <th>F03</th>\n",
" <th>F04</th>\n",
" <th>F06</th>\n",
" <th>F07</th>\n",
" <th>F08</th>\n",
" <th>F09</th>\n",
" <th>F10</th>\n",
" <th>F11</th>\n",
" <th>F12</th>\n",
" <th>F13</th>\n",
" <th>F14</th>\n",
" <th>F15</th>\n",
" <th>F16</th>\n",
" <th>F17</th>\n",
" <th>F18</th>\n",
" <th>NF01</th>\n",
" <th>NF02</th>\n",
" <th>NF03</th>\n",
" <th>NF04</th>\n",
" <th>NF05</th>\n",
" <th>NF06</th>\n",
" <th>NF07</th>\n",
" <th>NF08</th>\n",
" <th>NF09</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>F01</th>\n",
" <td>1.000000</td>\n",
" <td>0.505420</td>\n",
" <td>0.505420</td>\n",
" <td>0.505420</td>\n",
" <td>0.500154</td>\n",
" <td>0.505420</td>\n",
" <td>0.572693</td>\n",
" <td>0.505420</td>\n",
" <td>0.673975</td>\n",
" <td>0.450574</td>\n",
" <td>0.292488</td>\n",
" <td>0.229655</td>\n",
" <td>0.229158</td>\n",
" <td>0.582557</td>\n",
" <td>0.672497</td>\n",
" <td>0.452552</td>\n",
" <td>0.248641</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.133347</td>\n",
" <td>0.427461</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.153888</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F02</th>\n",
" <td>0.505420</td>\n",
" <td>1.000000</td>\n",
" <td>0.752710</td>\n",
" <td>0.752710</td>\n",
" <td>0.744868</td>\n",
" <td>0.752710</td>\n",
" <td>0.572693</td>\n",
" <td>0.752710</td>\n",
" <td>0.452552</td>\n",
" <td>0.450574</td>\n",
" <td>0.292488</td>\n",
" <td>0.229655</td>\n",
" <td>0.229158</td>\n",
" <td>0.582557</td>\n",
" <td>0.451560</td>\n",
" <td>0.452552</td>\n",
" <td>0.248641</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.133347</td>\n",
" <td>0.216047</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.153888</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F03</th>\n",
" <td>0.505420</td>\n",
" <td>0.752710</td>\n",
" <td>1.000000</td>\n",
" <td>0.752710</td>\n",
" <td>0.744868</td>\n",
" <td>0.752710</td>\n",
" <td>0.572693</td>\n",
" <td>0.752710</td>\n",
" <td>0.452552</td>\n",
" <td>0.450574</td>\n",
" <td>0.292488</td>\n",
" <td>0.229655</td>\n",
" <td>0.229158</td>\n",
" <td>0.582557</td>\n",
" <td>0.451560</td>\n",
" <td>0.452552</td>\n",
" <td>0.248641</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.133347</td>\n",
" <td>0.216047</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.153888</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F04</th>\n",
" <td>0.505420</td>\n",
" <td>0.752710</td>\n",
" <td>0.752710</td>\n",
" <td>1.000000</td>\n",
" <td>0.744868</td>\n",
" <td>0.752710</td>\n",
" <td>0.572693</td>\n",
" <td>0.752710</td>\n",
" <td>0.452552</td>\n",
" <td>0.450574</td>\n",
" <td>0.292488</td>\n",
" <td>0.229655</td>\n",
" <td>0.229158</td>\n",
" <td>0.582557</td>\n",
" <td>0.451560</td>\n",
" <td>0.452552</td>\n",
" <td>0.248641</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.133347</td>\n",
" <td>0.216047</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.153888</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F06</th>\n",
" <td>0.500154</td>\n",
" <td>0.744868</td>\n",
" <td>0.744868</td>\n",
" <td>0.744868</td>\n",
" <td>1.000000</td>\n",
" <td>0.744868</td>\n",
" <td>0.566727</td>\n",
" <td>0.744868</td>\n",
" <td>0.447837</td>\n",
" <td>0.445880</td>\n",
" <td>0.289441</td>\n",
" <td>0.227262</td>\n",
" <td>0.226770</td>\n",
" <td>0.576488</td>\n",
" <td>0.446856</td>\n",
" <td>0.447837</td>\n",
" <td>0.246051</td>\n",
" <td>0.118943</td>\n",
" <td>0.000000</td>\n",
" <td>0.131958</td>\n",
" <td>0.213796</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.152285</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F07</th>\n",
" <td>0.505420</td>\n",
" <td>0.752710</td>\n",
" <td>0.752710</td>\n",
" <td>0.752710</td>\n",
" <td>0.744868</td>\n",
" <td>1.000000</td>\n",
" <td>0.572693</td>\n",
" <td>0.752710</td>\n",
" <td>0.452552</td>\n",
" <td>0.450574</td>\n",
" <td>0.292488</td>\n",
" <td>0.229655</td>\n",
" <td>0.229158</td>\n",
" <td>0.582557</td>\n",
" <td>0.451560</td>\n",
" <td>0.452552</td>\n",
" <td>0.248641</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.133347</td>\n",
" <td>0.216047</td>\n",
" <td>0.123041</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.153888</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F08</th>\n",
" <td>0.572693</td>\n",
" <td>0.572693</td>\n",
" <td>0.572693</td>\n",
" <td>0.572693</td>\n",
" <td>0.566727</td>\n",
" <td>0.572693</td>\n",
" <td>1.000000</td>\n",
" <td>0.572693</td>\n",
" <td>0.512788</td>\n",
" <td>0.510547</td>\n",
" <td>0.682498</td>\n",
" <td>0.260223</td>\n",
" <td>0.259659</td>\n",
" <td>0.660097</td>\n",
" <td>0.511664</td>\n",
" <td>0.512788</td>\n",
" <td>0.281736</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.151096</td>\n",
" <td>0.244804</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.174371</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F09</th>\n",
" <td>0.505420</td>\n",
" <td>0.752710</td>\n",
" <td>0.752710</td>\n",
" <td>0.752710</td>\n",
" <td>0.744868</td>\n",
" <td>0.752710</td>\n",
" <td>0.572693</td>\n",
" <td>1.000000</td>\n",
" <td>0.452552</td>\n",
" <td>0.450574</td>\n",
" <td>0.292488</td>\n",
" <td>0.229655</td>\n",
" <td>0.229158</td>\n",
" <td>0.582557</td>\n",
" <td>0.451560</td>\n",
" <td>0.452552</td>\n",
" <td>0.248641</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.133347</td>\n",
" <td>0.216047</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.153888</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F10</th>\n",
" <td>0.673975</td>\n",
" <td>0.452552</td>\n",
" <td>0.452552</td>\n",
" <td>0.452552</td>\n",
" <td>0.447837</td>\n",
" <td>0.452552</td>\n",
" <td>0.512788</td>\n",
" <td>0.452552</td>\n",
" <td>1.000000</td>\n",
" <td>0.403443</td>\n",
" <td>0.261893</td>\n",
" <td>0.205632</td>\n",
" <td>0.205187</td>\n",
" <td>0.521621</td>\n",
" <td>0.799980</td>\n",
" <td>0.405214</td>\n",
" <td>0.222633</td>\n",
" <td>0.099137</td>\n",
" <td>0.000000</td>\n",
" <td>0.119399</td>\n",
" <td>0.382748</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.137791</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F11</th>\n",
" <td>0.450574</td>\n",
" <td>0.450574</td>\n",
" <td>0.450574</td>\n",
" <td>0.450574</td>\n",
" <td>0.445880</td>\n",
" <td>0.450574</td>\n",
" <td>0.510547</td>\n",
" <td>0.450574</td>\n",
" <td>0.403443</td>\n",
" <td>1.000000</td>\n",
" <td>0.510547</td>\n",
" <td>0.400870</td>\n",
" <td>0.400002</td>\n",
" <td>0.519341</td>\n",
" <td>0.402559</td>\n",
" <td>0.403443</td>\n",
" <td>0.221660</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.235289</td>\n",
" <td>0.192603</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.137189</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F12</th>\n",
" <td>0.292488</td>\n",
" <td>0.292488</td>\n",
" <td>0.292488</td>\n",
" <td>0.292488</td>\n",
" <td>0.289441</td>\n",
" <td>0.292488</td>\n",
" <td>0.682498</td>\n",
" <td>0.292488</td>\n",
" <td>0.261893</td>\n",
" <td>0.510547</td>\n",
" <td>1.000000</td>\n",
" <td>0.509518</td>\n",
" <td>0.508415</td>\n",
" <td>0.337127</td>\n",
" <td>0.261319</td>\n",
" <td>0.261893</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.151096</td>\n",
" <td>0.125027</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.174371</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F13</th>\n",
" <td>0.229655</td>\n",
" <td>0.229655</td>\n",
" <td>0.229655</td>\n",
" <td>0.229655</td>\n",
" <td>0.227262</td>\n",
" <td>0.229655</td>\n",
" <td>0.260223</td>\n",
" <td>0.229655</td>\n",
" <td>0.205632</td>\n",
" <td>0.400870</td>\n",
" <td>0.509518</td>\n",
" <td>1.000000</td>\n",
" <td>0.802518</td>\n",
" <td>0.264705</td>\n",
" <td>0.205182</td>\n",
" <td>0.205632</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.118637</td>\n",
" <td>0.098168</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.104029</td>\n",
" <td>0.136912</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F14</th>\n",
" <td>0.229158</td>\n",
" <td>0.229158</td>\n",
" <td>0.229158</td>\n",
" <td>0.229158</td>\n",
" <td>0.226770</td>\n",
" <td>0.229158</td>\n",
" <td>0.259659</td>\n",
" <td>0.229158</td>\n",
" <td>0.205187</td>\n",
" <td>0.400002</td>\n",
" <td>0.508415</td>\n",
" <td>0.802518</td>\n",
" <td>1.000000</td>\n",
" <td>0.264132</td>\n",
" <td>0.204737</td>\n",
" <td>0.205187</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.118380</td>\n",
" <td>0.097956</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.103804</td>\n",
" <td>0.136616</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F15</th>\n",
" <td>0.582557</td>\n",
" <td>0.582557</td>\n",
" <td>0.582557</td>\n",
" <td>0.582557</td>\n",
" <td>0.576488</td>\n",
" <td>0.582557</td>\n",
" <td>0.660097</td>\n",
" <td>0.582557</td>\n",
" <td>0.521621</td>\n",
" <td>0.519341</td>\n",
" <td>0.337127</td>\n",
" <td>0.264705</td>\n",
" <td>0.264132</td>\n",
" <td>1.000000</td>\n",
" <td>0.520477</td>\n",
" <td>0.521621</td>\n",
" <td>0.286589</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.153698</td>\n",
" <td>0.249020</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.177374</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F16</th>\n",
" <td>0.672497</td>\n",
" <td>0.451560</td>\n",
" <td>0.451560</td>\n",
" <td>0.451560</td>\n",
" <td>0.446856</td>\n",
" <td>0.451560</td>\n",
" <td>0.511664</td>\n",
" <td>0.451560</td>\n",
" <td>0.799980</td>\n",
" <td>0.402559</td>\n",
" <td>0.261319</td>\n",
" <td>0.205182</td>\n",
" <td>0.204737</td>\n",
" <td>0.520477</td>\n",
" <td>1.000000</td>\n",
" <td>0.404326</td>\n",
" <td>0.222145</td>\n",
" <td>0.098920</td>\n",
" <td>0.000000</td>\n",
" <td>0.119137</td>\n",
" <td>0.381908</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.137489</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F17</th>\n",
" <td>0.452552</td>\n",
" <td>0.452552</td>\n",
" <td>0.452552</td>\n",
" <td>0.452552</td>\n",
" <td>0.447837</td>\n",
" <td>0.452552</td>\n",
" <td>0.512788</td>\n",
" <td>0.452552</td>\n",
" <td>0.405214</td>\n",
" <td>0.403443</td>\n",
" <td>0.261893</td>\n",
" <td>0.205632</td>\n",
" <td>0.205187</td>\n",
" <td>0.521621</td>\n",
" <td>0.404326</td>\n",
" <td>1.000000</td>\n",
" <td>0.445266</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.119399</td>\n",
" <td>0.193448</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.137791</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F18</th>\n",
" <td>0.248641</td>\n",
" <td>0.248641</td>\n",
" <td>0.248641</td>\n",
" <td>0.248641</td>\n",
" <td>0.246051</td>\n",
" <td>0.248641</td>\n",
" <td>0.281736</td>\n",
" <td>0.248641</td>\n",
" <td>0.222633</td>\n",
" <td>0.221660</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.286589</td>\n",
" <td>0.222145</td>\n",
" <td>0.445266</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.116561</td>\n",
" <td>0.000000</td>\n",
" <td>0.106284</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.110396</td>\n",
" <td>0.000000</td>\n",
" <td>0.256412</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NF01</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.118943</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.099137</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.098920</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.155712</td>\n",
" <td>0.175303</td>\n",
" <td>0.189312</td>\n",
" <td>0.055089</td>\n",
" <td>0.225339</td>\n",
" <td>0.101511</td>\n",
" <td>0.198020</td>\n",
" <td>0.174977</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NF02</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.116561</td>\n",
" <td>0.155712</td>\n",
" <td>1.000000</td>\n",
" <td>0.239548</td>\n",
" <td>0.198218</td>\n",
" <td>0.173042</td>\n",
" <td>0.290213</td>\n",
" <td>0.205886</td>\n",
" <td>0.506869</td>\n",
" <td>0.239102</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NF03</th>\n",
" <td>0.133347</td>\n",
" <td>0.133347</td>\n",
" <td>0.133347</td>\n",
" <td>0.133347</td>\n",
" <td>0.131958</td>\n",
" <td>0.133347</td>\n",
" <td>0.151096</td>\n",
" <td>0.133347</td>\n",
" <td>0.119399</td>\n",
" <td>0.235289</td>\n",
" <td>0.151096</td>\n",
" <td>0.118637</td>\n",
" <td>0.118380</td>\n",
" <td>0.153698</td>\n",
" <td>0.119137</td>\n",
" <td>0.119399</td>\n",
" <td>0.000000</td>\n",
" <td>0.175303</td>\n",
" <td>0.239548</td>\n",
" <td>1.000000</td>\n",
" <td>0.275428</td>\n",
" <td>0.190685</td>\n",
" <td>0.387916</td>\n",
" <td>0.230562</td>\n",
" <td>0.307972</td>\n",
" <td>0.136019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NF04</th>\n",
" <td>0.427461</td>\n",
" <td>0.216047</td>\n",
" <td>0.216047</td>\n",
" <td>0.216047</td>\n",
" <td>0.213796</td>\n",
" <td>0.216047</td>\n",
" <td>0.244804</td>\n",
" <td>0.216047</td>\n",
" <td>0.382748</td>\n",
" <td>0.192603</td>\n",
" <td>0.125027</td>\n",
" <td>0.098168</td>\n",
" <td>0.097956</td>\n",
" <td>0.249020</td>\n",
" <td>0.381908</td>\n",
" <td>0.193448</td>\n",
" <td>0.106284</td>\n",
" <td>0.189312</td>\n",
" <td>0.198218</td>\n",
" <td>0.275428</td>\n",
" <td>1.000000</td>\n",
" <td>0.210381</td>\n",
" <td>0.317551</td>\n",
" <td>0.046933</td>\n",
" <td>0.317856</td>\n",
" <td>0.109011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NF05</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.123041</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.055089</td>\n",
" <td>0.173042</td>\n",
" <td>0.190685</td>\n",
" <td>0.210381</td>\n",
" <td>1.000000</td>\n",
" <td>0.246416</td>\n",
" <td>0.054630</td>\n",
" <td>0.146706</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NF06</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.225339</td>\n",
" <td>0.290213</td>\n",
" <td>0.387916</td>\n",
" <td>0.317551</td>\n",
" <td>0.246416</td>\n",
" <td>1.000000</td>\n",
" <td>0.168488</td>\n",
" <td>0.295252</td>\n",
" <td>0.131829</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NF07</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.104029</td>\n",
" <td>0.103804</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.110396</td>\n",
" <td>0.101511</td>\n",
" <td>0.205886</td>\n",
" <td>0.230562</td>\n",
" <td>0.046933</td>\n",
" <td>0.054630</td>\n",
" <td>0.168488</td>\n",
" <td>1.000000</td>\n",
" <td>0.130913</td>\n",
" <td>0.230132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NF08</th>\n",
" <td>0.153888</td>\n",
" <td>0.153888</td>\n",
" <td>0.153888</td>\n",
" <td>0.153888</td>\n",
" <td>0.152285</td>\n",
" <td>0.153888</td>\n",
" <td>0.174371</td>\n",
" <td>0.153888</td>\n",
" <td>0.137791</td>\n",
" <td>0.137189</td>\n",
" <td>0.174371</td>\n",
" <td>0.136912</td>\n",
" <td>0.136616</td>\n",
" <td>0.177374</td>\n",
" <td>0.137489</td>\n",
" <td>0.137791</td>\n",
" <td>0.000000</td>\n",
" <td>0.198020</td>\n",
" <td>0.506869</td>\n",
" <td>0.307972</td>\n",
" <td>0.317856</td>\n",
" <td>0.146706</td>\n",
" <td>0.295252</td>\n",
" <td>0.130913</td>\n",
" <td>1.000000</td>\n",
" <td>0.152033</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NF09</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.256412</td>\n",
" <td>0.174977</td>\n",
" <td>0.239102</td>\n",
" <td>0.136019</td>\n",
" <td>0.109011</td>\n",
" <td>0.000000</td>\n",
" <td>0.131829</td>\n",
" <td>0.230132</td>\n",
" <td>0.152033</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"ID F01 F02 F03 ... NF07 NF08 NF09\n",
"ID ... \n",
"F01 1.000000 0.505420 0.505420 ... 0.000000 0.153888 0.000000\n",
"F02 0.505420 1.000000 0.752710 ... 0.000000 0.153888 0.000000\n",
"F03 0.505420 0.752710 1.000000 ... 0.000000 0.153888 0.000000\n",
"F04 0.505420 0.752710 0.752710 ... 0.000000 0.153888 0.000000\n",
"F06 0.500154 0.744868 0.744868 ... 0.000000 0.152285 0.000000\n",
"F07 0.505420 0.752710 0.752710 ... 0.000000 0.153888 0.000000\n",
"F08 0.572693 0.572693 0.572693 ... 0.000000 0.174371 0.000000\n",
"F09 0.505420 0.752710 0.752710 ... 0.000000 0.153888 0.000000\n",
"F10 0.673975 0.452552 0.452552 ... 0.000000 0.137791 0.000000\n",
"F11 0.450574 0.450574 0.450574 ... 0.000000 0.137189 0.000000\n",
"F12 0.292488 0.292488 0.292488 ... 0.000000 0.174371 0.000000\n",
"F13 0.229655 0.229655 0.229655 ... 0.104029 0.136912 0.000000\n",
"F14 0.229158 0.229158 0.229158 ... 0.103804 0.136616 0.000000\n",
"F15 0.582557 0.582557 0.582557 ... 0.000000 0.177374 0.000000\n",
"F16 0.672497 0.451560 0.451560 ... 0.000000 0.137489 0.000000\n",
"F17 0.452552 0.452552 0.452552 ... 0.000000 0.137791 0.000000\n",
"F18 0.248641 0.248641 0.248641 ... 0.110396 0.000000 0.256412\n",
"NF01 0.000000 0.000000 0.000000 ... 0.101511 0.198020 0.174977\n",
"NF02 0.000000 0.000000 0.000000 ... 0.205886 0.506869 0.239102\n",
"NF03 0.133347 0.133347 0.133347 ... 0.230562 0.307972 0.136019\n",
"NF04 0.427461 0.216047 0.216047 ... 0.046933 0.317856 0.109011\n",
"NF05 0.000000 0.000000 0.000000 ... 0.054630 0.146706 0.000000\n",
"NF06 0.000000 0.000000 0.000000 ... 0.168488 0.295252 0.131829\n",
"NF07 0.000000 0.000000 0.000000 ... 1.000000 0.130913 0.230132\n",
"NF08 0.153888 0.153888 0.153888 ... 0.130913 1.000000 0.152033\n",
"NF09 0.000000 0.000000 0.000000 ... 0.230132 0.152033 1.000000\n",
"\n",
"[26 rows x 26 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 23
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "X6jsTFPsfST4"
},
"source": [
"### Visual Cosine"
]
},
{
"cell_type": "code",
"metadata": {
"id": "0rK2krOueqD1",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 370
},
"outputId": "cff683c4-7e97-4434-c3fa-760382d0c933"
},
"source": [
"import umap\n",
"\n",
"print(\"Loading Topic Visualisation...\")\n",
"t0 = time()\n",
"\n",
"visual_cosine = cosine_similaritas\n",
"embedding = umap.UMAP(n_neighbors=3, min_dist=0.1, random_state=4).fit_transform(visual_cosine)\n",
"plt.figure(figsize=(7,5))\n",
"plt.scatter(embedding[:, 0], embedding[:, 1], c = None, s = 50 ,edgecolors=None)\n",
"plt.xlabel('widht')\n",
"plt.ylabel('height')\n",
"plt.show()\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Topic Visualisation...\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAAE9CAYAAACY3GKJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAdP0lEQVR4nO3df3RndX3n8eebhGRNhrYyMyIDYqyiPf6YYTUz1N3CEUspsFOpPbbg0cpQOSO7ut2e3a4ruqsu7LFW13W34tZFCqPViu5uqewwqKC2QKtkMpQZYFREDMoMP+aHApNoYjLv/SM3YwjfbyYw+d77/eY+H+fkfO+9n5vv9517MnnNvffzuZ/ITCRJqqujqi5AkqQqGYSSpFozCCVJtWYQSpJqzSCUJNWaQShJqrXuqgtohRUrVuTAwEDVZUiS2sS2bdv2ZubKRm1LMggHBgYYHh6uugxJUpuIiAeatXlpVJJUawahJKnWDEJJUq0ZhJKkWjMIJUm1ZhBKkmptSQ6fkCR1jgPjk2zevpuRfaMMLO9n/ZpVLOstL54MQklSZbaO7GfDNUNkwtjEFH09XVx+w042XbSOtQPHllKDl0YlSZU4MD7JhmuGGB2fYmxiCpgOw9HxKS68+nZGxydLqcMglCRVYvP23WQ2bhubOMjHvvbdUuowCCVJlRjZN3roTLCRv7jt+6WcFRqEkqRKDCzvp6crmrYHsHnH7pbXUUoQRsTVEfFoRNw9a9uxEXFTRHy3eH12k++9sNjnuxFxYRn1SpJab/2aVUwdbHJtFJiYSkb2jrW8jrLOCDcBZ8/Z9i7gq5l5MvDVYv1JIuJY4H3AqcA64H3NAlOS1HniqOZnhM86uouBFX0tr6GUIMzMW4D9czafB3yqWP4U8NsNvvU3gZsyc39m/gi4iacGqiSpA23evpuj5wnCg5msX72q5XVUeY/wuMx8qFh+GDiuwT4nAD+ctf5gsU2S1OFG9o3yk58dbNp+zsufS38JA+vborNMZibQ/ELxAkTExogYjojhPXv2LFJlkqRWGVjeT19PV8O2Zx19FK9+4fJS6qgyCB+JiOMBitdHG+yzC3jerPUTi21PkZlXZuZgZg6uXLly0YuVJC2u9WtWEU2ujB51VJRyWRSqDcLrgZleoBcCX2ywz5eBsyLi2UUnmbOKbZKkDrest5tNF62jv7fr0JlhX08X/b1dxfZyngJayqdExOeA1wArIuJBpnuCfhD4QkS8FXgA+L1i30Hgksy8ODP3R8TlwNbirS7LzLmdbiRJHWrtwLEMvftMNu/YzcjeMQZW9LF+9arSQhAgstnzbTrY4OBgDg8PV12GJOlpatVMFBGxLTMHG7U5+4QkqS1UNRNFW/QalSTV2yOP/5Q3X3V7w5kopmeoaN0zRw1CSVKlto7s57QPfY3xycZjCjNb+8xRg1CSVJmZOQknJpv3VxmbmGrpM0cNQklSZeabk3BGX09rnzlqEEqSKnO4OQkBpg4mZ7zkOS2rwSCUJFXiwPgkjz4+Tvc8D96eccZH/patI60ZRm4QSpJKt3VkP6d+4GZuvPshJueZkxBgfPJgS3uPGoSSpFLNdJAZHZ+ad/aJuVrVe9QglCSVar4OMvNdJG1V71GDUJJUqvk6yCTQ3SSZWtV71CCUJJVq/nkIuziqSeeZCFoyNZNBKEkq1fzzEMJVF64tdWomH7otSSrVzDyEcx+wHcGhB2yXOTWTQShJKl2zsEvg2qEfHJqG6e2vfdGiTMM0H+cjlCS1hUbTMM0+SzwSzkcoSWpLMxPx3vvIE3z29h88aQaKmZ6lG64ZYujdZ3ppVJK0tMw9A2xmZiD9+WtPakkdBqEkqXSzny5zOE7DJElachYy/dIMp2GSJC05C5l+aUarBtLPMAglSaWb7+kyM1o9kH6G9wglSaVbv2YVl9+ws2FbT3fwll8d4OTjlrV0IP0Mg1CSVLqFPF2mLAahJKkSZT9KrZnKgjAiXgJ8ftamXwbem5n/fdY+rwG+CHy/2PTXmXlZaUVKklqqv7e7ZeMDF6qyIMzM7wCnAEREF7ALuK7Brrdm5voya5Mk1Ue79Br9deB7mflA1YVIkuqlXYLwAuBzTdpeHRHbI+LGiHhZmUVJkpa+yoMwInqA1wH/u0HzHcDzM3MN8DHgb+Z5n40RMRwRw3v27GlNsZKkJafyIATOAe7IzEfmNmTm45l5oFjeAhwdESsavUlmXpmZg5k5uHLlytZWLElaMtohCN9Ik8uiEfHciIhieR3T9e4rsTZJ0hJX6TjCiOgHfgN426xtlwBk5ieANwD/MiImgZ8AF+RSnElYklSZSoMwM0eB5XO2fWLW8hXAFWXXJUmqj3a4NCpJUmUMQklSrRmEkqRaMwglSbVmEEqSas0glCTVmkEoSao1g1CSVGsGoSSp1gxCSVKtGYSSpFozCCVJtWYQSpJqrdLZJyRJ8zswPsnm7bsZ2TfKwPJ+1q9ZxbJe/3QvJo+mJLWprSP72XDNEAcPwk9+NkX3UcH7rr+HT75lkNNfvLLq8pYML41KUhs6MD7JhmuGGB2f4ic/mwJg8mAyPnmQt1w9xC3f3VNxhUuHQShJbWjz9t0cPNi8/eJPbWV0fLK8gpYwg1CS2tDIvtFDZ4KNHDyYbN6xu8SKli6DUJLa0MDyfrqPiqbtkwdhy10Pc8CzwiNmEEpSG1q/ZhVd8wQhwDe+t5dTP3AzW0f2l1TV0mQQSlIbWtbbzSffMjjvPhNTyej4VNGpxjPDZ8oglKQ2dfqLV/Lpt66jpzvm/WOdifcLj4BBKElt7PSTV/KP/+ksTnvxiqb7jE1Mce/DB7h26Ad88MZvce3QD7x3+DQ4oF6S2lx/bzfnvPx4to78iLGJp/Yk7e0+is/c/gBdRwVjE1P09XRx+Q072XTROtYOHFtBxZ3FM0JJ6gDr16wimvSdGZ88yPjkwUMhOTYx5b3Dp6HyIIyIkYi4KyLujIjhBu0REX8WEfdFxI6IeGUVdUpSlZb1drPponX093bR19MFQF9PFz3dQW934z/l3jtcmHa5NHpGZu5t0nYOcHLxdSrw58WrJNXK2oFjGXr3mWzesZuRvWMMrOjjOw8/wdV/P9Jw/7GJKbbc9TD/YrUP6p5PJxyZ84BPZ2YC34yIX4qI4zPzoaoLk6Sy9fd2c/7akw6tXzv0A/p6uhreO4SfjzX0fmFzlV8aBRL4SkRsi4iNDdpPAH44a/3BYtuTRMTGiBiOiOE9e3wYraR6mO/eIfx8rOGbrvoml/2/e+xR2kA7BOGvZeYrmb4E+vaIOP2ZvElmXpmZg5k5uHKl05NIqodlvd38h7N/5bD7TUwmV//9CJdt3unTaOaoPAgzc1fx+ihwHbBuzi67gOfNWj+x2CZJtXdgfJI//dK3F7z/TI9SzxB/rtIgjIj+iDhmZhk4C7h7zm7XA28peo/+KvCY9wcladrm7bvJfPrf5xniz1V9RngccFtEbAeGgBsy80sRcUlEXFLsswW4H7gP+CTwr6opVZLaz8i+0aYdZRbCMYcV9xrNzPuBNQ22f2LWcgJvL7MuSeoUA8v75+01ulAzYw5n90iti6rPCCVJR2C+XqNdAT1NBtvPNTYxxcjesUWsrHMYhJLUwZo9caa/t4tr3/ZqPnvxqU9qa6avp4uBFX1llNx2OmFAvSRpHo2eOLN+9Sr6i6fJzLTd+/ABPnP7A4xPHnzKe0TA+tWryi69LRiEkrQEzH3iTLO2s1/xXDZcM0Qmh2aqiKA4q6xnJNTzp5akmjrc2WMd1fcnl6Samu/ssY7sLCNJqjWDUJJUawahJKnWDEJJUq0ZhJKkWjMIJUm1ZhBKkmrNIJQk1ZpBKEmqNYNQklRrBqEkqdYMQklSrRmEkqRaMwglSbVmEEqSas0glCTVmkEoSao1g1CSVGuVBWFEPC8ivh4ROyPinoj4Nw32eU1EPBYRdxZf762iVknS0tVd4WdPAv8uM++IiGOAbRFxU2bunLPfrZm5voL6JEk1UNkZYWY+lJl3FMtPAN8CTqiqHklSPbXFPcKIGAD+KXB7g+ZXR8T2iLgxIl5WamGSpCWvykujAETEMuD/An+UmY/Pab4DeH5mHoiIc4G/AU5u8j4bgY0AJ510UgsrliQtJZWeEUbE0UyH4Gcz86/ntmfm45l5oFjeAhwdESsavVdmXpmZg5k5uHLlypbWLUlaOqrsNRrAXwDfysz/1mSf5xb7ERHrmK53X3lVSpKWugUFYUT85UK2PU3/HPh94LWzhkecGxGXRMQlxT5vAO6OiO3AnwEXZGYe4edKknTIQu8RPqmTSkR0Aa86kg/OzNuAOMw+VwBXHMnnSJI0n3nPCCPi0oh4AlgdEY8XX08AjwJfLKVCSZJaaN4gzMw/ycxjgA9n5i8UX8dk5vLMvLSkGiVJapkFXRrNzEsj4gTg+bO/JzNvaVVhkiSVYUFBGBEfBC4AdgJTxeYEDEJJUkdbaGeZ1wMvyczxVhYjSVLZFjqO8H7g6FYWIklSFeY9I4yIjzF9CXQMuDMivgocOivMzD9sbXmSJLXW4S6NDhev24DrW1yLJEmlmzcIM/NTZRUiSVIVFtpr9C6mL5HO9hjTZ4z/JTN9/qckqSMttNfojUwPm/irYv0CoA94GNgE/NaiVyZJUgkWGoRnZuYrZ63fFRF3ZOYrI+LNrShMkqQyLHT4RFcxDRIAEbEW6CpWJxe9KkmSSrLQM8KLgauL2eQDeBy4OCL6gT9pVXGSJLXaQp81uhV4RUT8YrH+2KzmL7SiMEmSynC4AfVvzszPRMS/nbMdgGYzy0uS1CkOd0bYX7we0+pCJEmqwuEG1P+v4vU/l1OOJEnlWlCv0Yh4cUR8NSLuLtZXR8R/bG1pkiS13kKHT3wSuBT4GUBm7mB6UL0kSR1toUHYl5lDc7Y5flCS1PEWGoR7I+KFFM8bjYg3AA+1rCpJkkqy0AH1bweuBH4lInYB3wfe1LKqJEkqyUKDcBdwDfB14FimnyxzIXBZi+qSJKkUCw3CLwI/Bu4AdreuHEmSyrXQIDwxM89e7A+PiLOB/8H0A7yvyswPzmnvBT4NvArYB5yfmSOLXYckqb4W2lnmHyLiFYv5wRHRBXwcOAd4KfDGiHjpnN3eCvwoM18EfBT408WsQZKkwz1rdGZm+m7gooi4HxhnegaKzMzVR/DZ64D7MvP+4rOuBc4Dds7a5zzg/cXy/wGuiIjIzDyCz5Uk6ZDDXRpd38LPPgH44az1B4FTm+2TmZMR8RiwHNjbwrokSTVyuGeNPlBWIUcqIjYCGwFOOumkiquRJHWKhd4jbIVdwPNmrZ9YbGu4T0R0A7/IdKeZp8jMKzNzMDMHV65c2YJyJUlLUZVBuBU4OSJeEBE9TD+79Po5+1zP9HhFgDcAX/P+oCRpMS10+MSiK+75vQP4MtPDJ67OzHsi4jJgODOvB/4C+MuIuA/Yjw/6liQtssqCECAztwBb5mx776zlnwK/W3ZdkqT6qPLSqCRJlTMIJUm1ZhBKkmrNIJQk1ZpBKEmqNYNQklRrBqEkqdYMQklSrRmEkqRaq/TJMp3iwPgkm7fvZmTfKAPL+1m/ZhXLej10krQU+Nf8MLaO7GfDNUNkwtjEFH09XVx+w042XbSOtQPHVl2eJOkIeWl0Ho88/lPefNXtjI5PMTYxBUyH4ej4FBuuGWJ0fLLiCiVJR8ogbGLryH5O+9DXGJ882LA9Ezbv2F1yVZKkxWYQznFgfJJN/zDCG6/8BhOTzac+HJuYYstdD3PAs0JJ6mgG4SxbR/Zz6gdu5gM37KTJieCTfON7ezn1AzezdWR/64uTJLWEQVg4MD5Z3PebYmKq+ZngbBNT6f1CSepwBmFh8/bd5MLy7ym8XyhJncsgLIzsGz3UM/TpGpuYYmTv2CJXJEkqg0FYGFjeT19P1zP63r6eLgZW9C1yRZKkMhiEhfVrVhHxzL43AtavXrW4BUmSSmEQFpb1drPponX093YdOjPs6Zo/GXu6gv7eruL7fEiPJHUi/3rPsnbgWIbefSabd+xmZO8YOx96jL+7d2/T/f/ZC1fw8Te90hCUpA7mX/A5+nu7OX/tSQBcO/QDto78qGknml85/hhDUJI6nJdG57F+zSqg+ZiKv/zGA44flKQOZxDOY1lvN2959UDT9sTxg5LU6Sq5rhcRHwZ+C5gAvgdclJk/brDfCPAEMAVMZuZgmXUejuMHJanzVXVGeBPw8sxcDdwLXDrPvmdk5ilVheB84wsdPyhJna+SIMzMr2TmzM21bwInVlHHQsw3vtDxg5LU+drhHuEfADc2aUvgKxGxLSI2zvcmEbExIoYjYnjPnj2LVlyj8YV9PV2OH5SkJSLymT5p+nBvHHEz8NwGTe/JzC8W+7wHGAR+JxsUEhEnZOauiHgO05dT/3Vm3nK4zx4cHMzh4eEj+wHmGB2fPDS+cGBFH+tXrzIEJalDRMS2ZrfYWvaXPDPPnK89IjYA64FfbxSCxXvsKl4fjYjrgHXAYYOwFWaPL5QkLR2VXBqNiLOBdwKvy8yG3S4joj8ijplZBs4C7i6vSklSHVR1j/AK4Bjgpoi4MyI+ARARqyJiS7HPccBtEbEdGAJuyMwvVVOuJGmpquQmV2a+qMn23cC5xfL9wJoy65Ik1U879BqVJKkyBqEkqdYMQklSrRmEkqRaMwglSbVmEEqSas0glCTVmkEoSao1g1CSVGsGoSSp1gxCSVKtGYSSpFozCCVJtWYQSpJqzSCUJNWaQShJqjWDUJJUawahJKnWDEJJUq0ZhJKkWjMIJUm1ZhBKkmrNIJQk1ZpBKEmqtUqCMCLeHxG7IuLO4uvcJvudHRHfiYj7IuJdZdcpSVr6uiv87I9m5n9t1hgRXcDHgd8AHgS2RsT1mbmzrAIlSUtfO18aXQfcl5n3Z+YEcC1wXsU1SZKWmCqD8B0RsSMiro6IZzdoPwH44az1B4ttkiQtmpYFYUTcHBF3N/g6D/hz4IXAKcBDwEcW4fM2RsRwRAzv2bPnSN9OklQTLbtHmJlnLmS/iPgksLlB0y7gebPWTyy2Nfu8K4ErAQYHB3PhlUqS6qyqXqPHz1p9PXB3g922AidHxAsioge4ALi+jPokSfVRVa/RD0XEKUACI8DbACJiFXBVZp6bmZMR8Q7gy0AXcHVm3lNRvZKkJaqSIMzM32+yfTdw7qz1LcCWsuqSJNVPOw+fkCSp5QxCSVKtGYSSpFozCCVJtWYQSpJqzSCUJNWaQShJqjWDUJJUawahJKnWDEJJUq0ZhJKkWjMIJUm1ZhBKkmrNIJQk1ZpBKEmqNYNQklRrBqEkqdYMQklSrRmEkqRaMwglSbVmEEqSas0glCTVmkEoSao1g1CSVGsGoSSp1rqr+NCI+DzwkmL1l4AfZ+YpDfYbAZ4ApoDJzBwsrUhJUi1UEoSZef7MckR8BHhsnt3PyMy9ra9KktrTgfFJNm/fzci+UQaW97N+zSqW9Vby53tJqvRIRkQAvwe8tso6JKldbR3Zz4ZrhsiEsYkp+nq6uPyGnWy6aB1rB46turwloep7hKcBj2Tmd5u0J/CViNgWERtLrEuSKvfI4z/lzVfdzuj4FGMTU8B0GI6OT7HhmiFGxycrrnBpaFkQRsTNEXF3g6/zZu32RuBz87zNr2XmK4FzgLdHxOnzfN7GiBiOiOE9e/Ys0k8hSdXYOrKf0z70NcYnDzZsz4TNO3aXXNXS1LJLo5l55nztEdEN/A7wqnneY1fx+mhEXAesA25psu+VwJUAg4OD+QzLlqTKPfL4T3nTVd9kYrL5n7KxiSlG9o6VWNXSVeWl0TOBb2fmg40aI6I/Io6ZWQbOAu4usT5JKt3Wkf2c/qGvzxuCAH09XQys6CupqqWtyiC8gDmXRSNiVURsKVaPA26LiO3AEHBDZn6p5BolqTQHxifZcM1Q08uhs0XA+tWrSqhq6aus12hmbmiwbTdwbrF8P7Cm5LIkqTKbt+8mF3Bjp7f7KDZdtI5+h1AsCo+iJLWJkX2jh3qHNtPTHdz6zjN4zi/8k5KqWvqqHj4hSSoMLO+nr6eraXtPd/DZi3/VEFxkBqEktYn1a1YR0bitt/sobnvnax1E3wIGoSS1iWW93cW9v65DZ4Z9PV3093bxmYtP9UywRbxHKEltZO3AsQy9+0w279jNyN4xBlb0sX71KjvGtJBHVpLaTH9vN+evPanqMmrDS6OSpFozCCVJtWYQSpJqzSCUJNWaQShJqjWDUJJUawahJKnWIhfyqPMOExF7gAdK/tgVwN6SP3OxdGrt1l2+Tq3dusvXbrU/PzNXNmpYkkFYhYgYzszBqut4Jjq1dusuX6fWbt3l66TavTQqSao1g1CSVGsG4eK5suoCjkCn1m7d5evU2q27fB1Tu/cIJUm15hmhJKnWDMIjFBEfjohvR8SOiLguIn5pVtulEXFfRHwnIn6zyjrniojfjYh7IuJgRAzO2j4QET+JiDuLr09UWedczeou2tr2eM8VEe+PiF2zjvO5Vdc0n4g4uziu90XEu6quZ6EiYiQi7iqO8XDV9cwnIq6OiEcj4u5Z246NiJsi4rvF67OrrLGRJnV31O+3QXjkbgJenpmrgXuBSwEi4qXABcDLgLOB/xkRXZVV+VR3A78D3NKg7XuZeUrxdUnJdR1Ow7o74Hg38tFZx3lL1cU0UxzHjwPnAC8F3lgc705xRnGM270r/yamf3dnexfw1cw8Gfhqsd5uNvHUuqFDfr/BIDximfmVzJwsVr8JnFgsnwdcm5njmfl94D5gXRU1NpKZ38rM71Rdx9M1T91tfbw73Drgvsy8PzMngGuZPt5aRJl5C7B/zubzgE8Vy58CfrvUohagSd0dxSBcXH8A3FgsnwD8cFbbg8W2TvCCiPjHiPi7iDit6mIWqBOP9zuKS+pXt+Mlr1k68djOSOArEbEtIjZWXcwzcFxmPlQsPwwcV2UxT1On/H7TXXUBnSAibgae26DpPZn5xWKf9wCTwGfLrG0+C6m7gYeAkzJzX0S8CvibiHhZZj7eskLneIZ1t535fg7gz4HLmf5DfTnwEab/I6XF9WuZuSsingPcFBHfLs5gOk5mZkR0Sjf/jvr9NggXIDPPnK89IjYA64Ffz5+PR9kFPG/WbicW20pzuLqbfM84MF4sb4uI7wEvBkrraPBM6qYNjvdcC/05IuKTwOYWl3Mk2u7YLlRm7ipeH42I65i+zNtJQfhIRByfmQ9FxPHAo1UXtBCZ+cjMcgf8fntp9EhFxNnAO4HXZebYrKbrgQsiojciXgCcDAxVUePTERErZzqZRMQvM133/dVWtSAddbyLP2ozXs90J6B2tRU4OSJeEBE9THdKur7img4rIvoj4piZZeAs2vs4N3I9cGGxfCHQEVdEOuz32zPCRXAF0Mv0ZReAb2bmJZl5T0R8AdjJ9CXTt2fmVIV1PklEvB74GLASuCEi7szM3wROBy6LiJ8BB4FLMrNtboQ3q7vdj3cDH4qIU5i+dDQCvK3acprLzMmIeAfwZaALuDoz76m4rIU4Driu+HfZDfxVZn6p2pKai4jPAa8BVkTEg8D7gA8CX4iItzI9o87vVVdhY03qfk2n/H6DT5aRJNWcl0YlSbVmEEqSas0glCTVmkEoSao1g1CSVGsGobQERMSW2TOfzNr+/oj442L5b+fO2FFsP6XdZweQWskglJaAzDw3M3/8DL/9FMAgVG0ZhFIHiIh/HxF/WCx/NCK+Viy/NiI+W8y7t6LY9p6IuDcibgNeMuetfjcihor204onxVwGnF/MG3d+mT+X1A4MQqkz3ArMzAQyCCyLiKOLbYeenVk8KP0Cfn6Wt3bO+3Rn5jrgj4D3FdMqvRf4fDFv3Odb+2NI7ccglDrDNuBVEfELTD8U/RtMB+JpTIfkjNOA6zJzrJgxZO4zQf961vsNtLRiqUP4rFGpA2TmzyLi+8AG4B+AHcAZwIuAbz2NtxovXqfw378EeEYodZJbgT9m+lLorcAlwD/mkx8YfAvw2xHxrGLmhd9awPs+ARyz2MVKncIglDrHrcDxwDeK+d5+ypMvi5KZdwCfB7YDNzI9hdLhfB14qZ1lVFfOPiFJqjXPCCVJtWYQSpJqzSCUJNWaQShJqjWDUJJUawahJKnWDEJJUq0ZhJKkWvv/Kpu6Rpc4Ac4AAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 504x360 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
},
{
"output_type": "stream",
"text": [
"done in 0.809s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KfoeQ2IMexXO"
},
"source": [
"## K-means Clustering VSM (BOW, TFIDF, Cosine)"
]
},
{
"cell_type": "code",
"metadata": {
"id": "wXOiOgHSezle"
},
"source": [
"import numpy as np\n",
"XVSM = np.array(hasil_tfidf)\n",
"yVSM = np.array(cosine_similaritas)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "dDwIxG8Le2VE",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "cf83b326-6491-46bd-fb45-37661b568d54"
},
"source": [
"print(\"Loading Data X & Y...\")\n",
"t0 = time()\n",
"\n",
"print(\"X\", XVSM, \"\\n\")\n",
"print(\"Y\", yVSM, \"\\n\")\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Data X & Y...\n",
"X [[0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" [0. 0.29646275 0. ... 0. 0. 0.29646275]\n",
" [0. 0. 0. ... 0. 0. 0. ]] \n",
"\n",
"Y [[1. 0.50541977 0.50541977 0.50541977 0.50015444 0.50541977\n",
" 0.57269296 0.50541977 0.67397514 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.67249742 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.42746079 0. 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.50541977 1. 0.75270988 0.75270988 0.74486836 0.75270988\n",
" 0.57269296 0.75270988 0.45255199 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.45155975 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.21604713 0. 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.50541977 0.75270988 1. 0.75270988 0.74486836 0.75270988\n",
" 0.57269296 0.75270988 0.45255199 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.45155975 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.21604713 0. 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.50541977 0.75270988 0.75270988 1. 0.74486836 0.75270988\n",
" 0.57269296 0.75270988 0.45255199 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.45155975 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.21604713 0. 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.50015444 0.74486836 0.74486836 0.74486836 1. 0.74486836\n",
" 0.5667268 0.74486836 0.44783743 0.44588006 0.28944058 0.22726228\n",
" 0.22677031 0.57648806 0.44685553 0.44783743 0.24605109 0.11894297\n",
" 0. 0.13195767 0.21379641 0. 0. 0.\n",
" 0.1522848 0. ]\n",
" [0.50541977 0.75270988 0.75270988 0.75270988 0.74486836 1.\n",
" 0.57269296 0.75270988 0.45255199 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.45155975 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.21604713 0.12304112 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.57269296 0.57269296 0.57269296 0.57269296 0.5667268 0.57269296\n",
" 1. 0.57269296 0.5127883 0.51054704 0.68249834 0.26022264\n",
" 0.25965932 0.66009742 0.51166399 0.5127883 0.28173643 0.\n",
" 0. 0.15109579 0.24480379 0. 0. 0.\n",
" 0.174371 0. ]\n",
" [0.50541977 0.75270988 0.75270988 0.75270988 0.74486836 0.75270988\n",
" 0.57269296 1. 0.45255199 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.45155975 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.21604713 0. 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.67397514 0.45255199 0.45255199 0.45255199 0.44783743 0.45255199\n",
" 0.5127883 0.45255199 1. 0.4034432 0.26189293 0.20563248\n",
" 0.20518734 0.52162053 0.79998025 0.40521428 0.22263305 0.09913733\n",
" 0. 0.11939853 0.38274766 0. 0. 0.\n",
" 0.13779102 0. ]\n",
" [0.45057401 0.45057401 0.45057401 0.45057401 0.44588006 0.45057401\n",
" 0.51054704 0.45057401 0.4034432 1. 0.51054704 0.40087014\n",
" 0.40000235 0.51934066 0.40255863 0.4034432 0.22165999 0.\n",
" 0. 0.23528858 0.19260272 0. 0. 0.\n",
" 0.13718877 0. ]\n",
" [0.29248764 0.29248764 0.29248764 0.29248764 0.28944058 0.29248764\n",
" 0.68249834 0.29248764 0.26189293 0.51054704 1. 0.50951786\n",
" 0.50841487 0.33712713 0.26131872 0.26189293 0. 0.\n",
" 0. 0.15109579 0.125027 0. 0. 0.\n",
" 0.174371 0. ]\n",
" [0.22965476 0.22965476 0.22965476 0.22965476 0.22726228 0.22965476\n",
" 0.26022264 0.22965476 0.20563248 0.40087014 0.50951786 1.\n",
" 0.80251794 0.2647047 0.20518162 0.20563248 0. 0.\n",
" 0. 0.11863704 0.09816841 0. 0. 0.10402935\n",
" 0.13691222 0. ]\n",
" [0.22915762 0.22915762 0.22915762 0.22915762 0.22677031 0.22915762\n",
" 0.25965932 0.22915762 0.20518734 0.40000235 0.50841487 0.80251794\n",
" 1. 0.26413167 0.20473745 0.20518734 0. 0.\n",
" 0. 0.11838022 0.0979559 0. 0. 0.10380416\n",
" 0.13661584 0. ]\n",
" [0.58255698 0.58255698 0.58255698 0.58255698 0.57648806 0.58255698\n",
" 0.66009742 0.58255698 0.52162053 0.51934066 0.33712713 0.2647047\n",
" 0.26413167 1. 0.52047685 0.52162053 0.28658904 0.\n",
" 0. 0.15369825 0.24902027 0. 0. 0.\n",
" 0.17737436 0. ]\n",
" [0.67249742 0.45155975 0.45155975 0.45155975 0.44685553 0.45155975\n",
" 0.51166399 0.45155975 0.79998025 0.40255863 0.26131872 0.20518162\n",
" 0.20473745 0.52047685 1. 0.40432583 0.22214492 0.09891997\n",
" 0. 0.11913675 0.38190847 0. 0. 0.\n",
" 0.1374889 0. ]\n",
" [0.45255199 0.45255199 0.45255199 0.45255199 0.44783743 0.45255199\n",
" 0.5127883 0.45255199 0.40521428 0.4034432 0.26189293 0.20563248\n",
" 0.20518734 0.52162053 0.40432583 1. 0.44526611 0.\n",
" 0. 0.11939853 0.19344823 0. 0. 0.\n",
" 0.13779102 0. ]\n",
" [0.24864137 0.24864137 0.24864137 0.24864137 0.24605109 0.24864137\n",
" 0.28173643 0.24864137 0.22263305 0.22165999 0. 0.\n",
" 0. 0.28658904 0.22214492 0.44526611 1. 0.\n",
" 0.11656128 0. 0.10628444 0. 0. 0.11039584\n",
" 0. 0.25641247]\n",
" [0. 0. 0. 0. 0.11894297 0.\n",
" 0. 0. 0.09913733 0. 0. 0.\n",
" 0. 0. 0.09891997 0. 0. 1.\n",
" 0.15571237 0.17530336 0.1893116 0.05508894 0.22533871 0.1015106\n",
" 0.19801995 0.17497708]\n",
" [0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0.11656128 0.15571237\n",
" 1. 0.23954797 0.19821839 0.17304236 0.29021277 0.20588608\n",
" 0.50686884 0.23910212]\n",
" [0.13334684 0.13334684 0.13334684 0.13334684 0.13195767 0.13334684\n",
" 0.15109579 0.13334684 0.11939853 0.23528858 0.15109579 0.11863704\n",
" 0.11838022 0.15369825 0.11913675 0.11939853 0. 0.17530336\n",
" 0.23954797 1. 0.27542833 0.19068493 0.38791647 0.23056157\n",
" 0.30797226 0.13601865]\n",
" [0.42746079 0.21604713 0.21604713 0.21604713 0.21379641 0.21604713\n",
" 0.24480379 0.21604713 0.38274766 0.19260272 0.125027 0.09816841\n",
" 0.0979559 0.24902027 0.38190847 0.19344823 0.10628444 0.1893116\n",
" 0.19821839 0.27542833 1. 0.21038101 0.31755075 0.04693344\n",
" 0.31785607 0.10901062]\n",
" [0. 0. 0. 0. 0. 0.12304112\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.05508894\n",
" 0.17304236 0.19068493 0.21038101 1. 0.24641611 0.0546298\n",
" 0.14670571 0. ]\n",
" [0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.22533871\n",
" 0.29021277 0.38791647 0.31755075 0.24641611 1. 0.16848816\n",
" 0.29525166 0.13182946]\n",
" [0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.10402935\n",
" 0.10380416 0. 0. 0. 0.11039584 0.1015106\n",
" 0.20588608 0.23056157 0.04693344 0.0546298 0.16848816 1.\n",
" 0.13091301 0.23013244]\n",
" [0.15388796 0.15388796 0.15388796 0.15388796 0.1522848 0.15388796\n",
" 0.174371 0.15388796 0.13779102 0.13718877 0.174371 0.13691222\n",
" 0.13661584 0.17737436 0.1374889 0.13779102 0. 0.19801995\n",
" 0.50686884 0.30797226 0.31785607 0.14670571 0.29525166 0.13091301\n",
" 1. 0.15203349]\n",
" [0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0.25641247 0.17497708\n",
" 0.23910212 0.13601865 0.10901062 0. 0.13182946 0.23013244\n",
" 0.15203349 1. ]] \n",
"\n",
"done in 0.011s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "2E5JiTqVe5IH",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 105
},
"outputId": "2a3458ae-e88c-4a98-cd15-b8bafcbb6436"
},
"source": [
"from sklearn.cluster import KMeans\n",
"\n",
"print(\"Loading Kmeans...\")\n",
"t0 = time()\n",
"kmeans = KMeans(n_clusters=2) # You want cluster the passenger records into 2: Survived or Not survived\n",
"print(kmeans.fit(XVSM))\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Kmeans...\n",
"KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
" n_clusters=2, n_init=10, n_jobs=None, precompute_distances='auto',\n",
" random_state=None, tol=0.0001, verbose=0)\n",
"done in 0.019s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "9UnT1aCOe7Z-",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
},
"outputId": "b650eaa4-39a0-4251-93cc-f42fbc738fe3"
},
"source": [
"print(\"Loading Data Correction...\")\n",
"t0 = time()\n",
"\n",
"correct = 0\n",
"for i in range(len(XVSM)):\n",
" predict_me = np.array(XVSM[i].astype(float))\n",
" predict_me = predict_me.reshape(-1, len(predict_me))\n",
" prediction = kmeans.predict(predict_me)\n",
" if prediction[0] == yVSM[i].all():\n",
" correct += 1\n",
"\n",
"print(correct/len(XVSM))\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Data Correction...\n",
"0.3076923076923077\n",
"done in 0.012s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "lmroRTCxe-Nd",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "c8a6fb24-c107-4d4f-fa02-4b9f26101253"
},
"source": [
"from sklearn.preprocessing import MinMaxScaler\n",
"\n",
"print(\"Loading X Scaled...\")\n",
"t0 = time()\n",
"\n",
"scaler = MinMaxScaler()\n",
"XVSM_scaled = scaler.fit_transform(yVSM)\n",
"\n",
"print(XVSM_scaled)\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading X Scaled...\n",
"[[1. 0.50541977 0.50541977 0.50541977 0.50015444 0.50541977\n",
" 0.57269296 0.50541977 0.67397514 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.67249742 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.39926629 0. 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.50541977 1. 0.75270988 0.75270988 0.74486836 0.75270988\n",
" 0.57269296 0.75270988 0.45255199 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.45155975 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.17744164 0. 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.50541977 0.75270988 1. 0.75270988 0.74486836 0.75270988\n",
" 0.57269296 0.75270988 0.45255199 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.45155975 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.17744164 0. 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.50541977 0.75270988 0.75270988 1. 0.74486836 0.75270988\n",
" 0.57269296 0.75270988 0.45255199 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.45155975 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.17744164 0. 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.50015444 0.74486836 0.74486836 0.74486836 1. 0.74486836\n",
" 0.5667268 0.74486836 0.44783743 0.44588006 0.28944058 0.22726228\n",
" 0.22677031 0.57648806 0.44685553 0.44783743 0.24605109 0.11894297\n",
" 0. 0.13195767 0.17508009 0. 0. 0.\n",
" 0.1522848 0. ]\n",
" [0.50541977 0.75270988 0.75270988 0.75270988 0.74486836 1.\n",
" 0.57269296 0.75270988 0.45255199 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.45155975 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.17744164 0.12304112 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.57269296 0.57269296 0.57269296 0.57269296 0.5667268 0.57269296\n",
" 1. 0.57269296 0.5127883 0.51054704 0.68249834 0.26022264\n",
" 0.25965932 0.66009742 0.51166399 0.5127883 0.28173643 0.\n",
" 0. 0.15109579 0.20761441 0. 0. 0.\n",
" 0.174371 0. ]\n",
" [0.50541977 0.75270988 0.75270988 0.75270988 0.74486836 0.75270988\n",
" 0.57269296 1. 0.45255199 0.45057401 0.29248764 0.22965476\n",
" 0.22915762 0.58255698 0.45155975 0.45255199 0.24864137 0.\n",
" 0. 0.13334684 0.17744164 0. 0. 0.\n",
" 0.15388796 0. ]\n",
" [0.67397514 0.45255199 0.45255199 0.45255199 0.44783743 0.45255199\n",
" 0.5127883 0.45255199 1. 0.4034432 0.26189293 0.20563248\n",
" 0.20518734 0.52162053 0.79998025 0.40521428 0.22263305 0.09913733\n",
" 0. 0.11939853 0.35235128 0. 0. 0.\n",
" 0.13779102 0. ]\n",
" [0.45057401 0.45057401 0.45057401 0.45057401 0.44588006 0.45057401\n",
" 0.51054704 0.45057401 0.4034432 1. 0.51054704 0.40087014\n",
" 0.40000235 0.51934066 0.40255863 0.4034432 0.22165999 0.\n",
" 0. 0.23528858 0.15284272 0. 0. 0.\n",
" 0.13718877 0. ]\n",
" [0.29248764 0.29248764 0.29248764 0.29248764 0.28944058 0.29248764\n",
" 0.68249834 0.29248764 0.26189293 0.51054704 1. 0.50951786\n",
" 0.50841487 0.33712713 0.26131872 0.26189293 0. 0.\n",
" 0. 0.15109579 0.08193925 0. 0. 0.\n",
" 0.174371 0. ]\n",
" [0.22965476 0.22965476 0.22965476 0.22965476 0.22726228 0.22965476\n",
" 0.26022264 0.22965476 0.20563248 0.40087014 0.50951786 1.\n",
" 0.80251794 0.2647047 0.20518162 0.20563248 0. 0.\n",
" 0. 0.11863704 0.05375802 0. 0. 0.10402935\n",
" 0.13691222 0. ]\n",
" [0.22915762 0.22915762 0.22915762 0.22915762 0.22677031 0.22915762\n",
" 0.25965932 0.22915762 0.20518734 0.40000235 0.50841487 0.80251794\n",
" 1. 0.26413167 0.20473745 0.20518734 0. 0.\n",
" 0. 0.11838022 0.05353504 0. 0. 0.10380416\n",
" 0.13661584 0. ]\n",
" [0.58255698 0.58255698 0.58255698 0.58255698 0.57648806 0.58255698\n",
" 0.66009742 0.58255698 0.52162053 0.51934066 0.33712713 0.2647047\n",
" 0.26413167 1. 0.52047685 0.52162053 0.28658904 0.\n",
" 0. 0.15369825 0.21203853 0. 0. 0.\n",
" 0.17737436 0. ]\n",
" [0.67249742 0.45155975 0.45155975 0.45155975 0.44685553 0.45155975\n",
" 0.51166399 0.45155975 0.79998025 0.40255863 0.26131872 0.20518162\n",
" 0.20473745 0.52047685 1. 0.40432583 0.22214492 0.09891997\n",
" 0. 0.11913675 0.35147076 0. 0. 0.\n",
" 0.1374889 0. ]\n",
" [0.45255199 0.45255199 0.45255199 0.45255199 0.44783743 0.45255199\n",
" 0.5127883 0.45255199 0.40521428 0.4034432 0.26189293 0.20563248\n",
" 0.20518734 0.52162053 0.40432583 1. 0.44526611 0.\n",
" 0. 0.11939853 0.15372987 0. 0. 0.\n",
" 0.13779102 0. ]\n",
" [0.24864137 0.24864137 0.24864137 0.24864137 0.24605109 0.24864137\n",
" 0.28173643 0.24864137 0.22263305 0.22165999 0. 0.\n",
" 0. 0.28658904 0.22214492 0.44526611 1. 0.\n",
" 0.11656128 0. 0.06227372 0. 0. 0.11039584\n",
" 0. 0.25641247]\n",
" [0. 0. 0. 0. 0.11894297 0.\n",
" 0. 0. 0.09913733 0. 0. 0.\n",
" 0. 0. 0.09891997 0. 0. 1.\n",
" 0.15571237 0.17530336 0.14938953 0.05508894 0.22533871 0.1015106\n",
" 0.19801995 0.17497708]\n",
" [0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0.11656128 0.15571237\n",
" 1. 0.23954797 0.15873493 0.17304236 0.29021277 0.20588608\n",
" 0.50686884 0.23910212]\n",
" [0.13334684 0.13334684 0.13334684 0.13334684 0.13195767 0.13334684\n",
" 0.15109579 0.13334684 0.11939853 0.23528858 0.15109579 0.11863704\n",
" 0.11838022 0.15369825 0.11913675 0.11939853 0. 0.17530336\n",
" 0.23954797 1. 0.23974705 0.19068493 0.38791647 0.23056157\n",
" 0.30797226 0.13601865]\n",
" [0.42746079 0.21604713 0.21604713 0.21604713 0.21379641 0.21604713\n",
" 0.24480379 0.21604713 0.38274766 0.19260272 0.125027 0.09816841\n",
" 0.0979559 0.24902027 0.38190847 0.19344823 0.10628444 0.1893116\n",
" 0.19821839 0.27542833 1. 0.21038101 0.31755075 0.04693344\n",
" 0.31785607 0.10901062]\n",
" [0. 0. 0. 0. 0. 0.12304112\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.05508894\n",
" 0.17304236 0.19068493 0.17149649 1. 0.24641611 0.0546298\n",
" 0.14670571 0. ]\n",
" [0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.22533871\n",
" 0.29021277 0.38791647 0.28394377 0.24641611 1. 0.16848816\n",
" 0.29525166 0.13182946]\n",
" [0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.10402935\n",
" 0.10380416 0. 0. 0. 0.11039584 0.1015106\n",
" 0.20588608 0.23056157 0. 0.0546298 0.16848816 1.\n",
" 0.13091301 0.23013244]\n",
" [0.15388796 0.15388796 0.15388796 0.15388796 0.1522848 0.15388796\n",
" 0.174371 0.15388796 0.13779102 0.13718877 0.174371 0.13691222\n",
" 0.13661584 0.17737436 0.1374889 0.13779102 0. 0.19801995\n",
" 0.50686884 0.30797226 0.28426413 0.14670571 0.29525166 0.13091301\n",
" 1. 0.15203349]\n",
" [0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0.25641247 0.17497708\n",
" 0.23910212 0.13601865 0.06513415 0. 0.13182946 0.23013244\n",
" 0.15203349 1. ]]\n",
"done in 0.012s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "EJ6V0_XAfdrZ"
},
"source": [
"### Visual Kmeans Clustering"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ljIbfoVCfBkp",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 370
},
"outputId": "3b26897e-1c62-485f-d050-b4ef7ec0d815"
},
"source": [
"import umap\n",
"\n",
"print(\"Loading Topic Visualisation...\")\n",
"t0 = time()\n",
"\n",
"kmeans_VSM = XVSM_scaled\n",
"embedding = umap.UMAP(n_neighbors=3, min_dist=0.1, random_state=4).fit_transform(kmeans_VSM)\n",
"plt.figure(figsize=(7,5))\n",
"plt.scatter(embedding[:, 0], embedding[:, 1], c = None, s = 50 ,edgecolors='Blue')\n",
"plt.xlabel('widht')\n",
"plt.ylabel('height')\n",
"plt.show()\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Topic Visualisation...\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAbgAAAE9CAYAAAB5t3fYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAb6klEQVR4nO3df3Dc9X3n8ddbPy1bwtZ6ZWNLNjbIQEmTEqzQa2ooIaQQTAzkkorM5YakZJx0kiZcSClMmKOXyU0z117aXNKGOByFuSRFNIXDFxMCpXDgSS8ggyH8ClYswJYBSaxsC1vWz/f9sStnJa9Xa3l3v7uffT5mNNr97mr3/WVn/eLz+X5+mLsLAIDQVEVdAAAAhUDAAQCCRMABAIJEwAEAgkTAAQCCRMABAIJUE3UBJyIej/uaNWuiLgMAUEJ27Ngx6O4ts4+XVcCtWbNG3d3dUZcBACghZvZapuN0UQIAgkTAAQCCRMABAIJEwAEAgkTAAQCCRMABAIJUVtMEAADlaXhY6uqSdvW41rWbOjulpqbCvicBBwAoqO3bpY2bJlXXmtBEc0I122K64caYtm2t1oYNhXtfuigBAAUzPJwMtwUf6lbDJTtUtWhUY40H5Ge8pss/Mql33incexNwAICC6eqS6loTstpJ9W35gEZ2L1NV7aQmDi7UocOur3+9cO9NFyUAoGB29bjGm4aUuG+94lfsVMPawaOPjfTG9bff7tAtt1SrsTH/700LDgCQd8PD0u23S88+K43tXqH6tqEZ4SZJDWsHtWBVQl1dhamBFhwAIK/SB5WMNw1pZPAMLTlzX+YnL0uopycuyfJeBwEHAMibGYNKUi22qakqHdnTrMUZnl87FFN7e/7DTaKLEgCQR9ODStK7I5f8Xo/G3lqskd74jOeO9MY11hdTZ2dhaqEFBwDIm109ronmxIxjVfWTavnoDvX/+H1qPG2/qpYnVDsU01hfci5cIQaYSAQcACCP1rWbarbFjjm+oG1IsdP364rzW7RiRVztqdVMChVuEgEHAMijzk7phhtjquqNHzMlYPzNmL7zHamxsTDX3GYj4AAAedPUJG3bWq2Nmzo01ZrQeHNxuiMzIeAAAHm1YYPU93q1urpa1NNTnO7ITAg4AEDeNTZK110nZZrfVqydBQg4AEDRFHNnAQIOAFAQs1tql19+7CRwSarqjWvjpg71vZ7fa3QFn+htZneYWb+ZPZ92LGZmD5vZrtTv5kLXAQAonu3bpbbTJnXztwa05eev6OZvDaj9rEnZkoMZ16Ssa83/mpTFWMnkTkmXzTp2k6RH3H2dpEdS9wEAAUhfrmvRxie1+P09WrTxSTVd3q2De5s0MVx3zN+MNyfU0+N5raPgAefuj0tKzDp8paS7UrfvknRVoesAABRHpuW6pGRLrb4toX23X6Qje2d23BViTcqo1qJc7u5vpG6/KWn58Z5oZpvNrNvMugcGBopTHQBg3jIt1zWtvm1IDWf0a+C+9Zoaq5ZUuDUpIx9k4u5uZsdtl7r7FklbJKmjoyO/7VcAQN4db7kuSRp7a7EaTu+Xj9Zq8CfnalF9dcEmgUfVgnvLzFZIUup3f0R1AADyrLNTGuuLZdw9YHRvsxb91j7VtQ7pd+LL9I3rW9T3ev6nCEjRteC2SrpW0jdSv++PqA4AQJ5NL9f1hx/u0PCyt1XXOqSxtxZrdG+zWq7eoaq6SdUOxfSZ66tSk8ELo+ABZ2b/KOkiSXEz2yvpViWD7R4zu07Sa5L+qNB1AACKZ8MG6devVOv09rjG68fVcHq/4ht3qqpusuD7wE0reMC5+yeO89AHC/3eAIDorFghPfyzKm3cdKqqqut08FB9URdejnyQCQAgLLNXMHn5hWo98EDxF14m4AAA85Jp0eRnnz3+WpPXXVecfeCmEXAAgBOWadHkL/9ZTJOTUuOHi7PW5FwIOADACUlfimt2kA3+7/Va2jo04/kNawc11ZpQV1dLQUdNzhbVPDgAQJnKthRX3aq3deillcf8TSHWmpwLAQcAOCFZl+JauV8TBxYec7wQa03OhS5KAMAJWdduqtq6VMPPjmpi/0LVLDmsRWfvU1X9pEb3xlS7dHjG84s17202Ag4AcEJWr5aGdjVrwfiE6lcc0MjuZdr/+Fk65Xd7VP12TBqK6dCRJo03J4o67202Ag4AkLPhYenj10yq5aMzB5iM9MY1cG+HHtxWrfe/X+rqKv68t9kIOABAzrINMImdldCePS1qbFRqtGRxr7nNRsABQA4yTWpuaoq6quLLNsBkIpZQT09cUQfbNAIOAOaQaVLz9OochdjmpZRl2+stipGS2Zh7+ewh2tHR4d3d3VGXAaCCDA9LbacdO6l5pDeuIw8Xf3WOqJXifw8z2+HuHbOP04IDgCy6uqTqUzNfcxo/tfirc0Rteq+3jZs6NNWaiHykZDYEHABk8cKLLo9nvuY0FU/oxZdK55pTsWzYIPW9Xl0SIyWzIeAAIIvE26bRvuaMj431NevtwcoKt2mlMlIyG5bqAoAsmmOuI33NGumNzzg+0hvXkb5mLV1aPuMYKg0tOADI4rffZTpl5Tsa/Mm5qm8bUt3yAxp7a7FG9zZrwbJ3NDjYrOHhypwyUOpowQFAFp2dku8/RbFLn1PD6f3yiWo1nN6v2KXP6Uh/o7Y9NaC20ya1fXvUlWI2WnAAkMVvRg2ep9qVCVksoZGeZRrd16xlH3tKC9qGNBLRhp7IjhYcAMxhetTgxvUtGnu5TQ3t/Wr97KNa0Jbc2LNh7aDqWhPq6oq4UMxAwAFADhobpVNXuOrP3qum39mjqrrJGY9HsaEnsiPgACBH69pNNUPlsUwVCDgAyFlnpzTWF8s4ZSCKDT2RHYNMACBH5bRMFQg4ADgh5bJMFSIOODP7T5I+I8kl/VLSp939SJQ1AcBcsi1Tlb5v3Kq25ON79v5mDzmJfeWKJbLtcsysVdJ2See4+4iZ3SPpAXe/83h/w3Y5AEpZ+r5xR6pGdPjllaprHdKCtoRqhmI6/FpMZlLD6tS+ckMxje6N6U82V0tG4M1XqW6XUyOpwczGJS2UtC/iegBgXoaHk+G24EPdql85pMSWDyh+1Y6j2+xMjVYrcdvFim965uixI3ubdainQ//jnxLJEKzgjVQLIbKAc/c+M/trSa9LGpH0kLs/FFU9AHAyurqkutbkvnHDz65SfdvQjD3kDr28UvWrEzMCb+C+9TMCT5KqWBUlbyKbJmBmzZKulLRW0kpJi8zskxmet9nMus2se2BgoNhlzml4WLr9dunPb3LdfnvyPoDKs6vHNdGc3DduYv9C1S8/MOPx2ccOvbzymBCUWBUln6LsorxEUq+7D0iSmd0r6f2SfpD+JHffImmLlLwGV+wiZ0u/gCw3fXfLpOrbUv3pdC8AFWtdu6lmW3ISeM2SwxrZvWzG47OPZQrBaclVUSpvI9V8izLgXpf078xsoZJdlB+UVNIjSNIvII83DWl4x+kz+tgluheAStXZKd1wY0xVvXEtOnuf9j9+lkZ640f/fVh09j7tf+zso8cyheA0VkXJjyivwf3CzH4s6WlJE5KeUaqlVorSLyAf7WNfM5ixe2GqNaGurpbUMGIAlSB9EvjEqUOqib2jgXvXq75tSPWrkhPCF9RW69CDHZpandBE05BGX43PCEGJVVHyKdJRlO5+q6Rbo6whV11dUu3K31wgpnsBwGwbNkj/dHe1rvr3zapfmVDD+bs18eoKjTzVrs99qVq33JJ83vQkcb/Q9Pff69BUG6uiFELU0wTKxqOPuSZjiaP36V4AMNvwsPTxaybVdHn3b1plF+zSSG9c393SoVtuqT5mkvgtt7AqSqEQcDkYHpbuvc9lKxcfPZapj11Kdi+M7qV7AahE6VMF0mW7dJFtVRScHAIuB11d0sLTEtr/WvPRQKuqn1TL1Ts08M8dqjv1gOpXJTS6b4nG9izVgnpp504xkhKoMOlTBWbj0kXxEXA52NXjmoq/rZbzXtHAfcmLxnXLD2jsrcWamjJNHFyguolqLVz3llo2PaPRvmZGUgIVKH2qwGxcuig+9oPLwbp2U9XgUo2/3ahF79orq53Q1GiNGk7v14KV+3XK+b1qvvBXR3f5ZaImUJnYL6600ILLwerV0tCuZi0Yn1D9igOaOLBIh55v0Sm/26PRN5ao5eodx/wN3RFA5WG/uNJCwM1helRUy0e7jxlMMnBvh5paD0ouDT+7ShP7F6pmyWEtOnsf3RFAhWK/uNJBwM0h26io5jMTeufXS/XObRerfnVC9csPaGT3Mu1/7GwtqK2mOwKoUIyMLA0E3ByyjYqaWDwkt9gxq4GP9MZ16MFjtiYCABQRg0zmsK7dVDOUeVTU1GsrtPC0zK27htUMMgGAKBFwc8g6KiqxULYs25yXyDc/AICKRRflHLKNirr+i9W64wHmvABAKSLgcnC8UVHu0m3fT26PwWrgAFBaCLgcHW9UFHNeAKA0EXAniTkvAFCaCLg8YM4LAJQeRlECAIJEwAEAgkTAAQCCRMABAIJEwAEAgkTAAQCCRMABAIJEwAEAgkTAAQCCRMABAIJEwAEAghRpwJnZEjP7sZm9bGYvmdnvRVkPACAcUS+2/C1JD7r7x8ysTtLCiOsBAAQisoAzs8WSLpT0KUly9zFJY1HVAwAIS5RdlGslDUj6BzN7xsxuN7NFEdYDAAhIlAFXI+k8Sd919/dKOiTpptlPMrPNZtZtZt0DAwPFrhEAUKaiDLi9kva6+y9S93+sZODN4O5b3L3D3TtaWlqKWiAAoHxFFnDu/qakPWZ2VurQByW9GFU9AICwRD2K8k8l/TA1gnK3pE9HXA8AIBCRBpy775TUEWUNAIAwsZIJACBIBBwAIEgEHAAgSAQcACBIBBwAIEgEHAAgSAQcACBIBBwAIEgEHAAgSAQcACBIBBwAIEgEHAAgSAQcACBIBBwAIEgEHAAgSAQcACBIBBwAIEg5BZyZ/a9cjgEAUCpybcG9K/2OmVVLWp//cgAAyI+sAWdmN5vZsKT3mNnB1M+wpH5J9xelQgAA5iFrwLn7X7p7k6S/cvdTUj9N7r7U3W8uUo0AAJywmlye5O43m1mrpNPS/8bdHy9UYQAAnIycAs7MviHpGkkvSppMHXZJBBwAoCTlFHCSrpZ0lruPFrIYAADyJddRlLsl1RayEAAA8ilrC87Mvq1kV+RhSTvN7BFJR1tx7v7FwpYHAMD8zNVF2Z36vUPS1kIUkJpT1y2pz92vKMR7AAAqT9aAc/e7ilDDlyS9JOmUIrwXAKBC5DqK8pdKdlWmO6Bky+vr7v72fN7czNokbZT0XyV9eT6vAQBAJrmOovypktMDfpS6f42khZLelHSnpI/M8/3/VtKNkprm+fcAAGSUa8Bd4u7npd3/pZk97e7nmdkn5/PGZnaFpH5332FmF2V53mZJmyVp9erV83krAEAFynWaQLWZnT99x8zeJ6k6dXdinu/9+5I2mdmrku6WdLGZ/WD2k9x9i7t3uHtHS0vLPN8KAFBpcm3BfUbSHWbWKMkkHZT0GTNbJOkv5/PGqbUsb5akVAvuK+4+r9YgAACz5boW5VOS3m1mi1P3D6Q9fE8hCgMA4GTMNdH7k+7+AzP78qzjkiR3/2Y+inD3xyQ9lo/XAgBAmrsFtyj1m1GOAICyMtdE7++lfv+X4pQDAEB+5DSK0szONLNHzOz51P33mNkthS0NAID5y3WawPeVHPE4Lknu/pySk70BAChJuQbcQnd/ctax+c5/AwCg4HINuEEzO0Op9SjN7GOS3ihYVQAAnKRcJ3p/XtIWSWebWZ+kXkn/oWBVAQBwknINuD5J/yDpUUkxJVcyuVbS1wpUFwAAJyXXgLtf0n5JT0vaV7hyAADIj1wDrs3dLytoJQAA5FGug0x+bmbvLmglAADk0VxrUU7v5F0j6dNmtlvSqJI7Cri7v6fwJQIAcOLm6qK8oihVAACQZ3OtRflasQoBACCfcr0GBwBAWSHgAABBIuAAAEEi4AAAQSLgAABBIuAAAEEi4AAAQSLgAABBIuAAAEEi4AAAQSLgAABBIuAAAEEi4AAAQYos4MxslZk9amYvmtkLZvalqGoBAIRnrv3gCmlC0g3u/rSZNUnaYWYPu/uLEdYEAAhEZC04d3/D3Z9O3R6W9JKk1qjqAQCEpSSuwZnZGknvlfSLaCsBAIQi8oAzs0ZJ/yzpenc/mOHxzWbWbWbdAwMDxS8QAFCWIg04M6tVMtx+6O73ZnqOu29x9w5372hpaSlugQCAshXlKEqT9D8lveTu34yqDgBAmKJswf2+pP8o6WIz25n6uTzCegAAAYlsmoC7b5dkUb0/ACBskQ8yAQCgEAg4AECQCDgAQJAIOABAkAg4AECQCDgAQJAIOABAkAg4AECQCDgAQJAIOABAkAg4AECQCDgAQJAIOABAkAg4AECQCDgAQJAIOABAkAg4AECQCDgAQJAIOABAkAg4AECQCDgAQJAIOABAkAg4AECQCDgAQJAIOABAkAg4AECQCDgAQJAiDTgzu8zMfmVmPWZ2U5S1AADCElnAmVm1pL+T9GFJ50j6hJmdE1U9AICwRNmCO19Sj7vvdvcxSXdLujLCegAAAYky4Fol7Um7vzd1bAYz22xm3WbWPTAwULTiAADlreQHmbj7FnfvcPeOlpaWqMsBAJSJKAOuT9KqtPttqWMAAJy0KAPuKUnrzGytmdVJukbS1gjrAQAEpCaqN3b3CTP7gqSfSaqWdIe7vxBVPQCAsEQWcJLk7g9IeiDKGgAAYSr5QSYAAMwHAQcACBIBBwAIEgEHAAgSAQcACBIBBwAIEgEHAAgSAQcACBIBBwAIEgEHAAgSAQcACBIBBwAIEgEHAAgSAQcACBIBBwAIEgEHAAgSAQcACBIBBwAIEgEHAAgSAQcACBIBBwAIEgEHAAgSAQcACBIBBwAIEgEHAAgSAQcACFIkAWdmf2VmL5vZc2Z2n5ktiaIOAEC4omrBPSzpt939PZJekXRzRHUAAAIVScC5+0PuPpG6+/8ktUVRBwAgXKVwDe6PJf006iIAAGGpKdQLm9m/SDo1w0Nfdff7U8/5qqQJST/M8jqbJW2WpNWrVxegUgBAiAoWcO5+SbbHzexTkq6Q9EF39yyvs0XSFknq6Og47vMAAEhXsIDLxswuk3SjpD9w98NR1AAACFtU1+C+I6lJ0sNmttPMbouoDgBAoCJpwbl7exTvCwCoHKUwihIAgLyLpAVXKoaHpa4uaVePa127qbNTamqKuioAQD5UbMBt3y5t3DSputaEJpoTqtkW0w03xrRta7U2bIi6OgDAyarIgBseTobbgg91q2Ht4NHjVb1xbdzUob7Xq9XYGGGBAICTVpHX4Lq6pLrWxIxwk6SGtYOqa02oqyuiwgAAeVORAffCi66J5kTGx8abE+rpYT45AJS7iuui3L5duu17Lju1OePjtUMxtbdbkasCAORbRbXgpq+9NV66Q6NvLtZIb3zG4yO9cY31xdTZGVGBAIC8qagW3F13SVMNhzX2RrMWvXuPBv/PuapfNaS65Qc01tcs9S/VQz9lgAkAhKBiWnDbt0tf+fNJTTQcVlXtpCaGGuWSapvfkU9US9VT+pPNVUwRAIBAVEQLbrprcvEVM6cFjPTGNfiTc9X62Uc18vB6nXMO194AIBQVEXDZpgXUtw1p/xNnyrj2BgBBqYiA29Vz/GkBdcsPaOSpdj3yMNfeACAkFXENbl27qWYolvnB/pi++dcszwUAoamIgOvslMb6YhmnBXh/TNdeG1FhAICCqYguyqYmadvWam3c1KGp1oTGmxOqHYpprC+5uDJdkwAQnooIOEnasEHqe71aXV0t6umJqz21PQ7hBgBhqpiAk5Jhdt11ksR0AAAIXUVcgwMAVB4CDgAQJAIOABAkAg4AECQCDgAQJAIOABAkAg4AECRz96hryJmZDUh6rUhvF5c0OOezygvnVB44p/LAOZWO09y9ZfbBsgq4YjKzbnfviLqOfOKcygPnVB44p9JHFyUAIEgEHAAgSATc8W2JuoAC4JzKA+dUHjinEsc1OABAkGjBAQCCRMClMbOPm9kLZjZlZh1px9eY2YiZ7Uz93BZlnSfieOeUeuxmM+sxs1+Z2aVR1XiyzOwvzKwv7fO5POqa5sPMLkt9Fj1mdlPU9eSLmb1qZr9MfTbdUdczH2Z2h5n1m9nzacdiZvawme1K/W6OssYTdZxzCuK7NI2Am+l5SR+V9HiGx37t7uemfj5X5LpORsZzMrNzJF0j6V2SLpP092ZWXfzy8uZv0j6fB6Iu5kSl/tv/naQPSzpH0idSn1EoPpD6bMp1CPqdSn5P0t0k6RF3XyfpkdT9cnKnjj0nqcy/S+kIuDTu/pK7/yrqOvIpyzldKeludx91915JPZLOL251SHO+pB533+3uY5LuVvIzQglw98clJWYdvlLSXanbd0m6qqhFnaTjnFNQCLjcrTWzZ8zs/5rZBVEXkwetkvak3d+bOlauvmBmz6W6XcqqqygltM8jnUt6yMx2mNnmqIvJo+Xu/kbq9puSlkdZTB6V+3fpqIoLODP7FzN7PsNPtv9bfkPSand/r6QvS/qRmZ1SnIrnNs9zKitznON3JZ0h6VwlP6v/HmmxmG2Du5+nZPfr583swqgLyjdPDkcPYUh6UN+lmqgLKDZ3v2QefzMqaTR1e4eZ/VrSmZJK4oL5fM5JUp+kVWn321LHSlKu52hm35f0kwKXUwhl9XmcCHfvS/3uN7P7lOyOzXSdu9y8ZWYr3P0NM1shqT/qgk6Wu781fbuMv0tHVVwLbj7MrGV6AIaZnS5pnaTd0VZ10rZKusbM6s1srZLn9GTENc1L6h+XaVcrObCm3DwlaZ2ZrTWzOiUHAG2NuKaTZmaLzKxp+rakP1R5fj6ZbJV0ber2tZLuj7CWvAjku3RUxbXgsjGzqyV9W1KLpG1mttPdL5V0oaSvmdm4pClJn3P3srg4e7xzcvcXzOweSS9KmpD0eXefjLLWk/DfzOxcJbuIXpX02WjLOXHuPmFmX5D0M0nVku5w9xciLisflku6z8yk5L83P3L3B6Mt6cSZ2T9KukhS3Mz2SrpV0jck3WNm1ym5y8kfRVfhiTvOOV1U7t+ldKxkAgAIEl2UAIAgEXAAgCARcACAIBFwAIAgEXAAgCARcECJM7MHzGxJhuN/YWZfSd1+bPZuEanj55b7ivDAfBFwQIlz98vdff88//xcSQQcKhIBB0TMzP7MzL6Yuv03ZvavqdsXm9kPU/upxVPHvmpmr5jZdklnzXqpj5vZk6nHL0itiPI1SZ2pvb06i3leQNQIOCB6T0ia3qGiQ1KjmdWmjh1ds9HM1iu5hNd0q+x9s16nxt3Pl3S9pFtT2+78Z0ldqb29ugp7GkBpIeCA6O2QtD61Q8WopH9TMuguUDL8pl0g6T53P+zuB3XsWpX3pr3emoJWDJQB1qIEIubu42bWK+lTkn4u6TlJH5DULumlE3ip0dTvSfHdBmjBASXiCUlfUbJL8glJn5P0jM9cLPZxSVeZWUNqhf6P5PC6w5Ka8l0sUA4IOKA0PCFphaR/S+3JdUQzuyfl7k9L6pL0rKSfKrnFzlwelXQOg0xQidhNAAAQJFpwAIAgEXAAgCARcACAIBFwAIAgEXAAgCARcACAIBFwAIAgEXAAgCD9fzGVM2U5S6xwAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 504x360 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
},
{
"output_type": "stream",
"text": [
"done in 0.803s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "9hB6M7H88ODJ"
},
"source": [
"### Splitting data"
]
},
{
"cell_type": "code",
"metadata": {
"id": "GRVwU4ci8K9O",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "3c4d059a-3f62-4361-e50f-c706c50a7001"
},
"source": [
"# Import train_test_split function\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Split dataset into training set and test set\n",
"X_train, X_test, y_train, y_test = train_test_split(X, kmeans_VSM, test_size=0.3,random_state=109) # 70% training and 30% test\n",
"X_train.shape, X_test.shape, y_train.shape, y_test.shape"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"((18, 104), (8, 104), (18, 26), (8, 26))"
]
},
"metadata": {
"tags": []
},
"execution_count": 32
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "oxpsL11k8cgl"
},
"source": [
"y_train = np.argmax(y_train, axis=1)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "uvVbzyqT8e0d"
},
"source": [
"### Model Classifer"
]
},
{
"cell_type": "code",
"metadata": {
"id": "rWlAfxy-8isF"
},
"source": [
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.svm import SVC\n",
"from sklearn.svm import SVR\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"# Logistic Regression\n",
"clf = LogisticRegression(penalty='l2', C=0.1)\n",
"clf.fit(X_train, y_train)\n",
"y_predLR = clf.predict(X_test)\n",
"\n",
"# Decision Tree\n",
"Dt = DecisionTreeClassifier()\n",
"Dt.fit(X_train, y_train)\n",
"y_predDT = Dt.predict(X_test)\n",
"\n",
"# Decision Tree Regressor\n",
"Dtr = DecisionTreeRegressor()\n",
"Dtr.fit(X_train, y_train)\n",
"y_predDTR = Dtr.predict(X_test)\n",
"\n",
"# Gaussian Naive Bias\n",
"gnb = GaussianNB()\n",
"gnb.fit(X_train, y_train)\n",
"y_predGNB = gnb.predict(X_test)\n",
"\n",
"# random forest classifier\n",
"rfc = RandomForestClassifier()\n",
"rfc.fit(X_train, y_train)\n",
"y_predRFC = rfc.predict(X_test)\n",
"\n",
"# random forest regressor\n",
"rfr = RandomForestRegressor(n_estimators = 100)\n",
"rfr.fit(X_train,y_train)\n",
"y_predRFR = rfr.predict(X_test)\n",
"\n",
"# Support vector classifier\n",
"ppn = SVC(C=1, random_state = 0)\n",
"ppn.fit(X_train,y_train)\n",
"y_predSVC = ppn.predict(X_test)\n",
"\n",
"# support vector regression\n",
"svm = SVR(C = 2, kernel = 'rbf', degree = 2)\n",
"svm.fit(X_train, y_train)\n",
"y_predSVR = svm.predict(X_test)\n",
"\n",
"# k neearest neighbor classifier\n",
"Knn = KNeighborsClassifier(n_neighbors = 2, p =2, metric = 'minkowski')\n",
"Knn = Knn.fit(X_train,y_train)\n",
"y_predKNN = Knn.predict(X_test)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "14yVyKP_8kTh",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 194
},
"outputId": "bc8563a4-dbf4-4f30-9c95-500763a4625b"
},
"source": [
"print(\"prediksi logistic regression\", y_predLR)\n",
"print(\"prediksi Decision Tree\", y_predDT)\n",
"print(\"prediksi Decision Tree Regression\", y_predDTR)\n",
"print(\"prediksi Gaussian Naive Bias\", y_predGNB)\n",
"print(\"prediksi random forest classifer\", y_predRFC)\n",
"print(\"prediksi random forest regressor\", y_predRFR)\n",
"print(\"prediksi support vector classifer\", y_predSVC)\n",
"print(\"prediksi support vector regression\", y_predSVR)\n",
"print(\"prediksi k nearest neighbor classifer\", y_predKNN)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"prediksi logistic regression [ 0 24 11 0 3 21 25 19]\n",
"prediksi Decision Tree [ 0 24 11 0 6 11 11 11]\n",
"prediksi Decision Tree Regression [ 9. 24. 11. 9. 2. 24. 16. 19.]\n",
"prediksi Gaussian Naive Bias [ 0 24 11 0 1 0 19 19]\n",
"prediksi random forest classifer [ 0 24 11 0 3 4 19 19]\n",
"prediksi random forest regressor [ 9.59 19.74 11.29 10.13 3. 19.64 17.99 19.38]\n",
"prediksi support vector classifer [ 0 24 11 0 5 0 19 19]\n",
"prediksi support vector regression [ 9.87352192 13.20041381 11.14640452 9.86494133 7.39004161 11.91608436\n",
" 12.67811855 13.05675104]\n",
"prediksi k nearest neighbor classifer [ 0 19 10 0 2 0 19 19]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YdyGrHnC-IlF"
},
"source": [
"### Model Evaluation"
]
},
{
"cell_type": "code",
"metadata": {
"id": "k5_rrC33-GS5",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 552
},
"outputId": "159e1c3c-8d2c-4f1d-ddf2-17af5e0baf74"
},
"source": [
"# Python script for confusion matrix creation. \n",
"from sklearn.metrics import confusion_matrix \n",
"from sklearn.metrics import accuracy_score \n",
"from sklearn.metrics import classification_report \n",
"\n",
"actual = [ 0, 24, 11, 0, 2, 0, 25, 19]\n",
"predicted = y_predSVC\n",
"results = confusion_matrix(actual, predicted) \n",
"\n",
"print ('Confusion Matrix :')\n",
"print(results,'\\n') \n",
"print ('Accuracy Score :',accuracy_score(actual, predicted)) \n",
"\n",
"print ('Report : ')\n",
"print (classification_report(actual, predicted)) "
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Confusion Matrix :\n",
"[[3 0 0 0 0 0 0]\n",
" [0 0 1 0 0 0 0]\n",
" [0 0 0 0 0 0 0]\n",
" [0 0 0 1 0 0 0]\n",
" [0 0 0 0 1 0 0]\n",
" [0 0 0 0 0 1 0]\n",
" [0 0 0 0 1 0 0]] \n",
"\n",
"Accuracy Score : 0.75\n",
"Report : \n",
" precision recall f1-score support\n",
"\n",
" 0 1.00 1.00 1.00 3\n",
" 2 0.00 0.00 0.00 1\n",
" 5 0.00 0.00 0.00 0\n",
" 11 1.00 1.00 1.00 1\n",
" 19 0.50 1.00 0.67 1\n",
" 24 1.00 1.00 1.00 1\n",
" 25 0.00 0.00 0.00 1\n",
"\n",
" accuracy 0.75 8\n",
" macro avg 0.50 0.57 0.52 8\n",
"weighted avg 0.69 0.75 0.71 8\n",
"\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "P7DNbxIebJT3"
},
"source": [
"# Latent Semantic Analysis"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "CJW-9I_Rue4A"
},
"source": [
"### Document Term Matrix"
]
},
{
"cell_type": "code",
"metadata": {
"id": "M4bwx745rVpv",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
},
"outputId": "31b7efca-f5f3-4069-b0be-40fc5815dc95"
},
"source": [
"from time import time\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"\n",
"print(\"Loading Document matrix term matrix...\")\n",
"t0 = time()\n",
"\n",
"vectorizer = TfidfVectorizer(stop_words='english', \n",
"max_features= 1000, # keep top 1000 terms \n",
"max_df = 0.5, \n",
"smooth_idf=True)\n",
"X = vectorizer.fit_transform(cleaned_text)\n",
"print(X.shape) # check shape of the document-term matrix\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Document matrix term matrix...\n",
"(26, 93)\n",
"done in 0.011s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "z48EnElfZnWY",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 923
},
"outputId": "8fb00c19-7f2f-4795-9ee1-987560b2a8f9"
},
"source": [
"print(X)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
" (0, 56)\t0.7656187292181281\n",
" (0, 18)\t0.6432946148308866\n",
" (1, 83)\t0.8375737369732849\n",
" (1, 3)\t0.546324294840168\n",
" (2, 90)\t0.8375737369732849\n",
" (2, 3)\t0.546324294840168\n",
" (3, 20)\t0.8375737369732849\n",
" (3, 3)\t0.546324294840168\n",
" (4, 41)\t0.805768114051409\n",
" (4, 3)\t0.5922311595804763\n",
" (5, 73)\t0.805768114051409\n",
" (5, 3)\t0.5922311595804763\n",
" (6, 72)\t1.0\n",
" (7, 61)\t0.8375737369732849\n",
" (7, 3)\t0.546324294840168\n",
" (8, 39)\t0.5130898243327779\n",
" (8, 66)\t0.688097132919705\n",
" (8, 18)\t0.5130898243327779\n",
" (9, 63)\t0.4870826174343147\n",
" (9, 37)\t0.579702621453368\n",
" (9, 76)\t0.6532192545182897\n",
" (10, 63)\t0.6432946148308866\n",
" (10, 72)\t0.7656187292181281\n",
" (11, 14)\t0.5127301086919458\n",
" (11, 10)\t0.5777534324912074\n",
" :\t:\n",
" (23, 28)\t0.25597118360918264\n",
" (23, 43)\t0.22716287851035757\n",
" (23, 49)\t0.17791475565921835\n",
" (23, 40)\t0.1491064505603933\n",
" (23, 12)\t0.20672306075804345\n",
" (24, 92)\t0.3881763061742992\n",
" (24, 5)\t0.3881763061742992\n",
" (24, 35)\t0.3881763061742992\n",
" (24, 1)\t0.34448896097110665\n",
" (24, 86)\t0.2894494152301012\n",
" (24, 22)\t0.34448896097110665\n",
" (24, 75)\t0.34448896097110665\n",
" (24, 40)\t0.2261176058538875\n",
" (24, 7)\t0.23880828634624607\n",
" (25, 70)\t0.33030080805627915\n",
" (25, 26)\t0.33030080805627915\n",
" (25, 89)\t0.33030080805627915\n",
" (25, 80)\t0.33030080805627915\n",
" (25, 64)\t0.33030080805627915\n",
" (25, 24)\t0.33030080805627915\n",
" (25, 54)\t0.33030080805627915\n",
" (25, 86)\t0.24629369237954235\n",
" (25, 17)\t0.2931270671737824\n",
" (25, 49)\t0.22957813739338098\n",
" (25, 40)\t0.19240439651088426\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "zW44sCVsumhx"
},
"source": [
"### Topic Modelling"
]
},
{
"cell_type": "code",
"metadata": {
"id": "laioMKSIui9o",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
},
"outputId": "20d29bac-34d0-46c0-fd44-08946a51f870"
},
"source": [
"from sklearn.decomposition import TruncatedSVD\n",
"\n",
"print(\"Loading Topic Modelling...\")\n",
"t0 = time()\n",
"\n",
"# SVD represent documents and terms in vectors \n",
"svd_model = TruncatedSVD(n_components=20, algorithm='randomized', n_iter=100, random_state=122)\n",
"svd_model.fit(X)\n",
"print(len(svd_model.components_))\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Topic Modelling...\n",
"20\n",
"done in 0.057s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "_3jjbNyxZs4-",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 247
},
"outputId": "20f8d0ad-f43f-4fee-d586-323d16eb87f4"
},
"source": [
"print(svd_model.components_)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"[[ 3.13451948e-03 6.88350778e-03 1.04130496e-02 ... 2.06247695e-01\n",
" 1.24442311e-02 4.62193987e-03]\n",
" [ 2.56435442e-02 5.55499806e-02 1.09247139e-02 ... -1.31262699e-02\n",
" 3.05536061e-02 3.69511648e-02]\n",
" [ 3.57652406e-02 7.73560119e-02 1.50072894e-02 ... -1.74403555e-02\n",
" 4.35789082e-02 5.14008936e-02]\n",
" ...\n",
" [ 1.07476301e-02 4.97056991e-03 -2.30400615e-03 ... 7.34066690e-03\n",
" 3.96725368e-04 -5.14670331e-03]\n",
" [-2.57774455e-01 6.78116362e-02 2.15504328e-03 ... 3.33721650e-02\n",
" 5.89974262e-04 3.34185816e-01]\n",
" [-1.20527608e-15 3.17549545e-16 -9.55736860e-16 ... 1.11292670e-16\n",
" -1.05371133e-16 1.77516083e-15]]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "CxM33WrqEFaK",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "f64364ff-f6f5-4451-d6e0-3991bb5c1795"
},
"source": [
"plt.hist(svd_model.components_)\n",
"plt.xlabel('svd model components')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 41
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEGCAYAAAB/+QKOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAaBUlEQVR4nO3dfZRddX3v8feHBKRgJGLGJDyEEYwBaiSmY9AlaJCHhohg1RqyRIFLjcGHXlfbezs+LM3S294qF2m9oWLELIIKUmxDUxMeAgkSvYRkwJAEAgQQdQIlAygQoUDge//YvyGH4XdmTiazzz7JfF5rnTV7//bv7P2Zk5Pznf1wflsRgZmZWV97VR3AzMxakwuEmZlluUCYmVmWC4SZmWW5QJiZWdbIqgMMpTFjxkR7e3vVMczMdhu33377YxHRllu2RxWI9vZ2urq6qo5hZrbbkPTrest8iMnMzLJcIMzMLMsFwszMslwgzMwsywXCzMyyXCDMzCzLBcLMzLJcIMzMLMsFwszMslwgzMwsywXCzMyyXCDMzCzLBcLMzLJcIMzMLMsFwszMslwgzMwsywXCzMyyXCDMzCzLBcLMzLJcIMzMLMsFwlpGd+eqVzbMO6CaIGYGuECYmVkdI8tasaSFwGnA1oh4a2q7CpiUuowGfh8RUzLPfQh4GngR2B4RHWXlNDOzvNIKBHAZMB+4vLchImb1Tku6EHiyn+efEBGPlZbOzMz6VVqBiIhbJLXnlkkS8FHgfWVt38zMdk1V5yCOBx6NiM11lgdwg6TbJc3pb0WS5kjqktTV09Mz5EHNzIarqgrEbODKfpYfFxFTgVOBz0h6T72OEbEgIjoioqOtrW2oc5qZDVtNLxCSRgIfAq6q1ycitqSfW4HFwLTmpDMzs15V7EGcBNwTEd25hZL2lzSqdxo4BdjYxHxmZkaJBULSlcCtwCRJ3ZLOS4vOpM/hJUkHSVqWZscCP5d0J7AGWBoR15WV08zM8sq8iml2nfZzMm0PAzPT9IPAMWXlMjOzxvib1GZmluUCYWZmWS4QZmaW5QJhZmZZLhBmZpblAmFmZlkuEGZmluUCYWZmWS4QZmaW5QJhZmZZLhBmZpblAmFmZlkuEGZmluUCYWZmWS4QZmaW5QJhZmZZLhBmZpblAmFmZlll3pN6oaStkjbWtM2TtEXSuvSYWee5MyTdK+l+SZ1lZTQzs/rK3IO4DJiRab8oIqakx7K+CyWNAC4GTgWOBmZLOrrEnGZmllFagYiIW4AnBvHUacD9EfFgRDwP/Bg4Y0jDmZnZgKo4B/FZSevTIajXZ5YfDPy2Zr47tWVJmiOpS1JXT0/PUGe1Clw8dwU3rTii3+X1tHcuLSOS2bDU7ALxHeAIYArwCHDhrq4wIhZEREdEdLS1te3q6szMLGlqgYiIRyPixYh4CfgexeGkvrYAh9bMH5LazMysiZpaICSNr5n9M2BjpttaYKKkN0naBzgTWNKMfGZmtsPIslYs6UpgOjBGUjfwVWC6pClAAA8Bn0p9DwIujYiZEbFd0meB64ERwMKIuKusnGZmlldagYiI2Znm79fp+zAws2Z+GfCqS2DNzKx5/E1qMzPLcoEwM7MsFwgzM8tygTAzsywXCDMzy3KBsEpdOOu0XV4+buW6V7T1NxSHmTXOBcLMzLJcIMzMLMsFwszMslwgzMwsywXCzMyyXCDMzCzLBcLMzLJcIMzMLMsFwszMslwgzMwsywXCKtd3qIx6ujtX0d65FICbVhzRb9+bVhwB8w7Y5Wxmw5kLhJmZZZVWICQtlLRV0saatgsk3SNpvaTFkkbXee5DkjZIWiepq6yMZmZWX5l7EJcBM/q0LQfeGhFvA+4DvtDP80+IiCkR0VFSPjMz60dpBSIibgGe6NN2Q0RsT7OrgUPK2r6Zme2aKs9B/Dfg2jrLArhB0u2S5jQxk5mZJSOr2KikLwHbgR/V6XJcRGyR9EZguaR70h5Jbl1zgDkAEyZMKCWvmdlw1PQ9CEnnAKcBH4uIyPWJiC3p51ZgMTCt3voiYkFEdERER1tbWwmJzcyGp6YWCEkzgP8JnB4Rz9Tps7+kUb3TwCnAxlxfMzMrT5mXuV4J3ApMktQt6TxgPjCK4rDROkmXpL4HSVqWnjoW+LmkO4E1wNKIuK6snGZmltfQOQhJkyNiw86sOCJmZ5q/X6fvw8DMNP0gcMzObMvMzIZeo3sQ/yxpjaRPS/L4BdZ0m448apeWm9nOa6hARMTxwMeAQ4HbJV0h6eRSk5mZWaUaPgcREZuBLwN/C7wX+HYaNuNDZYUzM7PqNFQgJL1N0kXAJuB9wAci4qg0fVGJ+czMrCKNflHu/wKXAl+MiGd7GyPiYUlfLiWZmZlVqtEC8X7g2Yh4EUDSXsC+EfFMRPygtHRmZlaZRs9B3Aj8Uc38fqnNzMz2UI0WiH0jYlvvTJrer5xIZmbWChotEH+QNLV3RtKfAM/209/MzHZzjZ6D+DxwtaSHAQHjgFmlpTIzs8o1VCAiYq2kI4FJqeneiHihvFhmZla1nbkfxDuA9vScqZKIiMtLSWVWY9zKdaysOoTZMNToYH0/AI4A1gEvpuYAXCDMzPZQje5BdABH17vBj5mZ7XkavYppI8WJaTMzGyYa3YMYA9wtaQ3wXG9jRJxeSiozM6tcowViXpkhzMys9TR6mevPJB0GTIyIGyXtB4woN5qZmVWp0eG+Pwn8BPhuajoYuKasUGZmVr1GT1J/Bng38BS8fPOgNw70JEkLJW2VtLGm7UBJyyVtTj9fX+e5Z6c+myWd3WBOMzMbIo0WiOci4vneGUkjKb4HMZDLgBl92jqBmyJiInBTmn8FSQcCXwWOBaYBX61XSMzMrByNFoifSfoi8EfpXtRXA/8x0JMi4hbgiT7NZwCL0vQi4IOZp/4psDwinoiI3wHLeXWhMTOzEjVaIDqBHmAD8ClgGcX9qQdjbEQ8kqb/Exib6XMw8Nua+e7U9iqS5kjqktTV09MzyEhWiXkHvDy56cijdnl13Z2rdnkdOZMXTS5lvWatrtGrmF4CvpceQyYiQtIufTs7IhYACwA6Ojr8TW8zsyHS6FhMvyJzziEiDh/ENh+VND4iHpE0Htia6bMFmF4zfwhw8yC2ZWZmg7QzYzH12hf4c+DAQW5zCXA28A/p579n+lwP/H3NielTgC8McntmZjYIDZ2DiIjHax5bIuIfgfcP9DxJVwK3ApMkdUs6j6IwnCxpM3BSmkdSh6RL0/aeAL4OrE2Pr6U2MzNrkkYPMU2tmd2LYo9iwOdGxOw6i07M9O0C/qJmfiGwsJF8ZmY29Bo9xHRhzfR24CHgo0OexszMWkajVzGdUHYQMzNrLY0eYvqr/pZHxLeGJo6ZmbWKnbmK6R0UVyABfABYA2wuI5SZmVWv0QJxCDA1Ip4GkDQPWBoRZ5UVzMzMqtXoUBtjgedr5p8nP0SGmZntIRrdg7gcWCNpcZr/IDsG3DMzsz1Qo1cx/Z2ka4HjU9O5EfHL8mKZmVnVGj3EBLAf8FRE/BPQLelNJWUyM7MW0OgtR78K/C07xkPaG/hhWaHMzKx6je5B/BlwOvAHgIh4GBhVVigzM6teowXi+YgI0pDfkvYvL5KZmbWCRgvEv0j6LjBa0ieBGxnimweZmVlrGfAqJkkCrgKOBJ4CJgFfiYjlJWczM7MKNTJkd0haFhGTARcFM7NhotFDTHdIekepSczMrKU0WiCOBVZLekDSekkbJK0vM5gNT+2dSxvvPO+APWbbZq2o30NMkiZExG+AP21SHjMzaxED7UFcAxARvwa+FRG/rn0MZoOSJklaV/N4StLn+/SZLunJmj5fGcy2zMxs8AY6Sa2a6cOHYoMRcS8wBUDSCGALsDjTdVVEnDYU2zQzs5030B5E1JkeKicCDwx2b8TMzMozUIE4Jh0Cehp4W5p+StLTkp4agu2fCVxZZ9m7JN0p6VpJf1xvBZLmSOqS1NXT0zMEkczMDAY4xBQRI8rasKR9KMZ3+kJm8R3AYRGxTdJMinMhE+tkXAAsAOjo6ChjL8fMbFjameG+h9qpwB0R8WjfBRHxVERsS9PLgL0ljWl2QDOz4azKAjGbOoeXJI1LQ3wgaRpFzsebmM3MbNhr9JajQyqNBnsy8KmatrkAEXEJ8BHgfEnbgWeBM9NosmZm1iSVFIiI+APwhj5tl9RMzwfmNzuXmZntUOUhJrPqpKEyxq1cV+r6zXZnLhBmZpblAmFmZlkuEGZmluUCYWZmWS4QZmaW5QJhZmZZLhBmZpblAmFmZlkuEGZmluUCYdbHvHnzXtXW3bmK9s6lr2i7cNZpL38Tu7tzFQAXz12Rfb7Z7sgFwszMslwgzMwsywXCzMyyXCDMzCzLBcLMzLJcIMzMLMsFwszMsiorEJIekrRB0jpJXZnlkvRtSfdLWi9pahU5zcyGq0ruSV3jhIh4rM6yU4GJ6XEs8J3008zMmqCVDzGdAVwehdXAaEnjqw5lZjZcVFkgArhB0u2S5mSWHwz8tma+O7W9gqQ5krokdfX09JQU1fZEF846DYBNRx7V7/KBTF40ecgymbWSKgvEcRExleJQ0mckvWcwK4mIBRHREREdbW1tQ5vQzGwYq6xARMSW9HMrsBiY1qfLFuDQmvlDUpuZmTVBJQVC0v6SRvVOA6cAG/t0WwJ8Il3N9E7gyYh4pMlRzcyGraquYhoLLJbUm+GKiLhO0lyAiLgEWAbMBO4HngHOrSirmdmwVEmBiIgHgWMy7ZfUTAfwmWbmMjOzHVr5MlczM6uQC4SZmWW5QJiZWZYLhJmZZblAmJlZlguEWY2L5654xfy4lev6XV6rvXNpKZnMquICYWZmWS4QZmaW5QJhZmZZLhBmZpblAmFmZlkuEGZmluUCYWZmWS4QZmaW5QJhZmZZLhBmZpblAmEv23TkUZU+38xaiwuEmZllNb1ASDpU0kpJd0u6S9J/z/SZLulJSevS4yvNzmlmNtxVcU/q7cBfR8QdkkYBt0taHhF39+m3KiJOqyCfmZlRwR5ERDwSEXek6aeBTcDBzc5hZmb9q/QchKR24O3AbZnF75J0p6RrJf1xU4OZmVklh5gAkPRa4F+Bz0fEU30W3wEcFhHbJM0ErgEm1lnPHGAOwIQJE0pMbGY2vFSyByFpb4ri8KOI+Le+yyPiqYjYlqaXAXtLGpNbV0QsiIiOiOhoa2srNbeZ2XBSxVVMAr4PbIqIb9XpMy71Q9I0ipyPNy+lmZlVcYjp3cDHgQ2Sem/4+0VgAkBEXAJ8BDhf0nbgWeDMiIgKspqZDVtNLxAR8XNAA/SZD8xvTiIzM8vxN6mtIeNWrhu4kzXHvAOGdHXtnUuHdH2253CBMDOzLBcIMzPLcoEwM7MsFwgzM8tygTAzsywXCDMzy3KBMDOzLBcIMzPLcoEwM7MsFwgzM8tygbBBu2nFEUxeNLnf5ZZ34azibrq9r19751IunrviVcsBujtXDTgcRnvnUjYdedRO5+juXPXKhp0YxmPyosmvfv6ubHuQen/vsoYMmTdv3ivm+3vP74zB/Hs1mwuEmZlluUCYmVmWC4SZmWW5QJiZWZYLhJmZZblAmJlZlguEmZllVVIgJM2QdK+k+yV1Zpa/RtJVafltktqbn9LMbHhreoGQNAK4GDgVOBqYLenoPt3OA34XEW8GLgK+0dyUZmZWxR7ENOD+iHgwIp4Hfgyc0afPGcCiNP0T4ERJamJGM7NhTxHR3A1KHwFmRMRfpPmPA8dGxGdr+mxMfbrT/AOpz2OZ9c0B5qTZScC9JcQeA7xq2y2gFXO1YiZozVytmAlaM1crZoLWzLWzmQ6LiLbcgpFDk6c6EbEAWFDmNiR1RURHmdsYjFbM1YqZoDVztWImaM1crZgJWjPXUGaq4hDTFuDQmvlDUlu2j6SRwAHA401JZ2ZmQDUFYi0wUdKbJO0DnAks6dNnCXB2mv4IsCKafSzMzGyYa/ohpojYLumzwPXACGBhRNwl6WtAV0QsAb4P/EDS/cATFEWkSqUewtoFrZirFTNBa+ZqxUzQmrlaMRO0Zq4hy9T0k9RmZrZ78DepzcwsywXCzMyyXCAyJB0oabmkzenn6+v0myDpBkmbJN1d9pAgjeZKfV8nqVvS/KozSZoi6VZJd0laL2lWiXlabhiXBjL9VXr/rJd0k6TDys7USK6afh+WFJJKv5yzkUySPpper7skXVF2pkZypc+ClZJ+mf4dZ5acZ6Gkrek7Y7nlkvTtlHe9pKmD2lBE+NHnAXwT6EzTncA36vS7GTg5Tb8W2K8VcqXl/wRcAcyvOhPwFmBimj4IeAQYXUKWEcADwOHAPsCdwNF9+nwauCRNnwlcVfLr00imE3rfO8D5ZWdqNFfqNwq4BVgNdFSdCZgI/BJ4fZp/Yyu8VhQnhs9P00cDD5Wc6T3AVGBjneUzgWsBAe8EbhvMdrwHkVc71Mci4IN9O6Txo0ZGxHKAiNgWEc9UnStl+xNgLHBDyXkayhQR90XE5jT9MLAVyH5zcxe14jAuA2aKiJU1753VFN8NKlsjrxXA1ynGQvuvFsn0SeDiiPgdQERsbZFcAbwuTR8APFxmoIi4heIKz3rOAC6PwmpgtKTxO7sdF4i8sRHxSJr+T4oP277eAvxe0r+l3coL0kCEleaStBdwIfA3JWdpOFMtSdMo/gp7oIQsBwO/rZnvTm3ZPhGxHXgSeEMJWXYmU63zKP7yK9uAudJhiUMjYmkT8jSUieL/3Vsk/ULSakkzWiTXPOAsSd3AMuBzTcjVn51932Xt9kNtDJakG4FxmUVfqp2JiJCUuxZ4JHA88HbgN8BVwDkU3+GoMtengWUR0T1UfxgPQabe9YwHfgCcHREvDUm4PYiks4AO4L0tkGUv4FsU7+lWMpLiMNN0ij2tWyRNjojfV5oKZgOXRcSFkt5F8T2ut+7u7/NhWyAi4qR6yyQ9Kml8RDySPtRyu7HdwLqIeDA95xqKY327VCCGINe7gOMlfZrivMg+krZFRN2TkE3IhKTXAUuBL6Vd3jLszDAu3WrOMC6NZELSSRQF970R8VyJeRrNNQp4K3Bz+kNjHLBE0ukR0VVRJij+390WES8Av5J0H0XBWFtSpkZznQfMAIiIWyXtSzFoXjMOgeU09L4biA8x5dUO9XE28O+ZPmspjuv1Hkt/H3B31bki4mMRMSEi2ikOM12+K8VhKDKpGFJlccrykxKztOIwLgNmkvR24LvA6U06pj5groh4MiLGRER7ei+tTvnKKg4DZkquodh7QNIYikNOD5aYqdFcvwFOTLmOAvYFekrO1Z8lwCfS1UzvBJ6sORTcuDLPtO+uD4pj0jcBm4EbgQNTewdwaU2/k4H1wAbgMmCfVshV0/8cyr+KacBMwFnAC8C6mseUkvLMBO6jOMfxpdT2NYoPNyj+414N3A+sAQ5vwvtpoEw3Ao/WvDZLys7USK4+fW+m5KuYGnytRHHo6+70/+7MVnitKK5c+gXFFU7rgFNKznMlxdWAL1DsVZ0HzAXm1rxOF6e8Gwb7b+ehNszMLMuHmMzMLMsFwszMslwgzMwsywXCzMyyXCDMzCzLBcJ2W5LOUfmj1U6X9NNd7bMnkDQ6fQHThgkXCDNr1GiKoVxsmHCBsEpJ2l/SUkl3StooaVYae//qmj4v/4Uu6VxJ90laA7y7zjrnSVokaZWkX0v6kKRvStog6TpJe6d+J6aBFjek8fVfk9pnSLpH0h3Ah/pkXShpTXpebvTT2hwjJP2f9Hutl/S5Abb7kKT/LWmdpC5JUyVdL+kBSXNrXotb0mt2r6RL0rhJSJqd1rlR0jdqcmyT9HfpNV4taWxqb5P0r5LWpse7a16/hZJulvSgpL9Mq/oH4IiU7wJJ41OWdWmbxzf+L2+7hWZ8C9EPP+o9gA8D36uZP4BijLDfAPuntu9QfBt7fGpvoxgR9hdkvilOMbLmz4G9gWOAZ4BT07LFFEOS70sx2uVbUvvlwOdr2idSfBv1X4Cfpj5/D5yVpkdTfLN2f4qhH36ayXE+xZDiI9P8gfW2m6YfYsc9BS6i+Jb+qPT7Pprap1MMvX04xX0KllMMGXJQzWszElgBfDA9J4APpOlvAl9O01cAx6XpCcCmmtfv/wGvoRhP6PH0WrZTc/8B4K/Z8a3iEcCoqt9Pfgztw3sQVrUNwMmSviHp+CjGANoOXAd8IA2o936KMZ6OBW6OiJ4oxuW/qp/1XhvFgG4bKD68rqvZXjswCfhVRNyX2hdR3ITlyNS+OYpPvh/WrPMUoFPSOoqhJ/al+GCt5yTgu+n3ISKe6Ge7vXrH+NlAMSjd0xHRAzwnaXRatiaKexO8SDHkwnHAO2pem+3Aj2rW+zzQe47k9vT79+abn36fJcDrJL02LVsaEc9FxGMUA87lhnFfC5wraR4wOSKe7ue1sN2QC4RVKn1QTqX4QPxfkr6SFv0Y+CjFIIhdg/jweS6t/yXghfRhD/ASgx/FWMCHI2JKekyIiE2DXFc9vSO5vlQz3Tvfm7vv+DgDjZdT+/u/WLOevYB31vw+B0fEtj45+j5nx0aLm9a8h2KU0MskfWKAHLabcYGwSkk6CHgmIn4IXEBRLAB+lqY/SVEsAG4D3ivpDek8wp/vwqbvBdolvTnNfzxt857UfkRqn13znOuBz0nF+NcqRmHtz3LgU2kvCEkH9rPdnTEtjSy6FzCL4nDaGorXZoyKG1fNbmC9N1BzYxtJUwbo/zTFIa/e/odRHPr6HnApO/7tbA8xbO8HYS1jMnCBpJcoRqY8HyAiXkwnps8hDc8dxT0n5gG3Ar+nGDVzUCLivySdC1ydPsDXUtyr+jlJc4Clkp4BVrHjQ/HrwD8C69OH86+A0/rZzKUUw1Gvl/QCxbmW+bnt7mT8tcB84M3ASmBxRLwkqTPNi+IQUW6Y+lp/CVwsaT3FZ8EtFCOCZkXE4yru5LaR4q53G4H/kX63bYD3IPYwHs3VbDciaTrwNxHRX2EyGxI+xGRmZlnegzAzsyzvQZiZWZYLhJmZZblAmJlZlguEmZlluUCYmVnW/wfmwcqRQFfTgQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "vunEB7gCurJr",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "8f5335f7-3b99-46a9-9bea-2bf6d1f5c59c"
},
"source": [
"print(\"Loading LSA Result...\")\n",
"t0 = time()\n",
"\n",
"terms = vectorizer.get_feature_names()\n",
"hasil_LSA = []\n",
"for i, comp in enumerate(svd_model.components_):\n",
" terms_comp = zip(terms, comp)\n",
" sorted_terms = sorted(terms_comp, key= lambda x:x[1], reverse=True)[:7]\n",
" print(\"\\nTopic \"+str(i)+\": \")\n",
" # hasil_LSA.append(i)\n",
" for t in sorted_terms:\n",
" # print(t[0] + \"\\n\")\n",
" print(t[0])\n",
" hasil_LSA.append(t)\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading LSA Result...\n",
"\n",
"Topic 0: \n",
"add\n",
"hour\n",
"second\n",
"date\n",
"titl\n",
"weather\n",
"photo\n",
"\n",
"Topic 1: \n",
"postcard\n",
"save\n",
"color\n",
"creat\n",
"chang\n",
"ha\n",
"applic\n",
"\n",
"Topic 2: \n",
"creat\n",
"ha\n",
"applic\n",
"new\n",
"high\n",
"edit\n",
"read\n",
"\n",
"Topic 3: \n",
"creat\n",
"ha\n",
"edit\n",
"read\n",
"new\n",
"save\n",
"add\n",
"\n",
"Topic 4: \n",
"save\n",
"applic\n",
"use\n",
"degre\n",
"abov\n",
"seven\n",
"age\n",
"\n",
"Topic 5: \n",
"open\n",
"editor\n",
"calendar\n",
"contain\n",
"use\n",
"save\n",
"degre\n",
"\n",
"Topic 6: \n",
"thi\n",
"second\n",
"secur\n",
"form\n",
"access\n",
"extend\n",
"fast\n",
"\n",
"Topic 7: \n",
"level\n",
"code\n",
"sourc\n",
"hour\n",
"day\n",
"secur\n",
"includ\n",
"\n",
"Topic 8: \n",
"delet\n",
"code\n",
"sourc\n",
"second\n",
"chang\n",
"save\n",
"addit\n",
"\n",
"Topic 9: \n",
"form\n",
"share\n",
"secur\n",
"postcard\n",
"data\n",
"integr\n",
"password\n",
"\n",
"Topic 10: \n",
"code\n",
"sourc\n",
"new\n",
"addit\n",
"develop\n",
"document\n",
"maintain\n",
"\n",
"Topic 11: \n",
"design\n",
"developmentso\n",
"modul\n",
"program\n",
"reusabl\n",
"structur\n",
"way\n",
"\n",
"Topic 12: \n",
"applic\n",
"connect\n",
"facebook\n",
"interoperabilitybecaus\n",
"lineinstagramand\n",
"media\n",
"need\n",
"\n",
"Topic 13: \n",
"new\n",
"hour\n",
"day\n",
"aspect\n",
"easi\n",
"hoursth\n",
"interfac\n",
"\n",
"Topic 14: \n",
"titl\n",
"date\n",
"share\n",
"editor\n",
"postcard\n",
"code\n",
"sourc\n",
"\n",
"Topic 15: \n",
"date\n",
"photo\n",
"contain\n",
"calendar\n",
"secur\n",
"integr\n",
"password\n",
"\n",
"Topic 16: \n",
"weather\n",
"date\n",
"share\n",
"sourc\n",
"code\n",
"postcard\n",
"connect\n",
"\n",
"Topic 17: \n",
"secur\n",
"data\n",
"integr\n",
"password\n",
"color\n",
"includ\n",
"post\n",
"\n",
"Topic 18: \n",
"age\n",
"flexibilityand\n",
"year\n",
"seven\n",
"abov\n",
"degre\n",
"use\n",
"\n",
"Topic 19: \n",
"calendar\n",
"contain\n",
"titl\n",
"second\n",
"year\n",
"flexibilityand\n",
"age\n",
"done in 0.029s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "g9dulggnZ-LR",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 54
},
"outputId": "ea9399ec-0728-4e08-fcc0-b2a3e7be11c1"
},
"source": [
"print(hasil_LSA)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"[('add', 0.8507630597530541), ('hour', 0.2240329051172435), ('second', 0.22162698682222395), ('date', 0.20624769482990352), ('titl', 0.20624769482990352), ('weather', 0.20624769482990352), ('photo', 0.2062476948299035), ('postcard', 0.5198549730224552), ('save', 0.43843688289560856), ('color', 0.2712250599927194), ('creat', 0.27002619298074737), ('chang', 0.2629143962315166), ('ha', 0.20849148548247923), ('applic', 0.18261308747104465), ('creat', 0.44760855288543144), ('ha', 0.3388411577119489), ('applic', 0.249009431659017), ('new', 0.23587015864250208), ('high', 0.17644454399818982), ('edit', 0.16724923570741218), ('read', 0.16724923570741218), ('creat', 0.4481869676902287), ('ha', 0.25147496022915916), ('edit', 0.19754632039421688), ('read', 0.1975463203942168), ('new', 0.18278396860354404), ('save', 0.1053942649457787), ('add', 0.03772418023172737), ('save', 0.770784696758237), ('applic', 0.04784446254339832), ('use', 0.03835743632650205), ('degre', 0.034682400906870726), ('abov', 0.03468240090687065), ('seven', 0.026828815695869753), ('age', 0.022260054761892186), ('open', 0.7266796667223228), ('editor', 0.45470898212216226), ('calendar', 0.364126628763787), ('contain', 0.364126628763787), ('use', 5.874338793522344e-16), ('save', 5.455392049429443e-16), ('degre', 5.26770685397418e-16), ('thi', 0.2841297570685327), ('second', 0.24929526658689552), ('secur', 0.24699453309783756), ('form', 0.20578530823083196), ('access', 0.133994081338123), ('extend', 0.13399408133812288), ('fast', 0.13399408133812288), ('level', 0.27397034360514394), ('code', 0.21052609264232908), ('sourc', 0.21052609264232908), ('hour', 0.2030903663816053), ('day', 0.17893258211050783), ('secur', 0.17834078940372644), ('includ', 0.1725509079525948), ('delet', 1.0000000000000004), ('code', 4.357662154500884e-15), ('sourc', 4.357662154500199e-15), ('second', 4.033154658835641e-15), ('chang', 3.5395592530627426e-15), ('save', 2.404706996666249e-15), ('addit', 2.217218867926048e-15), ('form', 0.4383532929093558), ('share', 0.3846517903759284), ('secur', 0.21858503873378266), ('postcard', 0.1699864363333772), ('data', 0.10929251936689133), ('integr', 0.10929251936689133), ('password', 0.10929251936689133), ('code', 0.29150769729667436), ('sourc', 0.29150769729667436), ('new', 0.22262372971849628), ('addit', 0.14575384864833724), ('develop', 0.14575384864833718), ('document', 0.14575384864833718), ('maintain', 0.14575384864833718), ('design', 0.29523084846153064), ('developmentso', 0.29523084846153064), ('modul', 0.29523084846153064), ('program', 0.29523084846153064), ('reusabl', 0.29523084846153064), ('structur', 0.29523084846153064), ('way', 0.29523084846153064), ('applic', 0.19911018001883918), ('connect', 0.1940632840899023), ('facebook', 0.1940632840899023), ('interoperabilitybecaus', 0.1940632840899023), ('lineinstagramand', 0.1940632840899023), ('media', 0.1940632840899023), ('need', 0.1940632840899023), ('new', 0.5196200061551685), ('hour', 0.35290839332470686), ('day', 0.14649932854141537), ('aspect', 0.11900567959114501), ('easi', 0.11900567959114497), ('hoursth', 0.11900567959114497), ('interfac', 0.11900567959114497), ('titl', 0.620513004696558), ('date', 0.34147198868590145), ('share', 4.561616198799555e-15), ('editor', 3.0556362504815783e-15), ('postcard', 2.161468015851982e-15), ('code', 1.946739227977853e-15), ('sourc', 1.8961980463414504e-15), ('date', 0.689432723868212), ('photo', 0.2600945074643376), ('contain', 4.3477716334149975e-15), ('calendar', 4.313568756662982e-15), ('secur', 3.742480667454989e-15), ('integr', 1.8712403337274943e-15), ('password', 1.8712403337274943e-15), ('weather', 0.5896930705635588), ('date', 0.3975920021863535), ('share', 1.2982070540646561e-14), ('sourc', 5.742202235567966e-15), ('code', 5.725699385206115e-15), ('postcard', 5.703008177038316e-15), ('connect', 4.633363994715201e-15), ('secur', 0.493150122374194), ('data', 0.246575061187097), ('integr', 0.246575061187097), ('password', 0.246575061187097), ('color', 0.18582812395233742), ('includ', 0.14120406862361765), ('post', 0.1046972224244484), ('age', 0.33418581609245), ('flexibilityand', 0.33418581609245), ('year', 0.33418581609244996), ('seven', 0.2970984120953277), ('abov', 0.06781163623181834), ('degre', 0.06781163623181785), ('use', 0.05893276585787139), ('calendar', 0.5264502976140086), ('contain', 0.5264502976140082), ('titl', 6.821579570193528e-15), ('second', 2.8137757483883088e-15), ('year', 1.7751608288787274e-15), ('flexibilityand', 1.7446520604293112e-15), ('age', 1.6634081663710092e-15)]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "TQ62UrksETS4",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "c370325f-87fe-40af-cb61-d8c3e66d5bb3"
},
"source": [
"plt.hist(hasil_LSA)\n",
"plt.xlabel('Hasil LSA')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 46
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAb8AAAEGCAYAAAD11pvPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAbQUlEQVR4nO3dfbRddX3n8ffH8CSCiE18IkCQYQm0COgtdEY74hPGJ7CtU8OoRStGW5ipbaed2HYJxTUzdrp8hqqppqhVsKLYWEMRCz60FiUohRBKiYCQaMvVMCAPJQa+88fexxyO9yYnN9m5Sfb7tdZZZ+/fb+99vufk5nzO3vt39klVIUlSnzxqtguQJGlHM/wkSb1j+EmSesfwkyT1juEnSeqdPWa7gO1p7ty5tWDBgtkuQ5J2Gddcc80PqmrebNexo+1W4bdgwQJWrlw522VI0i4jyXdnu4bZ4GFPSVLvGH6SpN4x/CRJvWP4SZJ6x/CTJPWO4SdJ6p3Owi/JwUmuTLI6yQ1JfmuKZZLkfUnWJLkuyTOG+k5PcnN7O72rOiVJ/dPl9/w2Ar9bVd9Ksj9wTZLLq2r10DIvBo5obycCHwBOTPJ44GxgAqh23eVVdVeH9UqSeqKzPb+q+n5Vfaud/hFwI3DQyGKnAh+rxlXA45I8GXgRcHlVrW8D73JgYVe1SpL6ZYec80uyADge+MZI10HAHUPza9u26dqn2vbiJCuTrJycnJxxjTceeRRPuvLaGa+/PZz/5itYsOQLs1oDAOccMKsPv3bJ1zjmo8fMag3nnHMONx551KzWAM3fxGz/Xb7zVS9jwZIvsHbJ12a1Dmj+XdR456teNtsl7NI6D78k+wGfAd5SVfds7+1X1dKqmqiqiXnzend5OknSDHQafkn2pAm+T1TVZ6dYZB1w8ND8/LZtunZJkrZZl6M9A3wEuLGq3jXNYsuBX2tHff4CcHdVfR+4DDg5yYFJDgRObtskSdpmXY72fBbwWuD6JIOTFn8AHAJQVR8EVgAvAdYA9wOvb/vWJ3k7cHW73rlVtb7DWiVJPdJZ+FXV3wPZwjIFnDlN3zJgWQelSZJ6ziu8SJJ6x/CTJPWO4SdJ6h3DT5LUO4afJKl3DD9JUu8YfpKk3jH8JEm9Y/hJknrH8JMk9Y7hJ0nqHcNPktQ7hp8kqXcMP0lS7xh+kqTeMfwkSb3T2Y/ZJlkGvAy4s6p+bor+3wNePVTHUcC89lfcbwN+BDwEbKyqia7qlCT1T5d7fhcAC6frrKo/rarjquo44K3AV6pq/dAiz237DT5J0nbVWfhV1VeB9VtcsHEacGFXtUiSNGzWz/kl2ZdmD/EzQ80FfDHJNUkWz05lkqTdVWfn/LbCy4F/GDnk+eyqWpfkCcDlSf653ZP8KW04LgY45JBDuq9WkrTLm/U9P2ARI4c8q2pde38ncAlwwnQrV9XSqpqoqol58+Z1Wqgkafcwq+GX5ADgOcBfD7U9Jsn+g2ngZGDV7FQoSdoddflVhwuBk4C5SdYCZwN7AlTVB9vFfgn4YlXdN7TqE4FLkgzq+2RV/W1XdUqS+qez8Kuq08ZY5gKar0QMt90CHNtNVZIk7Rzn/CRJ2qEMP0lS7xh+kqTeMfwkSb1j+EmSesfwkyT1juEnSeodw0+S1DuGnySpdww/SVLvGH6SpN4x/CRJvWP4SZJ6x/CTJPWO4SdJ6h3DT5LUO4afJKl3Ogu/JMuS3Jlk1TT9JyW5O8m17e1tQ30Lk9yUZE2SJV3VKEnqpy73/C4AFm5hma9V1XHt7VyAJHOA84EXA0cDpyU5usM6JUk901n4VdVXgfUzWPUEYE1V3VJVG4CLgFO3a3GSpF6b7XN+/zHJPyW5NMnPtm0HAXcMLbO2bZtSksVJViZZOTk52WWtkqTdxGyG37eAQ6vqWOD9wOdmspGqWlpVE1U1MW/evO1aoCRp9zRr4VdV91TVve30CmDPJHOBdcDBQ4vOb9skSdouZi38kjwpSdrpE9pafghcDRyR5LAkewGLgOWzVackafezR1cbTnIhcBIwN8la4GxgT4Cq+iDwSuA3kmwEHgAWVVUBG5OcBVwGzAGWVdUNXdUpSeqfzsKvqk7bQv95wHnT9K0AVnRRlyRJsz3aU5KkHc7wkyT1juEnSeodw0+S1DuGnySpdww/SVLvGH6SpN4x/CRJvWP4SZJ6x/CTJPWO4SdJ6h3DT5LUO4afJKl3DD9JUu8YfpKk3jH8JEm9Y/hJknqns/BLsizJnUlWTdP/6iTXJbk+ydeTHDvUd1vbfm2SlV3VKEnqp7HCL8kxM9j2BcDCzfTfCjynqo4B3g4sHel/blUdV1UTM3hsSZKmNe6e358l+WaS30xywDgrVNVXgfWb6f96Vd3Vzl4FzB+zFkmStslY4VdVvwi8GjgYuCbJJ5O8cDvW8Qbg0uGHBL6Y5Jokize3YpLFSVYmWTk5ObkdS5Ik7a72GHfBqro5yR8BK4H3AccnCfAHVfXZmRaQ5Lk04ffsoeZnV9W6JE8ALk/yz+2e5FR1LaU9ZDoxMVEzrUOS1B/jnvN7epJ3AzcCzwNeXlVHtdPvnumDJ3k68GHg1Kr64aC9qta193cClwAnzPQxJEkaNe45v/cD3wKOraozq+pbAFX1PeCPZvLASQ4BPgu8tqr+Zaj9MUn2H0wDJwNTjhiVJGkmxj3s+VLggap6CCDJo4B9qur+qvr4VCskuRA4CZibZC1wNrAnQFV9EHgb8DM0g2kANrYjO58IXNK27QF8sqr+dmZPT5KknzZu+H0JeAFwbzu/L/BF4D9Nt0JVnba5DVbVGcAZU7TfAhz702tIkrR9jHvYc5+qGgQf7fS+3ZQkSVK3xg2/+5I8YzCT5JnAA92UJElSt8Y97PkW4NNJvgcEeBLwqs6qkiSpQ2OFX1VdneRI4Glt001V9ePuypIkqTtjf8kd+HlgQbvOM5JQVR/rpCpJkjo0Vvgl+ThwOHAt8FDbXIDhJ0na5Yy75zcBHF1VXj5MkrTLG3e05yqaQS6SJO3yxt3zmwusTvJN4MFBY1Wd0klVkiR1aNzwO6fLIiRJ2pHG/arDV5IcChxRVV9Ksi8wp9vSJEnqxrg/afRG4GLgQ23TQcDnuipKkqQujTvg5UzgWcA90PywLfCEroqSJKlL44bfg1W1YTCTZA+a7/lJkrTLGTf8vpLkD4BHJ3kh8Gng892VJUlSd8YNvyXAJHA98CZgBTP8BXdJkmbbuKM9Hwb+vL1JkrRLG3e0561Jbhm9jbHesiR3Jlk1TX+SvC/JmiTXjfxm4OlJbm5vp4//lCRJ2rytubbnwD7AfwEeP8Z6FwDnMf0FsF8MHNHeTgQ+AJyY5PHA2e3jFnBNkuVVddeY9UqSNK2x9vyq6odDt3VV9R7gpWOs91Vg/WYWORX4WDWuAh6X5MnAi4DLq2p9G3iXAwvHqVWSpC0Z97DnM4ZuE0nezNb9FuB0DgLuGJpf27ZN1z5VbYuTrEyycnJycjuU1D7gkq/9ZPrGI48aa50nXXkt73zVy7ZbDaMWLPlCZ9se2J71H/PRY7bbtmbbdK/9+W++Ati+r9vA311x+NjL7oi/jVl3zgGzXQHQ/Luc/+YreNKV1852KSxY8oVHvFdpfOMG2DuHpjcCtwG/ut2rmYGqWgosBZiYmPC7h5KkLRp3tOdzO3r8dcDBQ/Pz27Z1wEkj7V/uqAZJUs+M+0vuv7O5/qp61wwffzlwVpKLaAa83F1V309yGfC/kxzYLncy8NYZPoYkSY+wNaM9f54mrABeDnwTuHlzKyW5kGYPbm6StTQjOPcEqKoP0nxZ/iXAGuB+4PVt3/okbweubjd1blVtbuCMJEljGzf85gPPqKofASQ5B/hCVb1mcytV1Wlb6C+ai2ZP1bcMWDZmfZIkjW3cy5s9EdgwNL+hbZMkaZcz7p7fx4BvJrmknX8F8NFuSpIkqVvjjvb8X0kuBX6xbXp9VX27u7IkSerOuIc9AfYF7qmq9wJrkxzWUU2SJHVq3Cu8nA38TzZ93WBP4C+7KkqSpC6Nu+f3S8ApwH0AVfU9YP+uipIkqUvjht+G9msJBZDkMd2VJElSt8YNv79K8iGaX114I/Al/GFbSdIuaoujPZME+BRwJHAP8DTgbVV1ece1SZLUiS2GX1VVkhVVdQzN7+pJkrRLG/ew57eS/HynlUiStIOMe4WXE4HXJLmNZsRnaHYKn95VYZIkdWWz4ZfkkKq6HXjRDqpHkqTObWnP73M0v+bw3SSfqapf2RFFSZLUpS2d88vQ9FO7LESSpB1lS+FX00xLkrTL2tJhz2OT3EOzB/jodho2DXh5bKfVSZLUgc2GX1XN2ZaNJ1kIvBeYA3y4qt4x0v9u4Lnt7L7AE6rqcW3fQ8D1bd/tVXXKttQiSdLAuF912GpJ5gDnAy8E1gJXJ1leVasHy1TVbw8t/9+A44c28UBVHddVfZKk/tqa3/PbWicAa6rqlqraAFwEnLqZ5U8DLuywHkmSgG7D7yDgjqH5tW3bT0lyKHAYcMVQ8z5JVia5KskrpnuQJIvb5VZOTk5uj7olSbu5LsNvaywCLq6qh4baDq2qCeC/Au9JcvhUK1bV0qqaqKqJefPm7YhaJUm7uC7Dbx1w8ND8/LZtKosYOeRZVeva+1uAL/PI84GSJM1Yl+F3NXBEksOS7EUTcMtHF0pyJHAg8I9DbQcm2budngs8C1g9uq4kSTPR2WjPqtqY5CzgMpqvOiyrqhuSnAusrKpBEC4CLmp/KX7gKOBDSR6mCeh3DI8SlSRpW3QWfgBVtQJYMdL2tpH5c6ZY7+vAMV3WJknqr51lwIskSTuM4SdJ6h3DT5LUO4afJKl3DD9JUu8YfpKk3jH8JEm9Y/hJknrH8JMk9Y7hJ0nqHcNPktQ7hp8kqXcMP0lS7xh+kqTeMfwkSb1j+EmSesfwkyT1Tqfhl2RhkpuSrEmyZIr+1yWZTHJteztjqO/0JDe3t9O7rFOS1C97dLXhJHOA84EXAmuBq5Msr6rVI4t+qqrOGln38cDZwARQwDXtund1Va8kqT+63PM7AVhTVbdU1QbgIuDUMdd9EXB5Va1vA+9yYGFHdUqSeqbL8DsIuGNofm3bNupXklyX5OIkB2/luiRZnGRlkpWTk5Pbo25J0m5utge8fB5YUFVPp9m7++jWbqCqllbVRFVNzJs3b7sXKEna/XQZfuuAg4fm57dtP1FVP6yqB9vZDwPPHHddSZJmqsvwuxo4IslhSfYCFgHLhxdI8uSh2VOAG9vpy4CTkxyY5EDg5LZNkqRt1tloz6ramOQsmtCaAyyrqhuSnAusrKrlwH9PcgqwEVgPvK5dd32St9MEKMC5VbW+q1olSf3SWfgBVNUKYMVI29uGpt8KvHWadZcBy7qsT5LUT7M94EWSpB3O8JMk9Y7hJ0nqHcNPktQ7hp8kqXcMP0lS7xh+kqTeMfwkSb1j+EmSesfwkyT1juEnSeodw0+S1DuGnySpdww/SVLvGH6SpN4x/CRJvWP4SZJ6p9PwS7IwyU1J1iRZMkX/7yRZneS6JH+X5NChvoeSXNvelndZpySpX/boasNJ5gDnAy8E1gJXJ1leVauHFvs2MFFV9yf5DeD/Aq9q+x6oquO6qk+S1F9d7vmdAKypqluqagNwEXDq8AJVdWVV3d/OXgXM77AeSZKAbsPvIOCOofm1bdt03gBcOjS/T5KVSa5K8orpVkqyuF1u5eTk5LZVLEnqhc4Oe26NJK8BJoDnDDUfWlXrkjwVuCLJ9VX1ndF1q2opsBRgYmKidkjBkqRdWpd7fuuAg4fm57dtj5DkBcAfAqdU1YOD9qpa197fAnwZOL7DWiVJPdJl+F0NHJHksCR7AYuAR4zaTHI88CGa4LtzqP3AJHu303OBZwHDA2UkSZqxzg57VtXGJGcBlwFzgGVVdUOSc4GVVbUc+FNgP+DTSQBur6pTgKOADyV5mCag3zEySlSSpBnr9JxfVa0AVoy0vW1o+gXTrPd14Jgua5Mk9ZdXeJEk9Y7hJ0nqHcNPktQ7hp8kqXcMP0lS7xh+kqTeMfwkSb1j+EmSesfwkyT1juEnSeodw0+S1DuGnySpdww/SVLvGH6SpN4x/CRJvWP4SZJ6x/CTJPVOp+GXZGGSm5KsSbJkiv69k3yq7f9GkgVDfW9t229K8qIu65Qk9Utn4ZdkDnA+8GLgaOC0JEePLPYG4K6q+g/Au4E/adc9GlgE/CywEPizdnuSJG2zLvf8TgDWVNUtVbUBuAg4dWSZU4GPttMXA89Pkrb9oqp6sKpuBda025MkaZulqrrZcPJKYGFVndHOvxY4sarOGlpmVbvM2nb+O8CJwDnAVVX1l237R4BLq+riKR5nMbC4nX0acNNWlDm3vf9BOz1638e+naGG3aFvZ6hhd+jbGWrYVfvGdWhVzduK5XcPVdXJDXgl8OGh+dcC540sswqYPzT/HZp/vPOA1wy1fwR4ZQc1rgRWDqZH7/vYtzPUsDv07Qw17A59O0MNu2qft83fujzsuQ44eGh+fts25TJJ9gAOAH445rqSJM1Il+F3NXBEksOS7EUzgGX5yDLLgdPb6VcCV1Tz8WU5sKgdDXoYcATwzQ5rlST1yB5dbbiqNiY5C7gMmAMsq6obkpxLs1u+nOZw5seTrAHW0wQk7XJ/BawGNgJnVtVDHZS5dIrp0fs+9u0MNewOfTtDDbtD385Qw67Yp83obMCLJEk7K6/wIknqHcNPktQ7W3XOL8lC4L005/C+Azy37fq7tv184MnAXcB+wL405+weBB4DPNSuuwdQ7fxq4Ontdh4CHmjXpV0mI2VUezO4JWnXMXj/H227oZ3+WWBRVV2cZBnNxU4eT/N+/2Oa3LgP2J8mVza2bT8GHk2THf8C3F5Vp2ypmLEDZORyZU8HTgbOAA4ETgL+gmZE5u/TfMHyr4DvV9Wjga+0m5kLTAIXAJ8Bbmm3tZ7mqw2heXHua5/wvwP/1j5J2if5f9rlHgY+Dvyo7Xu4vb+9XffhdjsM9T/MI9VI/6BtW06E7uiTqJ60lQTdvxcM3pdHH/N7Q/Nrh9r/dWj5u4F7ad5nNwJ3An9K8/7/FOAa4JND27kA+NV2+fcCH2hvq9pl76LJjHXAIcB9VbVvVR03TvDB1u09/eRyZcCrgfuBg6rqPuCfgD2Be2j28C5qn+Dghbi8faxjgOuB97VP+Dtt/z9Wc5WXVcBeNJ8GHqYJwnvZFIj/3m5/8IJf2r4Qw25j097iviPPdfT5ZqR/0Da6t7k1tmXdXeHxJO2ctva9YKogm85DNEfwRh9jA8179sBThmpZ3d4XzXvxo9n0vv5Y4C3AlcDjgNfQfN3tnUneVFVfZVM+3E2TJT+gyYPtEvJbc9jzIOCOdvppNEF3UDt/D014nAN8sW3fF3hW238mTcGfBPYB/ogmuH6t7f9Gez94oYaDaL/2fhBkr2/vD6D5RPAz7fyj2nWfPTQ/7OG23wtkS9JPB9nmwnMOzSHIUXu3t4HB+24Bzxva7p4078GPau/3oQm1H7fbXk3znv4XwBuTfJFNWbC4Xe4f2+0d1T7mP9McAr0GeEyS22j2Nt9RVZ/bzHN5RKHby2k0u6u/BfwNzXf4PkBzhZbDgd+j2U19GfC1tq2ASvIUmk8Ng+/zDV6wvWk+XQz2Bje2df8rzaeGYQ/SXAR7qkOXj8Lgk6RxjZ4mGjZ4fx28Xw+WHVxTdKogHW07gGZHKcCRNO//Z9Hs0BzRLnMTzXcXP0HzCz830pwyOxP4ELAC+H80O0V30/w60HuSHL75p7Z14Td8ybGbaHZbB5cceyzNk38Dzbm++TRp/ETg12kuaP1dmsEwR9IcJr2M5hAl7XaPp3kx9qTZyxvUdgDNHuogvA6keeGfCDyfTS/+j9r+Q6ep33NjkjS+0Xx4cGg6I8sMxmXc3t5vYNN7bk2xzmAMxkaa0PoezWktaDLgXTRXCVsAvJlmXMhtNIdIq53/c5qQvIRmPMklNFnyZZo82aontzk/uVwZcCFNQK1N8hjg2PZJTNIMhFlEE1KPB14KTCZ5Js0x3h8Bz6QZIXpv+4L8MnBtW8/gpGjRvNjfabdL2/Zv7TqPbZffc+T53E/zIg5/yhiMGt3dA3B3f37qjz7+Lc/Wcx487uie3saR+b2Hlhs+Qjd83u+r7f3gMCdDy/47mwbCDO8h3kUz3mMFzYDJ59H8us/zad7/P0iTNwfT5MMeNIdNXwXcTDMI81aa7Lmd5nTb6i096a26wkuSlwDvodnDuhX4z+30dcAfAp9t538AzGunNwy9GA+xaUQnbHpxB+ceB58GBvPDX3UYnh4cO5Yk7bpG9wofpgnJS2m+SjfVeUaGlv0xTSDuRxOiPwDeU1Uf2fIj7wQ/LTFbN5pd6lXTze+gGu6dpv3LwMRI235D00uA906x3n7tfYA/A367w9r3ZdMHqEXAX0+xzNE0n+r2AD4P/NJs/7t3/O/5SuDjYy7b6d/bYPvT/U0M/+1N9/fGpgENn6f5FH7TGI97HvCGkbY5wD7t9OE0H573mupxt+U1nYV/7yn/DWk+nF8LHDHD7W7T+t62fOvswtbqxEuTvJUmSL4LvG6KZd6Y5HSaN5Zv05wU7sozgfOShOak869PscxbaN7sVtGMBN7iKKxdVZL30xyCecls1zJipn8T59A8l6fSnK7Ym2ZAwrSSXEPztaTfHenaF7gyyZ40IfybVbWh+dPZ7PZ21td0WkmOphnwd0lV3byj19d4vLC1JKl3PG8mSeodw0+S1DuGnySpdww/aUiSe0fmX5fkvBls5ylJLm6nT0ryN1MsM137y5J8O8k/JVmd5E0j/Z9LctXW1iRpE0d7Sh2oqu/RDNHfKu1oyKXACVW1NsneNMPpB/2Poxlle2+Sp1ZzoXlJW8k9P2lMSV6e5BvtXtmXkjyxbX9Okmvb27eT7J9kQZJVM3iY/Wk+lP4QoKoerKqbhvp/meY7dxfRfLdS0gwYftIjPXooyK4Fzh3q+3vgF6rqeJrw+f22/X8AZ1bVccAv0vyo5oxU1XpgOfDdJBcmeXWS4f+np9FcXvDCdlrSDHjYU3qkB9oQA5pzfsBEOzsf+FSSJ9N8YfzWtv0fgHcl+QTw2fZw5YwLqKozkhwDvIAmWF8IvK7d0zwC+PuqqiQ/TvJzVTWTPUyp19zzk8b3fuC8qjoGeBPNxXWpqncAZ9D8WOc/JDlyWx+oqq6vqnfTBN+vtM2/SnPB+Fvb3y5bgHt/0owYftL4DmDTz3idPmhMcngbVn9C8+snMw6/JPslOWmo6TiaS9lBE3QLq2pBVS2gGfjieT9pBgw/aXznAJ9ur1/5g6H2tyRZleQ6mqvMX7oV23x+krWDG83vkP1+kpvac45/THPIcwHNb1X+5CsOVXUrcHeSE7flSUl95LU9JUm9456fJKl3DD9JUu8YfpKk3jH8JEm9Y/hJknrH8JMk9Y7hJ0nqnf8PmgqXnw/dkF8AAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "6fpPLETAaLnG"
},
"source": [
"# len(svd_model.components_)\n",
"# len(svd_model.fit_transform(X))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "V114t1Kmayzt"
},
"source": [
"### Topic Visualisation"
]
},
{
"cell_type": "code",
"metadata": {
"id": "6ljiX6QaUR_3",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 385
},
"outputId": "2e4aa8ce-56e7-414d-aaf0-77fec9f2727b"
},
"source": [
"import umap\n",
"\n",
"print(\"Loading Topic Visualisation...\")\n",
"t0 = time()\n",
"\n",
"topic_lsa = svd_model.fit_transform(X)\n",
"embedding = umap.UMAP(n_neighbors=3, min_dist=0.1, random_state=4).fit_transform(topic_lsa)\n",
"plt.figure(figsize=(7,5))\n",
"plt.scatter(embedding[:, 0], embedding[:, 1], c = None, s = 50 ,edgecolors=None)\n",
"plt.title('Topic LSA VIsualisation')\n",
"plt.xlabel('widht')\n",
"plt.ylabel('height')\n",
"plt.show()\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Topic Visualisation...\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAb4AAAFNCAYAAAB/iwpeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAfuklEQVR4nO3de3hcd3ng8e+LHJtaDpfE3oKTGLFllxZonAbZ0KeEcjElgLi0pU2WcHG6rAu90e1SujGUS9KHsttnKbvQfVhvyYUm4HAtWSfhkhIIPC1R5BAHsFmWixMSk8YxJYlkkCPx7h9zxlHkkTSyNHPOzPl+nmcezZxzdOadI82887tHZiJJUl08rOwAJEnqJhOfJKlWTHySpFox8UmSasXEJ0mqFROfJKlWTHyqrYh4f0T8edlxVFlEDEVERsSK4vG1EfGaDjxPR84rtWLiU0+IiPEZt59GxI9nPD7veM6Zma/LzIuOI5b9EbFljn3bI+J7RVx3RMSVLY7ZWiSTc+Z5jqdHxERErGmx76sR8Qezk1I3ZOYLMvOypZwjIt4eEZcv93mldpn41BMyc03zBtwOvHjGtivKjg+gKLG8CthSxDkM/EOLQ18D/BB49VznysyvAHcAL5/1HE8BngR8eJnClmrHxKeeFhGrIuI9EXGguL0nIlYV+55VlLq2R8Q9RUntvBm/e2lE/MWMxy+NiFsi4r6I+E5EnL3IcDYBn8nM7wBk5l2ZuWNWvI8DfhXYBjw/Ih4zz/ku49jk+Grgmsw8NPvgiHhhROyNiPsj4s6IeGOxfWtEfHnWsRkRTyjuv6goRd4XEd+PiLfPFVBEfCEiXlvcf0JEfDEi7i2u75Uzjvvvxbnui4jdEXFWsf1sYDtwTlEq3tPivA+LiLdExG0RcXdEfDAiHlnsa5ZyXxMRtxfP++Z5rqF0DBOfet2bgacDZwAbgc3AW2bsfwywFjiFRklrR0Q8cfZJImIz8EHgT4FHAc8E9i8ylq8Ar46IP42I4YgYaHHMq4GxzPw4sA+Yr5r274BnRsRpRYwPA15BIyG28gHgdzPzROApwOfbjHuiiOtRwIuA10fEy9r4vYuAzwKPBk4F3jtj3000/iYnAR8CPhoRD8/MTwPvBK4sSusbW5x3a3F7NvCvgTXA+2Yd8wzgicBzgbdGxC+0Ea8EmPjU+84DLszMuzPzIPAOGtWNM/15Zk5m5heBq4HfbnGefw9cnJmfy8yfZuadmfnNxQSSmZcDfwg8H/gicHdE/Nmsw15NIxFQ/JyvuvP7wBdmvJ7nAquK19DKA8CTIuIRmfkvmXlzm3F/ITO/VrzuW2lUo/5qG7/6APA4YH1m/iQzj5YqM/PyzDyUmVOZ+d+KuI/5wjGH84B3Z+Z3M3McuAA4d1Zb5jsy88eZuQfYQ+NLj9QWE5963XrgthmPbyu2Nf1LZk7Ms7/pNOA7Sw0mM6/IzC00Sk+vAy6KiOcDRMSvAI8HdhaHfwj4xYg4Y55TXsaDie9VwM7MfGCOY38TeCFwW1EF+cvtxBwRT4uI6yPiYETcW8S9to1ffRMQwGhEfCMifmfGOd8YEfuKatAfAY9s85zQ+m+6AvjZGdvumnH/MI1SodQWE5963QEapY6mDcW2pkdHxOA8+5u+D/zccgWVmQ9k5keBW2lUO0KjqjWAWyLiLuDGGdvn8gng1Ih4NvAbzF3NSWbelJkvBf4V8PfAR4pdE8Dq5nEt2hU/BFwFnJaZjwTeX8S50Gu8KzP/Q2auB34X+J9Fu99ZNJLibwOPzsxHAffOOOdCS8K0+ptOAf+8UExSO0x86nUfBt4SEesiYi3wVuDyWce8IyJWFh/II8BHW5znA8D5EfHconPFKRHx8/M87wkR8fAZtxVFJ5IXRcSJxTleADwZuDEiHk4jEWyj0fbVvP0h8Iq5hiQUpdWPAZcAt2XmWKvjitd3XkQ8sigR3gf8tNi9B3hyRJxRxPH2Wb9+IvDDzPxJ0db5inle98zn/K2IOLV4+C80EtpPi/NNAQeBFRHxVuARM371n4Ghos2ylQ8D/zEiHh+N4RzNNsGpduKSFmLiU6/7C2CMRsnqa8DNxbamu2h8KB8ArgBe16rtLjNHgfOBv6ZROvkiDy11zHYN8OMZt7fTSDbbaQy3+BHwX4HXF21fLyuO+2BRUrorM+8CLqZRjTdfD9LLilg+OM8x0KgK3R8R99GorjyveG3fAi4ErgP+H/DlWb/3e8CFEXE/jS8OH6E9m2gk9XEaJcY3ZOZ3gc8Anwa+RaOa8ic0StRNzS8ehyKiVTvkxTQ69twAfK/4/T9sMyZpQeFCtOpXEfEs4PLMPHWhYyXVhyU+SVKtmPgkSbViVackqVYs8UmSasXEJ0mqla4tZ7Ic1q5dm0NDQ2WHIUmqkN27d9+TmevaPb6nEt/Q0BBjYy3H70qSaioiblv4qAdZ1SlJqhUTnySpVkx8kqRaMfFJkmrFxCdJqhUTnySpVkodzhAR+4H7gWlgKjOHy4xHkrptfHKKXXsOsP/QBEMnDzKycT1rVvXUSLOeU4Wr++zMvKfsICSp227a/0O2XjJKJhw+Ms3qlQNcdPVeLj1/M5uGTio7vAX1atKufoSS1IfGJ6fYeskoE5PTR7cdPtK4v/WSUUa3b2Gwwkmkl5N22W18CXw2InZHxLZWB0TEtogYi4ixgwcPdjk8SeqMXXsOMNfiOJmw69YD3Q1oEWYm7WayPnxkmonJ6WL7VMkRzq/sxPeMzDwTeAHw+xHxzNkHZOaOzBzOzOF169qeik2SKm3/oYmjSWO2w0em2X/P4S5H1L6lJu3xySl2jt7Ou67dx87R2xnvcqIstRydmXcWP++OiE8Cm4EbyoxJkrph6ORBVq8caJn8Vq8cYGjt6hKias9SknYVqkhLK/FFxGBEnNi8D/wa8PWy4pGkbhrZuJ6I1vsiYOT09d0NaBGaSbuV+ZJ2VapIy6zq/FngyxGxBxgFrs7MT5cYjyR1zZpVK7j0/M0Mrho4mkRWrxxgcNVAsb26HVuON2lXpV2ztCubmd8FNpb1/JJUtk1DJzG6fQu7bj3A/nsOM7R2NSOnr6900oMHk/bsKssI5k3aVWnXrPbVlaQ+N7hqBeds2lB2GIt2PEm7Ku2aJj5J0nFZbNIe2biei67e23JfN9s1yx7OIEmqiaq0a1rikyR1TRXaNU18kqSuKrtd06pOSVKtmPgkSbVi4pMk1YqJT5JUKyY+SVKtmPgkSbVi4pMk1YqJT5JUKyY+SVKtmPgkSbXilGWqtfHJKXbtOcD+QxMMnTzIyMb1rKn4WmiSlsZ3uGrrpv0/PGYhzYuu3sul529m09BJZYcnqUOs6lQtjU9OsfWSUSYmp48uinn4yDQTk9PF9qmSI5TUKSY+1dKuPQfIbL0vE3bdeqC7AUnqGhOfamn/oYmjJb3ZDh+ZZv89h7sckaRuMfGploZOHjy6AvRsq1cOMLR2dZcjktQtJj7V0sjG9US03hcBI6ev725AkrrGxKfKG5+cYufo7bzr2n3sHL2d8WXoeLJm1QouPX8zg6sGjpb8Vq8cYHDVQLHdDs9Sv4qcq4W/goaHh3NsbKzsMNRFrYYcRLBsQw4mJqfYdesB9t9zmKG1qxk5fb1JT+oxEbE7M4fbPt7Ep6oan5ziae+8jonJYzuhDK4aYHT7FpOUpEUnvtKrOiNiICK+GhG7yo5F1eKQA0mdUHriA94A7Cs7CFWPQw4kdUKpiS8iTgVeBPxtmXGomhxyIKkTyi7xvQd4E/DTuQ6IiG0RMRYRYwcPHuxeZCqdQw4kdUJpiS8iRoC7M3P3fMdl5o7MHM7M4XXr1nUpOlWBQw7m1okhHlJdlNarMyL+EngVMAU8HHgE8InMfOVcv2OvznpyyMFDdXqIh9RrenI4Q0Q8C3hjZo7Md5yJT3XnEA/pWD03nEFS+xziIS1dJb4aZuYXgC+UHIZUeQ7xkJauEolP6pbxySl27TnA/kMTDJ08yMjG9azpoarB5hCPVsnPIR5Se3rnHS8tUatOIRddvbenOoWMbFzPRVfvbbnPIR5Se2zjUy2MT06x9ZJRJianj5aWDh+ZZmJyutjeG8MBHOIhLZ3vEtVCO51Cztm0obtBHadNQycxun2LQzyk4+Q7RbXQb51CBlet6JlELVWNVZ2qBef9lNRk4lMtOO+npCYTn2rBTiGSmny3qzbsFCIJTHyqmeXqFNLrA+GlOvOdKi1SPwyEl+rMNj5pEfplILxUZyY+aRFcHUHqfSY+aRH6bSC8VEcmPmkRHAgv9T4Tn7QIDoSXep+JT1oEB8JLvc93qbRIDoSXepvvVOk4uDqC1Lus6pQk1YqJT5JUKyY+SVKtmPgkSbVi4pMk1YqJT5JUK6Ulvoh4eESMRsSeiPhGRLyjrFgkSfVR5ji+SeA5mTkeEScAX46IazPzKyXGJEnqc6UlvsxMYLx4eEJxm2PBF6l/uHq7VK5S320RMQDsBp4A/E1m3tjimG3ANoANG5wpQ73N1dul8pXauSUzpzPzDOBUYHNEPKXFMTsyczgzh9etW9f9IKVl4urtUjVUoldnZv4IuB44u+xYpE5x9XapGsrs1bkuIh5V3P8Z4HnAN8uKR+o0V2+XqqHMNr7HApcV7XwPAz6SmbtKjEfqqObq7a2Sn6u3S91TWokvM2/NzF/KzNMz8ymZeWFZsUjd4OrtUjVUoo1PqgNXb5eqwXea1EWu3i6Vz3eb1GWu3i6Vy6pOSVKtmPgkSbVi4pMk1YptfFKfc1Js6aH875f6mJNiS8eyqlPqU06KLbVm4pP6lJNiS62Z+KQ+5aTYUmsmPqlPNSfFbsVJsVVnJj6pTzkpttSaiU/qU06KLbXmf77Ux5wUWzqW//1Sn3NSbOmhrOqUJNWKiU+SVCsmPklSrZj4JEm1YuKTJNWKiU+SVCsmPklSrZj4JEm1YuKTJNVKaYkvIk6LiOsjYm9EfCMi3lBWLJKk+ihzyrIp4D9l5s0RcSKwOyI+l5l7S4xJktTnSivxZeYPMvPm4v79wD7glLLikSTVQyXa+CJiCPgl4MYW+7ZFxFhEjB08eLDboUmS+kzpiS8i1gAfB/44M++bvT8zd2TmcGYOr1u3rvsBSpL6SqmJLyJOoJH0rsjMT5QZiySpHsrs1RnAB4B9mfnusuKQJNVLmSW+XwFeBTwnIm4pbi8sMR5JUg2UNpwhM78MRFnPL0mqp9I7t0iS1E0mPklSrZj4JEm10lbii4i/a2ebJElV126J78kzH0TEAPDU5Q9HkqTOmjfxRcQFEXE/cHpE3Ffc7gfuBj7VlQglSVpG8ya+zPzLzDwR+KvMfERxOzEzT87MC7oUoyRJy6atcXyZeUFEnAI8bubvZOYNnQpMkqROaCvxRcS7gHOBvcB0sTkBE58kqae0O3PLrwNPzMzJTgYjSVKntdur87vACZ0MRJKkbpi3xBcR76VRpXkYuCUi/gE4WurLzD/qbHiSJC2vhao6x4qfu4GrOhyLJEkdN2/iy8zLuhWIJEnd0G6vzq/RqPKc6V4aJcK/yMxDyx2YJEmd0G6vzmtpDGP4UPH4XGA1cBdwKfDiZY9MkqQOaDfxbcnMM2c8/lpE3JyZZ0bEKzsRmCRJndDucIaBiNjcfBARm4CB4uHUskclSVKHtFviey1wcUSsAQK4D3htRAwCf9mp4CRJWm7tztV5E/CLEfHI4vG9M3Z/pBOBSZLUCQsNYH9lZl4eEX8yazsAmfnuDsYmSdKyW6jEN1j8PLHTgUiS1A0LDWD/X8XPd3QnHEmSOqutXp0R8W8j4h8i4uvF49Mj4i2dDU2SpOXX7nCG/w1cADwAkJm30hjELklST2k38a3OzNFZ25Y8fi8iLo6Iu5slSUmSOq3dxHdPRPwcxXydEfFy4AfL8PyXAmcvw3kkSWpLuwPYfx/YAfx8RNwJfA84b6lPnpk3RMTQUs8jSVK72k18dwKXANcDJ9GYueU1wIUdiuuoiNgGbAPYsGFDp59OktTn2q3q/BSNFRgeAA4A48BEp4KaKTN3ZOZwZg6vW7euG08pSepj7Zb4Ts1M2+IkST2v3RLfP0bEL3Y0EkmSumChuTqbK6+vAM6PiO8CkzRWaMjMPH0pTx4RHwaeBayNiDuAt2XmB5ZyTkmS5rNQVedIJ588M/9dJ88vSdJsC83VeVu3ApEkqRvabeOTJKkvmPgkSbVi4pMk1YqJT5JUKyY+SVKtmPgkSbXS7pRlklRp45NT7NpzgP2HJhg6eZCRjetZs8qPOB3L/wpJPe+m/T9k6yWjZMLhI9OsXjnARVfv5dLzN7Np6KSyw1PFWNUpqaeNT06x9ZJRJianOXxkGmgkv4nJ6WL7VMkRqmpMfJJ62q49B8hsvS8Tdt16oLsBqfKs6pT0EL3WVrb/0MTRkt5sh49Ms/+ew12OSFVX3f9mSV3Xi21lQycPsnrlQMvkt3rlAENrV5cQlarMqk5JQO+2lY1sXE9E630RMHL6+u4GpMoz8UkCeretbM2qFVx6/mYGVw2weuUA0CjpDa4aKLZbsaWH8j9CEtDbbWWbhk5idPsWdt16gP33HGZo7WpGTl9v0lNL/ldIAnq/rWxw1QrO2bSh7DDUA6zqlATYVqb6MPFJXTQ+OcXO0dt517X72Dl6O+MV6jBiW5nqInKu1uwKGh4ezrGxsbLDkI5Lq6ECEVRuqMDE5JRtZeopEbE7M4fbPt7EJ3Xe+OQUT3vndUxMHtt+NrhqgNHtW0wu0nFabOKzqlPqgl4dKiD1IxOf1AW9PFRA6jcmPqkLmkMFWumFoQJSPzHxSV3gUAGpOkpNfBFxdkT834j4dkT85zJjkTrJoQJSdZT2bouIAeBvgOcBdwA3RcRVmbm3rJikTnJaLakaynzHbQa+nZnfBYiIncBLAROf+pbTaknlKzPxnQJ8f8bjO4CnzT4oIrYB2wA2bPADQ9Ly6bVFd7U8Kv8XzswdwA5oDGAvORxJfaIXF93V8iizc8udwGkzHp9abJOkjurVRXe1PMpMfDcB/yYiHh8RK4FzgatKjEdSTTiTTr2VVtWZmVMR8QfAZ4AB4OLM/EZZ8UiqD2fSqbdS2/gy8xrgmjJjkKrODhjLr9cX3dXS+O6RKswOGJ0xsnE9F13deuSUM+n0P6cskyrKDhid40w69eZfV6qodjpgOBj++DmTTn35F5Yqyg4YnedMOvVkVadUUS5lJHWGiU+qKJcykjrDxCdVlB0wpM7wnSNVWJkdMBw/qH4VOVe3sQoaHh7OsbGxssOQ+l6r8YMROH5QlRQRuzNzuN3jreqU9BCOH1S/M/GpFsYnp9g5ejvvunYfO0dvZ9wP7zk5gbP6nRX26ntO+7U4jh9Uv7PEp75mtd3iOX5Q/c7Ep75mtd3iOX5Q/c7Ep75mtd3iOX5Q/c7/YPU11107Pk7grH7mf7H6WpnrrvX6AHAncFa/6p13oXQcmtV2cw3G7lQJxp6kUnU5c4tqYWJyqmvVduOTUzztndcxMXls9ergqgFGt2+xylBaRouducV3n2qhm9V2LiArVZu9OqVlZk9SqdpMfNIycwC4VG0mPmmZOQBcqjYTn7TMHAAuVZvvQKkDHAAuVVcp78KI+C3g7cAvAJsz0zEK6jsOAJeqqayqzq8DvwHcUNLzS5JqqpQSX2buA4i5egBIktQhdm6RJNVKx0p8EXEd8JgWu96cmZ9axHm2AdsANmywvUSStDQdS3yZuWWZzrMD2AGNuTqX45ySpPqyqlOSVCtlDWf4deC9wDrg6oi4JTOfX0YsUl30+vqA0nIpq1fnJ4FPlvHcUh25PqD0IKs6pT43PjnF1ktGmZicPrpqxOEj00xMThfbp0qOUOouE5/U59pZH1CqExOf1OdcH1B6KFu2pT7XXB+wVfJzfcBqsiNSZ3klpT43snE9F129t+U+1wesHjsidZ5VnVKfc33A3mFHpO7wP16qAdcH7A3tdERyqaul879eqgnXB6w+OyJ1h1WdklQRzY5IrdgRafmY+CSpIkY2rmeuZUrtiLR8THySVBF2ROoOr6IktVDWWDo7InVe5FxdiCpoeHg4x8bGyg5DUp9rNZYuAsfSVVRE7M7M4XaPt6pTkmZwLN2Dxien2Dl6O++6dh87R29nvE9eu2VnSZrBsXQN/TyDjCU+SZrBsXT9X+o18UnSDI6l6/+lrEx8kjRDr4yl62T7W7+Xem3jk6QZmmPp5urVWYVhBZ1uf+v3pawcziBJLUxMTlVyLN345BRPe+d1TEwem5QGVw0wun3LkuPsxnMsp8UOZ6hO5JJUIVWd1LsbvU57odS7FL0dvSTVTLfa3/p5BpnefwWSVCPdbH+raql3qezVKUk9pFd6nVaZiU+SeogrOCxdKVcoIv4KeDFwBPgOcH5m/qiMWCSp1/Rz+1s3lHWVPgdckJlTEfFfgAuAPyspFknqOZ1sfytrSaZuKeWVZOZnZzz8CvDyMuKQJD1UP09O3VSFNr7fAa4tO4h29esyHZLU75NTN3WsxBcR1wGPabHrzZn5qeKYNwNTwBXznGcbsA1gw4Zyu9XW4ZuQpPqqy5JMHUt8mbllvv0RsRUYAZ6b88yblpk7gB3QmLJsOWNcjJnfhJqa34i2XjJauSl8JGmx+n1y6qZSqjoj4mzgTcBLMrMnrmS/L9MhSXVZkqmsNr73AScCn4uIWyLi/SXF0ba6fBOSVF91GRxfVq/OJ5TxvEvR78t0SFK/T07d1B+vok1LGZsysnE9F129t+W+fvomJKne6jA4vjbr8bXqkdn8FtNuj8zlOIckaXktdj2+WiS+5VxUsaqLU0pSXbkQbQvLOTalX5fpkKS6qMLMLR1nj0xJUlMtEl9dxqZIkhZWi8RXl7EpkqSF1SLxuXCjJKmpNp/4dRibIklaWK0+9e2RKUmqRVWnJElNJj5JUq2Y+CRJtWLikyTViolPklQrJj5JUq2Y+CRJtdJTyxJFxEHgtlmb1wL3lBDOcunl+Hs5djD+shl/ufop/sdl5rp2f7GnEl8rETG2mHWYqqaX4+/l2MH4y2b85apz/FZ1SpJqxcQnSaqVfkh8O8oOYIl6Of5ejh2Mv2zGX67axt/zbXySJC1GP5T4JElqW08kvoi4OCLujoivz7E/IuJ/RMS3I+LWiDiz2zHOp434nxUR90bELcXtrd2OcS4RcVpEXB8ReyPiGxHxhhbHVPb6txl/la//wyNiNCL2FPG/o8UxqyLiyuL63xgRQ92PtLU2498aEQdnXP/XlhHrfCJiICK+GhG7Wuyr7PVvWiD+Sl//iNgfEV8rYhtrsX/xnz+ZWfkb8EzgTODrc+x/IXAtEMDTgRvLjnmR8T8L2FV2nHPE9ljgzOL+icC3gCf1yvVvM/4qX/8A1hT3TwBuBJ4+65jfA95f3D8XuLLsuBcZ/1bgfWXHusDr+BPgQ63+T6p8/duMv9LXH9gPrJ1n/6I/f3qixJeZNwA/nOeQlwIfzIavAI+KiMd2J7qFtRF/ZWXmDzLz5uL+/cA+4JRZh1X2+rcZf2UV13S8eHhCcZvdMP9S4LLi/seA50ZEdCnEebUZf6VFxKnAi4C/neOQyl5/aCv+Xrfoz5+eSHxtOAX4/ozHd9BDH26FXy6qg66NiCeXHUwrRRXOL9H41j5TT1z/eeKHCl//oprqFuBu4HOZOef1z8wp4F7g5O5GObc24gf4zaKa6mMRcVqXQ1zIe4A3AT+dY3+lrz8Lxw/Vvv4JfDYidkfEthb7F/350y+Jr9fdTGPKnY3Ae4G/LzmeY0TEGuDjwB9n5n1lx7NYC8Rf6eufmdOZeQZwKrA5Ip5SdkyL0Ub8/wcYyszTgc/xYOmpdBExAtydmbvLjuV4tBl/Za9/4RmZeSbwAuD3I+KZSz1hvyS+O4GZ31JOLbb1hMy8r1kdlJnXACdExNqSwzoqIk6gkTSuyMxPtDik0td/ofirfv2bMvNHwPXA2bN2Hb3+EbECeCRwqLvRLWyu+DPzUGZOFg//Fnhqt2Obx68AL4mI/cBO4DkRcfmsY6p8/ReMv+LXn8y8s/h5N/BJYPOsQxb9+dMvie8q4NVF756nA/dm5g/KDqpdEfGYZptARGym8XepxBuniOsDwL7MfPcch1X2+rcTf8Wv/7qIeFRx/2eA5wHfnHXYVcBrivsvBz6fRat/2dqJf1Z7zEtotMNWQmZekJmnZuYQjY4rn8/MV846rLLXv534q3z9I2IwIk5s3gd+DZjdO37Rnz8rOhLtMouID9Poebc2Iu4A3kajkZzMfD9wDY2ePd8GDgPnlxNpa23E/3Lg9RExBfwYOLcqbxwa3xhfBXytaKcB2A5sgJ64/u3EX+Xr/1jgsogYoJGQP5KZuyLiQmAsM6+ikdj/LiK+TaMT1bnlhXuMduL/o4h4CTBFI/6tpUXbph66/i310PX/WeCTxffSFcCHMvPTEfE6OP7PH2dukSTVSr9UdUqS1BYTnySpVkx8kqRaMfFJkmrFxCdJqhUTn1RxEXFNcyzcrO1vj4g3Fve/EBHDLY45IyJe2I04pV5h4pMqLjNfWMx6cjzOoDHGSVLBxCeVLCL+NCL+qLj/1xHx+eL+cyLiimisR7a22PbmiPhWRHwZeOKsU/1WNNa++1ZEnBURK4ELgXOisZbZOd18XVJVmfik8n0JOKu4PwysKeYXPQu4oXlQRDyVxqwgzVLcplnnWZGZm4E/Bt6WmUeAt9JYH+6MzLyysy9D6g0mPql8u4GnRsQjgEngn2gkwLNoJMWms4BPZubhYoWJq2adpzkB925gqKMRSz2sJ+bqlPpZZj4QEd+jMUfiPwK3As8GnsDiJgxuzrA/je9taU6W+KRq+BLwRhpVm18CXgd8ddZk2TcAL4uInylmrH9xG+e9HzhxuYOVepmJT6qGL9FYyeCfMvOfgZ/w0GpOMvNm4EpgD3AtcFMb570eeJKdW6QHuTqDJKlWLPFJkmrFxCdJqhUTnySpVkx8kqRaMfFJkmrFxCdJqhUTnySpVkx8kqRa+f/B/d/bNfBfugAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 504x360 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
},
{
"output_type": "stream",
"text": [
"done in 1.100s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "hSldhkxiEaXX",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "1a527386-394c-4076-8406-81d3770d014c"
},
"source": [
"plt.hist(topic_lsa)\n",
"plt.xlabel('topic LSA')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 49
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAU70lEQVR4nO3dfZBldX3n8fdH0CX4wEMYAR8nsiiwjgIZiVlF8SkhhBXNg0gFA7usBB+2YiWpZFZTm67dSsWsATZZSSJGSmSjMRofSEGywswomFJhwJGnWcUouAMjDCYRNFkR/O4f5zQ2Pd3Tt2f63Htnfu9XVdfce86593y6+/Znzj33nN9JVSFJasdjJh1AkjReFr8kNcbil6TGWPyS1BiLX5Ias++kA4zikEMOqdWrV086hiTtUW644Yb7qmrV/Ol7RPGvXr2aTZs2TTqGJO1Rkty50HR39UhSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGDFb8SZ6eZGOS25LcmuRX++kzSe5Ksrn/OmWoDJKkHQ15HP9DwK9X1Y1JngjckOSqft6FVfUHA65bkrSIwYq/qrYB2/rbDyTZAjx1qPVJkkYzln38SVYDxwFf6Ce9NclNSS5JctAijzk3yaYkm7Zv3z6OmBqz9RuOYP2GIyYdQ2rO4MWf5AnAXwFvq6r7gT8BjgCOpXtHcP5Cj6uqi6tqbVWtXbVqh6EmJEm7aNDiT/JYutL/86r6GEBV3VNVD1fVD4D3AicMmUGS9GhDHtUT4H3Alqq6YM70w+cs9lrglqEySJJ2NORRPS8C3gDcnGRzP+3twBlJjgUKuAP4lQEzSJLmGfKons8CWWDWlUOtU5K0NM/claTGWPyS1BiLX2N1/umn7tbj1284gsM2bl56QUmLsvglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+DW1LjpvA+effipb11076SjSXsXil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxS1JjBiv+JE9PsjHJbUluTfKr/fSDk1yV5Pb+34OGyiBJ2tGQW/wPAb9eVccALwTekuQYYB2wvqqOBNb39yVJYzJY8VfVtqq6sb/9ALAFeCpwGnBpv9ilwGuGyiBJ2tFY9vEnWQ0cB3wBOLSqtvWzvgkcushjzk2yKcmm7du3jyOmJDVh8OJP8gTgr4C3VdX9c+dVVQG10OOq6uKqWltVa1etWjV0TElqxqDFn+SxdKX/51X1sX7yPUkO7+cfDtw7ZAZJ0qMNeVRPgPcBW6rqgjmzLgfO6m+fBXxyqAySpB3tO+Bzvwh4A3Bzks39tLcD7wT+Msk5wJ3A6wbMIEmaZ7Dir6rPAllk9iuGWq8kaec8c1eSGmPxS1JjhtzHL62YmZkZAE58yWRzSHsDt/glqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjfEELq241euuAOCOd/7sI9MuOm/DpOJImsctfklqjMUvSY2x+CWpMRa/JDXG4pekxlj8ktQYi1+SGmPxa3rMHADAlqOOZstRR084jLT3svglqTEWvyQ1xuKXpMZY/JLUGItfkhpj8UtSYyx+SWqMxS9JjRmp+JOsGTqIJGk8Rt3i/+Mk1yV5c5IDBk0kSRrUSMVfVScCvwQ8HbghyQeTvGrQZJKkQYy8j7+qbgd+G/gt4KXAHyX5P0l+bqhwkqSVN+o+/ucluRDYArwc+HdVdXR/+8JFHnNJknuT3DJn2kySu5Js7r9OWYHvQZK0DKNu8f9P4Ebg+VX1lqq6EaCq7qZ7F7CQ9wMnLzD9wqo6tv+6crmBJUm7Z98Rl/tZ4F+q6mGAJI8B9quqf66qyxZ6QFVdk2T1iqSUJK2YUbf4rwZ+ZM79/ftpu+KtSW7qdwUdtIvPIUnaRaMW/35V9Z3ZO/3t/XdhfX8CHAEcC2wDzl9swSTnJtmUZNP27dt3YVWSpIWMWvzfTXL87J0kPw78y3JXVlX3VNXDVfUD4L3ACTtZ9uKqWltVa1etWrXcVUmSFjHqPv63AR9JcjcQ4DDg9OWuLMnhVbWtv/ta4JadLS9JWnkjFX9VXZ/kKOA5/aQvV9X3d/aYJB8CTgIOSbIV+B3gpCTHAgXcAfzKLuaWJO2iUbf4AV4ArO4fc3wSquoDiy1cVWcsMPl9y4snSVppIxV/ksvoPpTdDDzcTy5g0eKXJE2nUbf41wLHVFUNGUaSNLxRj+q5he4DXUnSHm7ULf5DgNuSXAd8b3ZiVb16kFSSpMGMWvwzQ4aQJI3PqIdzfibJM4Ejq+rqJPsD+wwbTZI0hFGHZX4j8FHgPf2kpwKfGCqUJGk4o364+xbgRcD98MhFWZ48VChJ0nBGLf7vVdWDs3eS7Et3HL8kaQ8zavF/JsnbgR/pr7X7EeCvh4slSRrKqMW/DtgO3Ew3vs6VLH7lLUnSFBv1qJ7ZYZTfO2wc7Y22HHU0nHTRpGNI6o06Vs/XWWCfflU9a8UTSZIGtZyxembtB/wicPDKx5EkDW2kffxV9a05X3dV1f+guwC7JGkPM+qunuPn3H0M3TuA5YzlL0maEqOW99yLoj9Ed/Ws1614GknS4EY9qudlQweRJI3HqLt6fm1n86vqgpWJI0ka2qgncK0F3kQ3ONtTgfOA44En9l/SyLauu3aHaavXXTGBJFKbRt3H/zTg+Kp6ACDJDHBFVZ05VDBJ0jBG3eI/FHhwzv0H+2mSpD3MqFv8HwCuS/Lx/v5rgEuHiSRJGtKoR/X8bpK/AU7sJ/37qvricLEkSUMZdVcPwP7A/VX1h8DWJD82UCZJ0oBGvfTi7wC/BfznftJjgf81VChJ0nBG3eJ/LfBq4LsAVXU3HsYpSXukUYv/waoq+qGZkzx+uEiSpCGNWvx/meQ9wIFJ3ghcjRdl0W6YmZmZdASpWUse1ZMkwIeBo4D7gecA/6Wqrho4myRpAEsWf1VVkiurag1g2UvSHm7UXT03JnnBoEkkSWMx6pm7PwGcmeQOuiN7Qvdm4HlDBZMkDWOnxZ/kGVX1DeCnl/vESS4BTgXurarn9tMOpvu8YDX9xVyq6h+X+9ySpF231K6eTwBU1Z3ABVV159yvJR77fuDkedPWAeur6khgfX9fkjRGSxV/5tx+1nKeuKquAf5h3uTT+OHgbpfSDfYmSRqjpYq/Frm9qw6tqm397W+yk6Gdk5ybZFOSTdu3b1+BVWsS1ly6ZtIRJM2zVPE/P8n9SR4Antffvj/JA0nu350Vzz0TeJH5F1fV2qpau2rVqt1ZlSRpjp1+uFtV+6zw+u5JcnhVbUtyOHDvCj+/JGkJyxmWeSVcDpzV3z4L+OSY1y9JzRus+JN8CPgc8JwkW5OcA7wTeFWS24FX9vclSWM06glcy1ZVZywy6xVDrVOStLRx7+qRJE2YxS9JjbH4NZyZA5b9EI/7l4Zn8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPyS1BiLX5IaY/Fr4g7buHmi61697oqJrV+aBItfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGWPzagRdDkfZuFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqjMUvSY2x+MVhGzdPdEz8QcwcMOkETfGaBnsWi1+SGmPxS1JjLH5JaozFL0mN2XcSK01yB/AA8DDwUFWtnUQOSWrRRIq/97Kqum+C65ekJrmrR5IaM6niL+BTSW5Icu5CCyQ5N8mmJJu2b98+5nh7v63rrp10hKmz5tI1K3MtAs8h0JSbVPG/uKqOB34GeEuSl8xfoKourqq1VbV21apV408oSXupiRR/Vd3V/3sv8HHghEnkkKQWjb34kzw+yRNnbwM/Bdwy7hyS1KpJHNVzKPDxJLPr/2BV/e0EckhSk8Ze/FX1NeD5416vJKnj4ZyS1BiLX5IaY/FLUmMs/obNzMxMOsLYbV137cLf9wqfdLUiJ4JJA7H4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmMsfklqTHPFv+Wooyey3jWXrlnWumeXH3vevegiIrt6LP1CP/OVOudh8HMnZg5Y9mttJdetPUNzxS9JrbP4JakxFr8kNcbil6TGWPyS1BiLX5IaY/FLUmOaLP6Lztsw6QjATsaGH8CWo45e8vteve6KsWQZ0up1Vzzq+5j9vqfld76USZ1nsjc6//RTJx1hajVZ/JLUMotfkhpj8UtSYyx+SWqMxS9JjbH4JakxFr8kNcbil6TGNFX8i12YY/YkqsM2buawjZsXPJHpovM2cP7ppz5y0tXMzAzrNxzBYRs3r0i2R6173gUtFlr3cuzqBUnm25V164e2rrv2UffnvtbWXLrmkd/T3N/3/Mcsx9wT10Y5WXDuSX67uu6FXmujft/w6JOududvbMkT9mYOGOuFY9ZvOGJs6xpFU8UvSbL4Jak5Fr8kNcbil6TGTKT4k5yc5MtJvppk3SQySFKrxl78SfYBLgJ+BjgGOCPJMePOIUmtmsQW/wnAV6vqa1X1IPAXwGkTyCFJTUpVjXeFyS8AJ1fVf+zvvwH4iap667zlzgXO7e8+B/jyAk93CHDfgHF3xzRnA/PtjmnOBtOdb5qzwd6X75lVtWr+xH1XLs/KqqqLgYt3tkySTVW1dkyRlmWas4H5dsc0Z4PpzjfN2aCdfJPY1XMX8PQ595/WT5MkjcEkiv964MgkP5bkccDrgcsnkEOSmjT2XT1V9VCStwL/G9gHuKSqbt3Fp9vprqAJm+ZsYL7dMc3ZYLrzTXM2aCTf2D/clSRNlmfuSlJjLH5JasweVfxJDk5yVZLb+38PWmS5ZyT5VJItSW5Lsnqa8vXLPinJ1iTvnpZsSY5N8rkktya5KcnpA2fa6dAdSf5Vkg/3878wrt/jMvL9Wv/6uinJ+iTPnKZ8c5b7+SSVZGyHKY6SLcnr+p/frUk+OK5so+TrO2Rjki/2v99TxpjtkiT3JrllkflJ8kd99puSHL/slVTVHvMF/HdgXX97HfD7iyz3aeBV/e0nAPtPU75+/h8CHwTePS3ZgGcDR/a3nwJsAw4cKM8+wN8DzwIeB3wJOGbeMm8G/rS//Xrgw2N8rY2S72Wzry3gTdOWr1/uicA1wOeBtdOSDTgS+CJwUH//ydP0s6P7EPVN/e1jgDvGmO8lwPHALYvMPwX4GyDAC4EvLHcde9QWP93QDpf2ty8FXjN/gX7cn32r6iqAqvpOVf3ztOQDSPLjwKHAp8aUC0bIVlVfqarb+9t3A/cCO5z1t0JGGbpjbuaPAq9IkoHyLDtfVW2c89r6PN05KeMy6tAn/w34feD/TVm2NwIXVdU/AlTVvVOWr4An9bcPAO4eV7iqugb4h50schrwgep8HjgwyeHLWceeVvyHVtW2/vY36cpzvmcD/5TkY/3btHf1A8NNRb4kjwHOB35jTJlmjfKze0SSE+i2hv5+oDxPBf7vnPtb+2kLLlNVDwHfBn50oDzzjZJvrnPotsLGZcl8/S6Ap1fVjtcSHdYoP7tnA89O8ndJPp/k5LGlGy3fDHBmkq3AlcB/Gk+0kSz3tbmDqRuyIcnVwGELzHrH3DtVVUkWOhZ1X+BE4DjgG8CHgbOB901JvjcDV1bV1pXeeF2BbLPPczhwGXBWVf1gRUPuhZKcCawFXjrpLLP6DYwL6F7702hfut09J9G9U7omyZqq+qeJpvqhM4D3V9X5SX4SuCzJc/eWv4epK/6qeuVi85Lck+TwqtrWl9NCbw+3Apur6mv9Yz5Btx9sRYp/BfL9JHBikjfTff7wuCTfqardvi7BCmQjyZOAK4B39G8jhzLK0B2zy2xNsi/dW+5vDZhpoXXPWnBokSSvpPuP9aVV9b0xZYOl8z0ReC7w6X4D4zDg8iSvrqpNE84G3d/pF6rq+8DXk3yF7j+C6wfONmq+c4CTAarqc0n2oxsgbZy7pBaz28Pe7Gm7ei4HzupvnwV8coFlrqfb5zW7b/rlwG1jyAYj5KuqX6qqZ1TVarrdPR9YidJfiWzphtD4eJ/powPnGWXojrmZfwHYUP2nW2OwZL4kxwHvAV495n3US+arqm9X1SFVtbp/rX2+zzl06S+ZrfcJuq19khxCt+vna2PINmq+bwCv6PMdDewHbB9TvqVcDvxyf3TPC4Fvz9mNO5pxfVK9El90+3fXA7cDVwMH99PXAn82Z7lXATcBNwPvBx43TfnmLH824zuqZ8lswJnA94HNc76OHTDTKcBX6D5HeEc/7b/SFRR0f2wfAb4KXAc8a8yvt6XyXQ3cM+dndfk05Zu37KcZ01E9I/7sQrcr6rb+7/T10/SzozuS5+/ojvjZDPzUGLN9iO6Iuu/TvTM6BzgPOG/Oz+6iPvvNu/J7dcgGSWrMnrarR5K0myx+SWqMxS9JjbH4JakxFr8kNcbi114vyYH9CXO78xxXJjlwxGXPzgKjrib5D0lu7kdUvCXJafPmb07yF7uTUxrF1J25Kw3gQLqhMv54V5+gqnZrWN4kT6M7w/f4qvp2kicwZwC8/iShfejO6n58VX13d9Yn7Yxb/GrBO4Ej+i3qd/VnPL6r3+q+Of11B5KclOSaJFf0Y7X/aT/mDUnu6M8wJckv91vtX0py2YgZngw8AHwHHhk19utz5p9BNz7Sp1h4lE1pxbjFrxasA55bVcdCd2ES4Fjg+XTjr1yf5Jp+2RPoztq8E/hb4OfohoSmf+y/AX4b+LdVdV+Sg0fM8CW6s3y/nmQ98LGq+us580+nO+P8KLqRIMd6YRK1xS1+tejFwIeq6uGqugf4DPCCft511Y3T/jDdqfMvnvfYlwMfqar7AKpqZ+OmP6J/vpPpxhz6CnBhkhmAdFfGuq+qvkE3rMZxy/gPRVo2i196tPljmKzYmCbVua6qfo9uYLCf72edARyV5A668VeeNGeetOIsfrXgAbphimddC5yeZJ9+FNeX0A0CB3BCP2rjY+h2v3x23nNtAH4xyY9Cdy3jUQIkeUoefW3UY4E7+/W8DlhTPxxJ8zS6/wykQbiPX3u9qvpWf6WnW+iukvWbdNdF+BLdFv1vVtU3kxxFN2Tvu4F/DWykG6Z67nPdmuR3gc8keZjuurFnL7Das5PMvbzli4A/SPIUussgbqcbcfFE4K7qLnU56xrgmNnrJ+zmty/twNE5pV6Sk4DfqKpTJ51FGpK7eiSpMW7xS1Jj3OKXpMZY/JLUGItfkhpj8UtSYyx+SWrM/wfwrxpgXn9yGQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nEOR43MHzHI-"
},
"source": [
"## Kmeans Clustering LSA"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ETHkUvQyRh0M"
},
"source": [
"import numpy as np\n",
"XLSA = np.array(svd_model.components_)\n",
"yLSA = np.array(topic_lsa)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "OVUmDmkKhr_k",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "f35e564c-992c-48c4-fb32-269843b3044d"
},
"source": [
"print(\"Loading Data X & Y...\")\n",
"t0 = time()\n",
"\n",
"print(\"X\", XLSA, \"\\n\")\n",
"print(\"Y\", yLSA, \"\\n\")\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Data X & Y...\n",
"X [[ 3.13451948e-03 6.88350778e-03 1.04130496e-02 ... 2.06247695e-01\n",
" 1.24442311e-02 4.62193987e-03]\n",
" [ 2.56435442e-02 5.55499806e-02 1.09247139e-02 ... -1.31262699e-02\n",
" 3.05536061e-02 3.69511648e-02]\n",
" [ 3.57652406e-02 7.73560119e-02 1.50072894e-02 ... -1.74403555e-02\n",
" 4.35789082e-02 5.14008936e-02]\n",
" ...\n",
" [ 1.07476301e-02 4.97056991e-03 -2.30400615e-03 ... 7.34066690e-03\n",
" 3.96725368e-04 -5.14670331e-03]\n",
" [-2.57774455e-01 6.78116362e-02 2.15504328e-03 ... 3.33721650e-02\n",
" 5.89974262e-04 3.34185816e-01]\n",
" [-1.20527608e-15 3.17549545e-16 -9.55736860e-16 ... 1.11292670e-16\n",
" -1.05371133e-16 1.77516083e-15]] \n",
"\n",
"Y [[ 2.00064442e-02 2.84444564e-01 4.68530783e-01 4.28259093e-01\n",
" -3.00672654e-02 0.00000000e+00 5.28505783e-02 -9.54195664e-02\n",
" -0.00000000e+00 8.27814522e-02 2.25283942e-01 1.56128209e-01\n",
" 8.41971234e-02 4.18000601e-01 0.00000000e+00 0.00000000e+00\n",
" 2.23612512e-15 -2.26583577e-02 -9.22909278e-02 -0.00000000e+00]\n",
" [ 6.37540181e-01 -3.40101334e-02 -4.44276117e-02 3.47513234e-02\n",
" -5.15671634e-03 1.90916364e-16 -2.78485309e-02 1.02725344e-02\n",
" -1.01808371e-15 8.01557783e-02 1.06378250e-01 3.19836289e-02\n",
" 3.42842669e-02 -8.14814565e-02 5.19725396e-01 -3.50893769e-01\n",
" -3.64563300e-01 6.09124668e-03 2.62954293e-02 5.50149106e-15]\n",
" [ 6.37540181e-01 -3.40101334e-02 -4.44276117e-02 3.47513234e-02\n",
" -5.15671634e-03 2.92018328e-19 -2.78485309e-02 1.02725344e-02\n",
" -1.74875666e-15 8.01557783e-02 1.06378250e-01 3.19836289e-02\n",
" 3.42842669e-02 -8.14814565e-02 -2.91037370e-01 -4.44405303e-01\n",
" 4.93911429e-01 6.09124668e-03 2.62954293e-02 1.66513525e-16]\n",
" [ 6.37540181e-01 -3.40101334e-02 -4.44276117e-02 3.47513234e-02\n",
" -5.15671634e-03 -4.73376599e-17 -2.78485309e-02 1.02725344e-02\n",
" -2.93523822e-16 8.01557783e-02 1.06378250e-01 3.19836289e-02\n",
" 3.42842669e-02 -8.14814565e-02 2.86007970e-01 5.77450743e-01\n",
" 3.33012619e-01 6.09124668e-03 2.62954293e-02 -4.41796403e-15]\n",
" [ 6.84366965e-01 -4.42559036e-03 -1.67202841e-03 -2.09317162e-02\n",
" 4.82051295e-03 1.07246723e-16 -1.26137780e-01 1.67625277e-01\n",
" -1.04710198e-15 -5.52305882e-02 -2.43602586e-01 -1.66168409e-01\n",
" 9.82403684e-02 2.76195397e-01 4.43486118e-16 -1.41881723e-16\n",
" 9.37731071e-16 3.36919079e-03 -8.47181811e-02 -1.02455970e-16]\n",
" [ 6.82428353e-01 -2.44539147e-02 -3.10396059e-02 2.51216385e-03\n",
" 1.53033957e-03 8.84203866e-17 1.88833087e-01 -1.93134573e-01\n",
" 3.18031736e-15 -1.66883657e-01 -5.99403609e-02 6.80009271e-02\n",
" -2.04982803e-01 6.46687792e-03 2.28339870e-16 -7.99454656e-16\n",
" 1.23668893e-15 -2.59681853e-02 -1.56886392e-02 2.52892651e-15]\n",
" [ 1.03223612e-03 4.38436883e-01 -3.13618071e-01 1.05394265e-01\n",
" 7.70784697e-01 5.31210446e-16 -8.54225915e-03 4.59588538e-02\n",
" 2.52226884e-15 -1.42545947e-01 1.05770253e-01 -3.59472371e-02\n",
" 3.00969519e-02 -1.39244385e-02 -9.00634991e-16 7.47100871e-16\n",
" -2.93127057e-15 9.72011023e-02 -9.92061572e-04 -5.27043986e-17]\n",
" [ 6.37540181e-01 -3.40101334e-02 -4.44276117e-02 3.47513234e-02\n",
" -5.15671634e-03 -3.88926166e-16 -2.78485309e-02 1.02725344e-02\n",
" 3.88811723e-17 8.01557783e-02 1.06378250e-01 3.19836289e-02\n",
" 3.42842669e-02 -8.14814565e-02 -5.14695996e-01 2.17848329e-01\n",
" -4.62360748e-01 6.09124667e-03 2.62954293e-02 -3.57383771e-15]\n",
" [ 2.66411565e-02 3.14038633e-01 5.18603063e-01 4.94920472e-01\n",
" -3.74759150e-02 7.37627071e-18 -5.44476506e-02 7.39990676e-02\n",
" 2.97188383e-16 -3.38204701e-02 -9.64505397e-02 -8.09017630e-02\n",
" -1.50035868e-01 -3.05264153e-01 6.75152546e-16 -1.41112390e-15\n",
" -1.32509845e-15 7.10449282e-03 2.22773210e-02 1.27390586e-15]\n",
" [ 3.72560359e-03 4.15817124e-01 -2.43182924e-01 -3.75456731e-02\n",
" -1.43447982e-01 -1.05177915e-15 1.93634772e-01 7.76552404e-02\n",
" -1.00211993e-14 5.88173947e-01 -2.68125299e-01 6.16078066e-02\n",
" -2.89743054e-01 9.46613214e-02 4.88571464e-15 -3.43520263e-15\n",
" 1.37744887e-14 -4.22608124e-01 6.04929154e-03 1.15406172e-15]\n",
" [ 2.14242590e-03 6.70095394e-01 -4.64368578e-01 9.96535779e-02\n",
" 5.00289237e-01 4.13119314e-16 -1.86330759e-03 5.51983124e-03\n",
" -2.24365863e-16 2.15512158e-04 -3.84299063e-03 4.58548230e-03\n",
" -4.40073326e-03 4.12844235e-03 4.78251506e-16 -1.86258337e-16\n",
" 1.27140204e-15 -3.87202509e-02 4.40609160e-04 2.49577178e-18]\n",
" [ 2.61018770e-03 5.73985675e-01 -3.79081007e-01 5.79768277e-03\n",
" -5.35054208e-01 3.73106506e-16 -9.82256509e-02 -1.06466265e-01\n",
" 2.06826664e-15 -1.35602887e-01 5.66677926e-03 1.40182864e-02\n",
" 1.23110613e-01 -3.59699638e-02 -1.35715124e-15 1.17091458e-15\n",
" -4.18725446e-15 1.25946571e-01 -2.31969903e-03 -2.96185511e-16]\n",
" [ 2.61018770e-03 5.73985675e-01 -3.79081007e-01 5.79768277e-03\n",
" -5.35054208e-01 1.58409076e-16 -9.82256509e-02 -1.06466265e-01\n",
" 1.84386404e-15 -1.35602887e-01 5.66677926e-03 1.40182864e-02\n",
" 1.23110613e-01 -3.59699638e-02 -1.53970903e-15 9.40707942e-16\n",
" -4.03084504e-15 1.25946571e-01 -2.31969903e-03 -7.51870321e-16]\n",
" [ 2.41485675e-18 1.57584097e-16 -1.10227182e-16 4.70323267e-17\n",
" 2.87004526e-16 -4.14743069e-18 -1.45888572e-15 5.12210914e-16\n",
" 1.00000000e+00 1.50597285e-14 -5.57908079e-15 -1.36954100e-17\n",
" 1.45836765e-15 2.65050804e-16 3.18228661e-17 -9.98058037e-16\n",
" 4.53959240e-16 -4.99816024e-16 2.74502397e-16 4.80866204e-16]\n",
" [ 2.66411565e-02 3.14038633e-01 5.18603063e-01 4.94920472e-01\n",
" -3.74759150e-02 -8.35394692e-17 -5.44476506e-02 7.39990676e-02\n",
" 6.68290916e-17 -3.38204701e-02 -9.64505397e-02 -8.09017630e-02\n",
" -1.50035868e-01 -3.05264153e-01 -1.84164592e-16 5.73072142e-16\n",
" -4.64946824e-16 7.10449282e-03 2.22773210e-02 -1.07662384e-15]\n",
" [ 3.82827725e-17 -1.21215946e-16 6.80088803e-17 -6.65569035e-18\n",
" -5.63777711e-16 8.22439938e-01 7.57154211e-16 -1.04095958e-17\n",
" -4.56491213e-16 -3.57156326e-16 -4.62970894e-16 7.22089293e-16\n",
" 1.57607839e-16 -1.79079039e-16 -3.14162697e-15 4.83566720e-15\n",
" 1.78449044e-15 9.57254822e-16 -2.88394505e-15 5.68851957e-01]\n",
" [-4.17283220e-18 -7.14481613e-17 4.45874110e-17 -4.10001694e-17\n",
" -6.25192481e-16 8.22439938e-01 2.17298644e-15 2.22097601e-15\n",
" 7.52902276e-16 1.21279260e-15 9.31462381e-16 -1.49801374e-15\n",
" -5.66420279e-16 2.93963031e-16 2.77148661e-15 -4.81181882e-15\n",
" -1.77712002e-15 -1.16181444e-15 2.73606206e-15 -5.68851957e-01]\n",
" [ 1.11176976e-01 2.28800945e-01 3.20850198e-01 -2.94404007e-01\n",
" 4.11745293e-02 1.46274144e-16 -2.47333237e-01 3.37107904e-01\n",
" -5.28916694e-16 -2.17832974e-01 -5.30210258e-01 -2.36526244e-01\n",
" 7.97507548e-02 1.95385269e-01 -4.06538210e-16 1.05973638e-15\n",
" -3.54373228e-16 9.51456582e-04 1.34356067e-03 -2.99625908e-16]\n",
" [ 2.64674893e-02 1.81496548e-01 2.48875473e-01 -4.45906786e-01\n",
" 8.21197165e-02 1.02800990e-15 -3.95980958e-01 -3.54647150e-01\n",
" -1.06060022e-15 1.39206836e-01 1.25262826e-01 -1.21386569e-01\n",
" -2.22894614e-01 -8.85956408e-02 6.62235360e-16 -1.04331062e-15\n",
" -9.42564177e-17 2.43616498e-02 -5.54828901e-01 -3.04125638e-15]\n",
" [ 2.59508034e-02 2.23131240e-01 1.80665913e-01 -3.80118845e-01\n",
" -2.18187304e-02 -1.74505851e-15 4.63258138e-01 3.12981477e-01\n",
" -4.16600061e-15 3.50898222e-01 3.92656366e-02 -8.33287777e-02\n",
" -1.11540529e-01 -4.80196739e-02 -5.84789391e-15 4.09324882e-15\n",
" -1.76868306e-14 5.50854115e-01 -7.63205147e-03 5.09250821e-16]\n",
" [ 4.22110730e-02 3.73046464e-01 5.61034974e-01 -4.95996095e-02\n",
" 2.91152938e-02 3.14591328e-17 1.33038005e-01 -1.71237289e-01\n",
" -3.99096517e-16 6.60606456e-02 1.70583528e-01 1.13392662e-01\n",
" 2.66314565e-01 3.16869284e-01 -5.49632930e-16 1.14899942e-15\n",
" -9.29893648e-17 6.59868636e-03 4.95563349e-02 -1.87500974e-16]\n",
" [ 9.61608610e-02 8.45627358e-02 1.14209396e-01 -1.78643912e-01\n",
" 2.86486847e-02 -1.89913516e-16 5.57746040e-01 -4.48151169e-01\n",
" 6.12958647e-15 -4.19851497e-01 -2.52225089e-01 5.11119792e-02\n",
" -2.78726890e-01 3.54806851e-02 2.65763701e-16 2.53700010e-16\n",
" 3.40512901e-16 -5.71157981e-03 5.07287110e-03 -2.08120520e-15]\n",
" [ 3.25866090e-02 2.08053838e-01 2.68389228e-01 -4.27084786e-01\n",
" 4.37711924e-02 -4.46857625e-16 3.32012832e-01 3.62106602e-02\n",
" 3.61987569e-16 -3.47763903e-02 5.06377455e-02 -8.09958937e-02\n",
" 5.42040541e-01 -3.54519784e-01 3.74188887e-15 -3.54204499e-15\n",
" 9.97124742e-15 -3.34069146e-01 -7.38726225e-02 7.76085194e-16]\n",
" [ 1.07126194e-02 1.68950715e-01 2.62021467e-02 -2.01175479e-01\n",
" -1.86428714e-01 -1.43752423e-15 9.24573198e-02 4.49044186e-01\n",
" 8.50838066e-15 -3.41985552e-01 5.53575355e-01 -2.58218009e-01\n",
" -3.50200957e-01 1.15289981e-01 2.75854577e-15 -1.73630000e-15\n",
" 7.99527847e-15 -2.37482152e-01 9.77312917e-03 1.59178553e-15]\n",
" [ 3.08273915e-02 2.06580425e-01 2.82528543e-01 -4.85485656e-01\n",
" 8.58421316e-02 1.23117444e-15 -3.65584050e-01 -3.09921272e-01\n",
" -1.32296015e-15 1.09779939e-01 9.48126089e-02 -7.88633579e-02\n",
" -1.38628487e-01 -2.26496156e-02 -9.06884806e-16 6.24582873e-16\n",
" -5.61065193e-16 -9.21496799e-03 5.68169511e-01 2.78469313e-15]\n",
" [ 2.04201868e-02 1.33345640e-01 1.70146649e-01 -3.16049574e-01\n",
" 3.04247046e-02 4.07714336e-16 -1.81514223e-01 3.29687899e-01\n",
" 2.23873437e-15 -1.71007813e-01 -4.89418164e-02 8.06530227e-01\n",
" -1.07796477e-01 -8.81987431e-02 1.07707437e-15 -5.05632089e-16\n",
" 9.18903503e-16 -1.00047415e-02 -3.83170917e-02 -1.20727254e-15]] \n",
"\n",
"done in 0.009s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "folq32qURbmr",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 105
},
"outputId": "ab9ef04b-5c55-4d44-90aa-00bea40c9514"
},
"source": [
"from sklearn.cluster import KMeans\n",
"\n",
"print(\"Loading Kmeans...\")\n",
"t0 = time()\n",
"kmeans = KMeans(n_clusters=2) # You want cluster the passenger records into 2: Survived or Not survived\n",
"print(kmeans.fit(XLSA))\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Kmeans...\n",
"KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
" n_clusters=2, n_init=10, n_jobs=None, precompute_distances='auto',\n",
" random_state=None, tol=0.0001, verbose=0)\n",
"done in 0.024s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "XRq4nyzmUOfm",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
},
"outputId": "28aeddad-cdf6-4bcf-95c0-0ca9675024a4"
},
"source": [
"print(\"Loading Data Correction...\")\n",
"t0 = time()\n",
"\n",
"correct = 0\n",
"for i in range(len(XLSA)):\n",
" predict_me = np.array(XLSA[i].astype(float))\n",
" predict_me = predict_me.reshape(-1, len(predict_me))\n",
" prediction = kmeans.predict(predict_me)\n",
" if prediction[0] == yLSA[i].all():\n",
" correct += 1\n",
"\n",
"print(correct/len(XLSA))\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Data Correction...\n",
"0.05\n",
"done in 0.016s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "xG82xx1EcN1Z",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 176
},
"outputId": "3394a4c1-1a68-4adb-8b0a-f0ca5c2ba57f"
},
"source": [
"from sklearn.preprocessing import MinMaxScaler\n",
"\n",
"\n",
"print(\"Loading X Scaled...\")\n",
"t0 = time()\n",
"\n",
"scaler = MinMaxScaler()\n",
"XLSA_scaled = scaler.fit_transform(XLSA)\n",
"\n",
"print(XLSA_scaled)\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading X Scaled...\n",
"[[0.86822798 0.7461405 0.5092903 ... 0.65772349 0.68882049 0.27693733]\n",
" [0.94313137 0.92144925 0.51132199 ... 0.46190276 0.76198481 0.3478676 ]\n",
" [0.97681338 1. 0.52753289 ... 0.45805185 0.81460879 0.37957029]\n",
" ...\n",
" [0.89356217 0.73924962 0.45879402 ... 0.48017225 0.64014695 0.25550494]\n",
" [0. 0.96561878 0.4764998 ... 0.50340886 0.6409277 1. ]\n",
" [0.85779723 0.72134439 0.46794265 ... 0.47361971 0.63854412 0.2667968 ]]\n",
"done in 0.002s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "746FC1O187aP",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 443
},
"outputId": "365b7496-70ff-47ab-e57a-bd56e14cf88e"
},
"source": [
"print(\"Loading Correction...\")\n",
"t0 = time()\n",
"\n",
"print(\"X\", XLSA, \"\\n\")\n",
"print(\"X scaled\", XLSA_scaled, \"\\n\")\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Correction...\n",
"X [[ 3.13451948e-03 6.88350778e-03 1.04130496e-02 ... 2.06247695e-01\n",
" 1.24442311e-02 4.62193987e-03]\n",
" [ 2.56435442e-02 5.55499806e-02 1.09247139e-02 ... -1.31262699e-02\n",
" 3.05536061e-02 3.69511648e-02]\n",
" [ 3.57652406e-02 7.73560119e-02 1.50072894e-02 ... -1.74403555e-02\n",
" 4.35789082e-02 5.14008936e-02]\n",
" ...\n",
" [ 1.07476301e-02 4.97056991e-03 -2.30400615e-03 ... 7.34066690e-03\n",
" 3.96725368e-04 -5.14670331e-03]\n",
" [-2.57774455e-01 6.78116362e-02 2.15504328e-03 ... 3.33721650e-02\n",
" 5.89974262e-04 3.34185816e-01]\n",
" [-1.20527608e-15 3.17549545e-16 -9.55736860e-16 ... 1.11292670e-16\n",
" -1.05371133e-16 1.77516083e-15]] \n",
"\n",
"X scaled [[0.86822798 0.7461405 0.5092903 ... 0.65772349 0.68882049 0.27693733]\n",
" [0.94313137 0.92144925 0.51132199 ... 0.46190276 0.76198481 0.3478676 ]\n",
" [0.97681338 1. 0.52753289 ... 0.45805185 0.81460879 0.37957029]\n",
" ...\n",
" [0.89356217 0.73924962 0.45879402 ... 0.48017225 0.64014695 0.25550494]\n",
" [0. 0.96561878 0.4764998 ... 0.50340886 0.6409277 1. ]\n",
" [0.85779723 0.72134439 0.46794265 ... 0.47361971 0.63854412 0.2667968 ]] \n",
"\n",
"done in 0.004s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "bUFeBFL6uzg-",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 385
},
"outputId": "cec6c7e9-e8c4-4dad-aa28-ff3517ba3f86"
},
"source": [
"import umap\n",
"\n",
"print(\"Loading Topic Visualisation...\")\n",
"t0 = time()\n",
"\n",
"kmeans_lsa = XLSA_scaled\n",
"embedding = umap.UMAP(n_neighbors=3, min_dist=0.1, random_state=4).fit_transform(kmeans_lsa)\n",
"plt.figure(figsize=(7,5))\n",
"plt.scatter(embedding[:, 0], embedding[:, 1], c = None, s = 50 ,edgecolors='Blue')\n",
"plt.title('Kmeans LSA Visualisation')\n",
"plt.xlabel('widht')\n",
"plt.ylabel('height')\n",
"plt.show()\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Topic Visualisation...\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAbYAAAFNCAYAAABsXEqqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAfxElEQVR4nO3df3xld13n8denyQyddkJJzICQbtvRjEUpUCV0EVNA2q6FwamyQkDYrTha8eE+EK3btYsPUdYVcFXorj92q1SKSL2IoOMGpH1Upcyj/EqhC63FTiB0JPTHDHfaZtop8+uzf5yTkkkzmcxM7j33nvt6Ph555Oace8/53JuZ8873fL/neyIzkSSpLk6pugBJklaTwSZJqhWDTZJUKwabJKlWDDZJUq0YbJKkWjHYpBqKiLMiYm9E9LVwHy+JiK8v+PnOiHhJC/bTku2qvgw2VSoivhYRFy/4+TURsSciXlxlXScqIt4bEb91lHWXRcTtEfFwROyOiH+IiI2LnrMxIg5HxB8vs49TI+LBiHjpEuveFREfysydmbk+Mw+d/Ltamcx8Vmb+08lsY6nPbzW2q95isKljRMTlwB8CmzPzE1XXs5oiYhR4H3AlcAawkeK9Lg6e/wjsASYi4klLbSszHwMa5XMX7qMPeC1w/aoWL3UZg00dISJ+Dvg94Ecy89Zy2TkRkRHxhoj417Il98aIeH5EfLFstfzBou38dETcVT734xFx9oJ115TbeTgibouICxes+42I+GBEvC8i5srTX2ML1v+XiJgt1/1LRFx0nG/xfGAmM2/Owlxm/nVm7lywj6AIq18DDgA/usz2rgf+fUSctmDZj1D8n/7Ygs+uv9z2T0XEV8v6ZyLidQve9/sX1LD4dW8oP8+58vU/d7SCFra+I+KCiJgqP+v7I+L3FzzvryLivoh4KCJuiYhnlcuvAF4HXFWeRv27Jbb7pIh4d0R8o/x69/wfAPOnRiPiyoh4ICLujYg3LP9rUR0ZbOoEPw+8DbgoM6eWWP9vgU3ABPBu4C3AxcCzgFfPn7aMiMuA/wq8EtgAfBK4YcF2PkcRMEPAB4C/iohTF6zfAvwl8BRgG/AH5XbPBf4T8PzMHKAIkK8d53v8PPDM8lThD0fE+iWeMw6cWdbwQeDyo22sDP97y/c67z8AH8jMgwufGxGnA/8TeFlZ/wuB21dY9wPAK4AnA28A3hURP7CC110DXJOZTwa+u3w/8z5G8ft8KsXn8hfle7q2fPw75WnUpYL9LcALKH6PzwUuoPhDYN53UrSIR4CtwB9GxODK3qrqwmBTJ7gE+DTwpaOs/2+Z+Vhm3gg8AtyQmQ9k5ixFeH1/+bw3Am/PzLvKg/tvA+fPt9oy8/2Z+c3MPJiZvwc8CTh3wX62Z+ZHy36pP6c4cEJxuvBJwPdFxJrM/FpmfuV43mBmfhV4CcUB94PA7rI/aWHAXQ58LDP3UATvpRHx1GU2+z7K05ER8WTgMo5+GvIwcF5ErMvMezPzzhXWPZmZXylbmZ8AbgQuPNbrKFqcoxExnJl7M/PTC7Z5Xdli/RbwG8BzI+KMldRD0aJ7W/n73wX8JkWgL9zv2zLzQGZ+FNjLkb9j9QCDTZ3g54HvAf60PB232P0LHu9b4uf5cDgbuKY8Rfkg0ASCIkyIiF8pT6s9VK4/AxhesK37Fjx+FDg1Ivozcxp4M8VB+IGI+MuIeMbxvsnM/HRmvjozN1CEw4soWiBExDrgVXy79fIpYCfwk8ts8s+BHy5r+QngK5n5hSX2+whFa/eNwL0RMRkRz1xJzRHxsoj4dEQ0y8/s5Rz5mR3NVorf6Zcj4nMR8Ypye30R8Y6I+EpEPMy3W74r2SbAM4B7Fvx8T7ls3jcXtVgf5dv/PtQjDDZ1gvuBiygO9n90Etv5V+DnMvMpC77WZeatZX/aVcCrgcHMfArwEEXwHVNmfiAzxynCM4F3nkSdZObngA8D55WLfpzidN8flf1P91EE8nKnI++haLG+nqLVctRBI5n58cy8BHg68GXgT8pVjwAL++m+c/5B2Xf118DvAk8rP7OPsoLPLDN3ZOZrKU43vhP4UHlK9CcpWpYXU/xhcc787uZfeoxNf4PidzDvrHKZ9DiDTR0hM79BEW6XRsS7TnAz/xu4esFghDMi4lXlugHgILAL6I+IX6cIkmOKiHMj4qXlgf4xilbi4WVe0hfFkPz5r7URMR4RPzt/arFsMW2hOAULRYBdBzybov/ofOCHKE7TPXuZfV1P0f/3Q5StvSXqf1oUlxqcDnyL4vTcfP23Ay+K4rq3M4CrF7x0LcUp2F3AwYh4GfDvlqll4T5fHxEbMvMw8GC5+DDF7+FbwDcpAvW3F730fuC7ltn0DcCvRcSGiBgGfh14/zLPVw8y2NQxyhGCLwV+IiLefgKv/whF6+Avy9NcdwAvK1d/HPh74G6K01ePUbTwVuJJwDuA3RSnK5/KkQGw2K9ShN/81z9QHNy3AF+KiL1lLR8BficiRihC/d2Zed+Cr9vK5x211UbRohoCbs7Me4/ynFOAX6Zo2TSBF1Oc/iUzb6K4dOCLwG3A/51/UWbOAW+i6BPcQ9Ha2rZMLQtdCtxZvtdrgNdk5j6KfsF7gFngn/l2sM97D0Vf5oMR8TdLbPe3gKmy3i9RDD5Z8rpB9a7wRqOSpDqxxSZJqhWDTZJUKwabJKlWDDZJUq0YbJKkWumvuoCVGB4eznPOOafqMiRJHeS2227bXc7kc4SuCLZzzjmHqaml5saVJPWqiLhnqeWeipQk1YrBJkmqFYNNklQrBpskqVYMNklSrRhskqRa6Yrh/pKk7jc3B40G7JhONo0GExMwMLD6+zHYJEktt307bN5yiLUjTQ4ONumfHOLKq4aY3NbH+Pjq7stgkyS11NxcEWqnXjLFuo27H19+yswwm7eMMbuzj/XrV29/9rFJklqq0YC1I80jQg1g3cbdrB1p0mis7v4MNklSS+2YTg4ONpdcd2CwyfR0rur+DDZJUkttGg369wwtuW7NniFGR2NV92ewSZJaamIC9s8OsW9m+Ijl+2aG2T87xMTE6u7PwSOSpJYaGIDJbX1s3jLG4ZEmBwabrNkzxP7ZYlTkag4cAYNNktQG4+Mwu7OPRmMD09PDjJbXsa12qIHBJklqk/XrYetWgNXtU1vMYJMkPa5ds4O0ksEmSQLaOztIKxlskqS2zw7SSg73lyS1fXaQVjLYJEltnx2klQw2SVLbZwdpJYNNktT22UFaycEjkqS2zw7SSgabJAlo7+wgrWSwSZIe167ZQVrJPjZJUq0YbJKkWjHYJEm1YrBJkmrFYJMk1YrBJkmqFYNNklQrBpskqVYMNklSrTjziKRKzM0V9wDbMZ1sKqduGhiouirVQctabBFxXUQ8EBF3LFj2PyLiyxHxxYj4SEQ8pVX7l9S5tm+HM88+xNXX7OLaW+/m6mt2cebZh9i+verKVAetPBX5XuDSRctuAs7LzOcAdwNXt3D/kjrQ3Bxs3nKIUy+Z4vTNn+WMF05z+ubPcuolU2zecoi9e6uuUN2uZcGWmbcAzUXLbszMg+WPnwbObNX+JXWmRgPWjjRZt3H3EcvXbdzN2pEmjUZFhak2qhw88tPAxyrcv6QK7JhODg42l1x3YLDJ9HS2uSLVTSXBFhFvAQ4Cf7HMc66IiKmImNq1a1f7ipPUUptGg/49Q0uuW7NniNHR7r1dijpD24MtIn4KeAXwusw86p9mmXltZo5l5tiGDRvaVp+k1pqYgP2zQ+ybGT5i+b6ZYfbPDjExUVFhqo22DvePiEuBq4AXZ+aj7dy3pM4wMACT2/rYvGWMwyNNDgw2WbNniP2zQ0xu6+u6uzWr87Qs2CLiBuAlwHBEfB14K8UoyCcBN0UEwKcz842tqkFSZxofh9mdfTQaG5ieHma0vI7NUNNqaFmwZeZrl1j8nlbtT1J3Wb8etm4FsE9Nq8uZR+QMEJJqxWDrcdu3FxfLrh1pcnCwSf/kEFdeVfR1jI9XXZ0kHT+DrYctnAFi4cWyp8wMs3nLGLM77ciX1H2c3b+HOQOEpDoy2HqYM0BIqiODrYc5A4SkOjLYepgzQEiqIweP9DBngFAv8HKW3hPLTNfYMcbGxnJqaqrqMmpr797iP/70dDoDhGrlCZezLPjDzctZul9E3JaZY4uX22KTM0ColrycpXfZxyaplrycpXcZbJJqyctZepfBJqmWvJyldxlskmrJy1l6l4NHJNWSl7P0LoNNUm15Q9PeZLBJqjUvZ+k99rFJkmrFYJMk1YrBJkmqFYNNklQrBpskqVYMNklSrRhskqRaMdgkSbVisEmSasVgkyTVisEmSaoVg02SVCtOgiytkrk5aDSKOzdvKmeRHxiouiqp9xhs0irYvh02bznE2pEmBweb9E8OceVVxX2/xserrk7qLQabdJLm5opQO/WSKdZt3P348lNmhtm8ZYzZnd7UUmon+9ikk9RowNqR5hGhBrBu427WjjRpNCoqTOpRtthUqTr0S+2YTg4ONpdcd2CwyfT0MN7kUmofg02VqUu/1KbRoH9yaMl1a/YMMTpqqEntFJlZdQ3HNDY2llNTU1WXoVU0Nwdnnv3Efql9M8M8dlN39UvV6b1I3SQibsvMscXLbbGpEsv1Sx0eadJobGDr1oqKO04DAzC5rY/NW8Y4PNLkwGCTNXuG2D9btD4NNam9DDZVom79UuPjMLuzj0ZjA9PTw4yW/YWGmtR+BpsqUcd+qfXrKVuZ3Ve7VCctG+4fEddFxAMRcceCZa+KiDsj4nBEPOG8qHrHxATsnx1i38zwEcv3zQyzf3aIiYmKCpPU9VrZYnsv8AfA+xYsuwN4JfB/WrhfdQH7pSS1SsuCLTNviYhzFi27CyDCUzWyX0pSa9jHpkrZLyVptXXslFoRcUVETEXE1K5du6ouR5LUJTo22DLz2swcy8yxDRs2VF2OJKlLdGywSZJ0Ilo53P8G4FPAuRHx9YjYGhE/HhFfB34QmIyIj7dq/5Kk3tTKUZGvPcqqj7Rqn5IkeSpSklQrBpskqVa8jq2H1eEmn5K0WM8HW68e3Otyk09JWqyng61XD+5zc8X7XnxjzFNmhtm8xRtjSupuPRtsvXxwr9NNPiVpsZ4dPLLcwX3tSJNGo6LC2uDYN/nMNlckSaunZ4Otlw/um0aD/j31usmnJM3r2WDr5YO7N/mUVGc928c2MQFXXjXEKTPDR5yO7IWDuzf5lFRnPRtsvX5w9yafkuqqZ4MNPLh7k09JddTTwQYe3CWpbnp28IgkqZ4MNklSrRhskqRaMdgkSbVisEmSasVgkyTVisEmSaoVg02SVCsGmySpVnp+5hHVw9xccY+9HdPJpnJqtIGBqquSVAWDbYV65cDZje9z+/bibuhrR5ocHGzSPznElVcVk1mPj1ddnaR2i8zOv6Hm2NhYTk1NVbb/Jxw4F9wFoE4Hzm58n3NzcObZhzj1kqkn3H7osZvGmN1Z/zs1SL0qIm7LzLHFy22xHcPcXHGwX3zgPGVmmM1b6nPg7Nb32WjA2pHmETUDrNu4m8MjTRqNDeUk15J6hYNHjmG5A+fakSaNRkWFrbJufZ87ppODg80l1x0YbDI93flnJCStrhUFW0T8+UqW1VGvHDi79X1uGg369wwtuW7NniFGR70dkdRrVtpie9bCHyKiD3je6pfTeXrlwNmt73NiAvbPDrFvZviI5ftmhtk/O8TEREWFSarMssEWEVdHxBzwnIh4uPyaAx4A/rYtFVasVw6c3fo+BwZgclsfj900xiOTF/DgraM8MnkBj900xuS2zuwXlNRaKxoVGRFvz8yr21DPkjppVOSBwSZrumC04Ino5ve5d2/RTzg9nYyWlykYalK9HW1U5IqH+0fECHA2C0ZSZuYtq1bhMqoONuidA2evvE9J3e+kgi0i3gG8Bvhn4FC5ODNzy6pWeRSdEGySpM5ystex/ThwbmZ+a3XLkiRpda10VORXgTWtLESSpNWwbIstIv4XkMCjwO0RcTPweKstM9/U2vIkSTo+xzoVOd+xdRuwrcW1SJJ00pYNtsy8vl2FSJK0GlY0eCQivkRxSnKhhyhadL+Vmd9c7cIkSToRKx088jFgEnhd+fV3FKF2H/DepV4QEddFxAMRcceCZUMRcVNE7Ci/D55U9ZIkLbLSYLs4M6/OzC+VX28BXpyZ7wTOOcpr3gtcumjZrwI3Z+Ym4ObyZ0mSVs1Kg60vIi6Y/yEing/0lT8eXOoF5awki6eLvwyY77e7HvixlZcqSdKxrfQC7Z8BrouI9UAADwM/ExGnA28/jv09LTPvLR/fBzztOF4rSdIxrSjYMvNzwLMj4ozy54cWrP7giew4MzMijjqfV0RcAVwBcNZZZ53ILiRJPehYF2i/PjPfHxG/vGg5AJn5+8e5v/sj4umZeW9EPJ3i9jdLysxrgWuhmCvyOPdTG3NzxaTEO6aTTeWkxAMDVVclSZ3rWC2208vvq3Uo3QZcDryj/N4T93Q7UQtvI3NwsEn/5BBXXtUdt5GRpKqs+LY1x73hiBuAlwDDwP3AW4G/oTh1eRZwD/DqzFw8wOQJenF2/7k5OPPsQ5x6yRTrNu5+fPm+mWEeu2mM2Z3eRFNSbzva7P4rGhUZEd8TETfPX5MWEc+JiF9b7jWZ+drMfHpmrsnMMzPzPZn5zcy8KDM3ZebFKwm1XtVowNqR5hGhBrBu427WjjRpNCoqTJI63EqH+/8JcDVwACAzv0hxfza1yI7p5ODg0rl/YLDJ9HTPdjtK0rJWOtz/tMz87PygkdKS1691om4cgLFpNOifHFpy3Zo9Q4yOxpLrJKnXrbTFtjsivptyvsiI+Ang3uVf0hm2by/6qq6+ZhfX3no3V1+zizPPPsT27VVXtryJCdg/O8S+meEjlu+bGWb/7BATExUVJkkdbqUttl+gGHr/zIiYBWYo5ozsaHNzxajCxQMwTpkZZvOWzh6AMTAAk9v62LxljMMjTQ4MNlmzZ4j9s8WoyE6tW5KqttJgmwX+DPhHYIhi5pHLgbe1qK5VsdwAjMMjTRqNDWzdWlFxKzA+DrM7+2g0NjA9PcxoeRq1E0OtG0/3SqqnlQbb3wIPAp8HvtG6clbXsQdgDFPMENa51q+nDN/OrdPr7SR1kpUG25mZuXim/o7nAIzW6+bTvZLqaaWDR26NiGe3tJIWcABG63m9naROc6y5IufvnN0PvCEivgp8i+K8WGbmc1pf4olzAEbr1eF0r6R6OdapyFe0pYoW6qYBGN3I072SOk3L5opcTb04V2S3cE5LSVU52lyRKx08Ii3J072SOo3BppPm6V5JncRg06rohuvtJPWGlQ73lySpKxhskqRaMdgkSbVisEmSasVgkyTVisEmSaoVg02SVCsGmySpVgw2SVKtGGySpFox2CRJtWKwSZJqxWCTJNWKwSZJqhWDTZJUKwabJKlWDDZJUq0YbJKkWjHYJEm1YrBJkmqlv+oCpBM1NweNBuyYTjaNBhMTMDBQdVWSqmawqStt3w6btxxi7UiTg4NN+ieHuPKqISa39TE+XnV1kqpksKnrzM0VoXbqJVOs27j78eWnzAyzecsYszv7WL++wgIlVco+NnWdRgPWjjSPCDWAdRt3s3akSaNRUWGSOoLBpq6zYzo5ONhcct2BwSbT09nmiiR1kkqCLSJ+MSLuiIg7I+LNVdSg7rVpNOjfM7TkujV7hhgdjTZXJKmTtD3YIuI84GeBC4DnAq+IiNF216HuNTEB+2eH2DczfMTyfTPD7J8dYmKiosIkdYQqBo98L/CZzHwUICI+AbwS+J0KalEXGhiAyW19bN4yxuGRJgcGm6zZM8T+2WJUpANHpN5WRbDdAfz3iPgOYB/wcmCqgjrUxcbHYXZnH43GBqanhxktr2Mz1CS1Pdgy866IeCdwI/AIcDtwaPHzIuIK4AqAs846q601qjusXw9btwLYpybp2yoZPJKZ78nM52Xmi4A9wN1LPOfazBzLzLENGza0v0hJUleq5ALtiHhqZj4QEWdR9K+9oIo6JEn1U9XMI39d9rEdAH4hMx+sqA5JUs1UEmyZeWEV+1XncSJjSavNuSJVGScyltQKBpsq0c0TGdvKlDqbc0WqEt06kfH27XDm2Ye4+ppdXHvr3Vx9zS7OPPsQ27dXXZmkebbYVIljT2Q8TKddn9bNrUypl9hiUyW6cSLjbm1lSr3GYFMlunEiY2+XI3UHT0WqEt04kfGm0aB/srtamVIviszO/ytzbGwsp6acJ7mO9u4tTvFNT2fHT2Q8N1cMHFncx7ZvZpjHbrKPTWq3iLgtM8cWL7fFpkp100TG3djKlHqRwSYdB2+XI3U+g006Tt3UypR6kaMiJUm1YrBJkmrFYJMk1YrBJkmqFYNNklQrBpskqVYMNklSrRhskqRaMdgkSbVisEmSasVgkyTVisEmSaoVg02SVCsGmySpVgw2SVKtGGySpFox2CRJtWKwSZJqxWCTJNWKwSZJqhWDTZJUKwabJKlWDDZJUq0YbJKkWjHYJEm1YrBJkmrFYJMk1YrBJkmqlUqCLSJ+KSLujIg7IuKGiDi1ijokSfXT9mCLiBHgTcBYZp4H9AGvaXcdkqR6qupUZD+wLiL6gdOAb1RUhySpZtoebJk5C/wusBO4F3goM29c/LyIuCIipiJiateuXe0uU5LUpao4FTkIXAZsBJ4BnB4Rr1/8vMy8NjPHMnNsw4YN7S5TktSlqjgVeTEwk5m7MvMA8GHghRXUIUmqoSqCbSfwgog4LSICuAi4q4I6JEk1VEUf22eADwGfB75U1nBtu+uQJNVTfxU7zcy3Am+tYt+SpHpz5hFJUq0YbJKkWjHYJEm1YrBJkmrFYJMk1YrBJkmqFYNNklQrBpskqVYMNklSrVQy80ivmpuDRgN2TCebRoOJCRgYqLoqSaoXg61Ntm+HzVsOsXakycHBJv2TQ1x51RCT2/oYH6+6OkmqD4OtDebmilA79ZIp1m3c/fjyU2aG2bxljNmdfaxfX2GBklQj9rG1QaMBa0eaR4QawLqNu1k70qTRqKgwSaohg60NdkwnBwebS647MNhkejrbXJEk1ZfB1gabRoP+PUNLrluzZ4jR0WhzRZJUXwZbG0xMwP7ZIfbNDB+xfN/MMPtnh5iYqKgwSaohB4+chJUO3x8YgMltfWzeMsbhkSYHBpus2TPE/tliVKQDRyRp9URm5/fvjI2N5dTUVNVlHOEJw/cXBNXRhu/v3VsE4fR0MloGoaEmSScmIm7LzLHFy22xnYATHb6/fj1s3Qpgn5oktYp9bCfA4fuS1LkMthPg8H1J6lwG2wlw+L4kdS6D7QQ4fF+SOpeDR06Aw/clqXMZbCdofBxmd/bRaGxgenrY4fuS1CEMtpPg8H1J6jz2sUmSasVgkyTVisEmSaoVg02SVCsGmySpVgw2SVKtGGySpFrpivuxRcQu4J6Kdj8M7D7mswR+VsfDz2pl/JxWrhc/q7Mzc8PihV0RbFWKiKmlbmSnJ/KzWjk/q5Xxc1o5P6tv81SkJKlWDDZJUq0YbMd2bdUFdBE/q5Xzs1oZP6eV87Mq2ccmSaoVW2ySpFox2JYREU+JiA9FxJcj4q6I+MGqa+pEEXFuRNy+4OvhiHhz1XV1ooj4pYi4MyLuiIgbIuLUqmvqVBHxi+XndKf/no4UEddFxAMRcceCZUMRcVNE7Ci/D1ZZY5UMtuVdA/x9Zj4TeC5wV8X1dKTM/JfMPD8zzweeBzwKfKTisjpORIwAbwLGMvM8oA94TbVVdaaIOA/4WeACiv97r4iI0Wqr6ijvBS5dtOxXgZszcxNwc/lzTzLYjiIizgBeBLwHIDP3Z+aD1VbVFS4CvpKZVV1Q3+n6gXUR0Q+cBnyj4no61fcCn8nMRzPzIPAJ4JUV19QxMvMWoLlo8WXA9eXj64Efa2tRHcRgO7qNwC7gzyLiCxHxpxFxetVFdYHXADdUXUQnysxZ4HeBncC9wEOZeWO1VXWsO4ALI+I7IuI04OXAv6m4pk73tMy8t3x8H/C0KoupksF2dP3ADwB/nJnfDzxCDzftVyIi1gJbgL+qupZOVPZ5XEbxR9MzgNMj4vXVVtWZMvMu4J3AjcDfA7cDhyotqotkMdy9Z4e8G2xH93Xg65n5mfLnD1EEnY7uZcDnM/P+qgvpUBcDM5m5KzMPAB8GXlhxTR0rM9+Tmc/LzBcBe4C7q66pw90fEU8HKL8/UHE9lTHYjiIz7wP+NSLOLRddBPxzhSV1g9fiacjl7AReEBGnRURQ/JtyQNJRRMRTy+9nUfSvfaDaijreNuDy8vHlwN9WWEulvEB7GRFxPvCnwFrgq8AbMnNPtVV1prL/cSfwXZn5UNX1dKqI+E1gAjgIfAH4mcz8VrVVdaaI+CTwHcAB4Jcz8+aKS+oYEXED8BKKGf3vB94K/A3wQeAsiruhvDozFw8w6QkGmySpVjwVKUmqFYNNklQrBpskqVYMNklSrRhskqRaMdikDhURH42Ipyyx/Dci4lfKx/8UEWNLPOf8iHh5O+qUOo3BJnWozHz5SUy8fT7F/IpSzzHYpIpExH+OiDeVj98VEf9QPn5pRPxFRHwtIobLZW+JiLsjYjtw7qJNvSoiPluuv7Ccs/NtwER5f7yJdr4vqWoGm1SdTwIXlo/HgPURsaZcdsv8kyLieRR3TZhvhT1/0Xb6M/MC4M3AWzNzP/DrQKO8T16jtW9D6iwGm1Sd24DnRcSTgW8Bn6IIuAspQm/ehcBHynuTPUwxJ+BCH16wvXNaWrHUBfqrLkDqVZl5ICJmgJ8CbgW+CPwwMMrxTY48P9fkIfw/Ldlikyr2SeBXKE49fhJ4I/CFPHIS11uAH4uIdRExAPzoCrY7BwysdrFSNzDYpGp9Eng68KnyPnaPceRpSDLz80AD+H/Ax4DPrWC7/wh8n4NH1Iuc3V+SVCu22CRJtWKwSZJqxWCTJNWKwSZJqhWDTZJUKwabJKlWDDZJUq0YbJKkWvn/nn67ZKBFlrcAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 504x360 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
},
{
"output_type": "stream",
"text": [
"done in 0.776s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Hjaa7KmjEmLy",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "e4433893-3785-4a57-b64c-fd78d6b1d133"
},
"source": [
"plt.hist(kmeans_lsa)\n",
"plt.xlabel('KMeans LSA')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 57
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEGCAYAAAB/+QKOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAXV0lEQVR4nO3dfZBldX3n8fdHUBFlUcOIyNMoQR5klJAWtXwIoBIYCZgsAaZ8XuKI0a11TWUdH0q7TCW12RS6cSHBQSnRjfiQBMOG8QFBA2ZBGJCHUVQQUQeIDGJAlBXB7/5xT8Ol+fX0nZ6+9/Z0v19Vt/qc8/udc76nu2c+fc6593dSVUiSNN2jxl2AJGlhMiAkSU0GhCSpyYCQJDUZEJKkpu3HXcB82mWXXWr58uXjLkOSthlXXnnlHVW1rNW2qAJi+fLlrF+/ftxlSNI2I8kPZmrzEpMkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEgLzeTO465AAgwISdIMth/WhpOcBRwD3F5VB3XLPg3s13V5IvDvVXVwY92bgZ8BDwD3V9XEsOqUJLUNLSCAjwGnAR+fWlBVJ05NJzkVuGsz6x9eVXcMrTpJ0mYNLSCq6uIky1ttSQKcABwxrP1LkrbOuO5BvBj4cVXdMEN7AV9KcmWS1ZvbUJLVSdYnWb9p06Z5L1SSlqpxBcQq4JzNtL+oqg4BjgbekuQlM3WsqrVVNVFVE8uWLZvvOiVpyRp5QCTZHvgD4NMz9amqW7qvtwPnAoeOpjpJ0pRxnEG8DPh2VW1sNSZ5fJKdpqaBI4ENI6xPksQQAyLJOcClwH5JNiY5uWs6iWmXl5I8Lcm6bnZX4GtJrgEuB86vqi8Mq05JUtsw38W0aoblr28suxVY2U3fBDxnWHVJkgbjJ6klSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKlpmM+kPivJ7Uk29C2bTHJLkqu718oZ1j0qyXeS3JhkzbBqlCTNbJhnEB8Djmos/2BVHdy91k1vTLIdcDpwNHAgsCrJgUOsU5LUMLSAqKqLgTvnsOqhwI1VdVNV3Qd8CjhuXouTJM1qHPcg3prk2u4S1JMa7bsDP+qb39gta0qyOsn6JOs3bdo037VK0pI16oD4W2Af4GDgNuDUrd1gVa2tqomqmli2bNnWbk6S1BlpQFTVj6vqgar6NXAmvctJ090C7Nk3v0e3TJI0QiMNiCS79c3+PrCh0e0KYN8kT0/yGOAk4LxR1CdJesj2w9pwknOAw4BdkmwE3gccluRgoICbgTd1fZ8GfKSqVlbV/UneCnwR2A44q6q+Oaw6JUltQwuIqlrVWPzRGfreCqzsm18HPOItsJKk0fGT1JKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1DS0gEhyVpLbk2zoW/ZXSb6d5Nok5yZ54gzr3pzkuiRXJ1k/rBolSTMb5hnEx4Cjpi27ADioqp4NfBd452bWP7yqDq6qiSHVJ0najKEFRFVdDNw5bdmXqur+bvYyYI9h7V+StHXGeQ/iPwGfn6GtgC8luTLJ6hHWJEnqbD+OnSZ5N3A/8HczdHlRVd2S5CnABUm+3Z2RtLa1GlgNsNdeew2lXklaikZ+BpHk9cAxwKuqqlp9quqW7uvtwLnAoTNtr6rWVtVEVU0sW7ZsCBVL0tI00oBIchTw34Bjq+oXM/R5fJKdpqaBI4ENrb6SpOEZ5ttczwEuBfZLsjHJycBpwE70LhtdneSMru/TkqzrVt0V+FqSa4DLgfOr6gvDqlOS1DbQPYgkK6rqui3ZcFWtaiz+6Ax9bwVWdtM3Ac/Zkn1JkubfoGcQf5Pk8iR/nGTnoVYkLSErzl4x7hKkGQ0UEFX1YuBVwJ7AlUk+meTlQ61MkjRWA9+DqKobgPcA7wB+B/hQN2zGHwyrOEnS+AwUEEmeneSDwPXAEcDvVdUB3fQHh1ifJGlMBv2g3P8CPgK8q6runVpYVbcmec9QKpMkjdWgAfEK4N6qegAgyaOAHarqF1X1iaFVJ0kam0HvQXwZeFzf/I7dMknSIjVoQOxQVfdMzXTTOw6nJEnSQjBoQPw8ySFTM0l+G7h3M/0lSdu4Qe9BvA34bJJbgQBPBU4cWlWSpLEbKCCq6ook+wP7dYu+U1W/Gl5ZkqRx25LB+p4LPBs4BFiV5LXDKUlTTj/loq1q12Cu3/+AWftsXHMJy9ecP4JqpIVj0MH6PgHsA1wNPNAtLuDjQ6pLkjRmg96DmAAOnOkBP5KkxWfQS0wb6N2YliQtEYOeQewCfCvJ5cAvpxZW1bFDqUqSNHaDBsTkMIuQJC08g77N9V+S7A3sW1VfTrIjsN1wS5MkjdOgw32/Efh74MPdot2Bzw2rKEnS+A16k/otwAuBu+HBhwc9ZbaVkpyV5PYkG/qWPTnJBUlu6L4+aYZ1X9f1uSHJ6wasU5I0TwYNiF9W1X1TM0m2p/c5iNl8DDhq2rI1wIVVtS9wYTf/MEmeDLwPeB5wKPC+mYJEkjQcgwbEvyR5F/C47lnUnwX+z2wrVdXFwJ3TFh8HnN1Nnw28srHq7wIXVNWdVfVT4AIeGTSSpCEaNCDWAJuA64A3AevoPZ96Lnatqtu66X8Ddm302R34Ud/8xm7ZIyRZnWR9kvWbNm2aY0nStmvjmkvGuv+nfuXqh2Ymdx7ejoa5bTUN+i6mXwNndq95U1WVZKs+nV1Va4G1ABMTE37SW5LmyaBjMX2fxj2HqnrGHPb54yS7VdVtSXYDbm/0uQU4rG9+D+Crc9iXJGmOtmQspik7AH8IPHmO+zwPeB3w37uv/9To80XgL/puTB8JvHOO+5MkzcFA9yCq6id9r1uq6n8Cr5htvSTnAJcC+yXZmORkesHw8iQ3AC/r5kkykeQj3f7uBP4MuKJ7vb9bJkkakUEvMR3SN/soemcUs65bVatmaHppo+964I/65s8CzhqkPknS/Bv0EtOpfdP3AzcDJ8x7NZKkBWPQdzEdPuxCJEkLy6CXmN6+ufaq+sD8lCNJWii25F1Mz6X3DiSA3wMuB24YRlGSpPEbNCD2AA6pqp8BJJkEzq+qVw+rMEnSeA061MauwH198/fRHiJD26BTTzxmq9o3Z8XZK+a87kLwsGEkpjn1xGMe0X76KRcxOTk55Kqk0Rj0DOLjwOVJzu3mX8lDA+5JkhahQd/F9OdJPg+8uFv0hqr6xvDKkiSN26CXmAB2BO6uqr8GNiZ5+pBqkiQtAIM+cvR9wDt4aDykRwP/e1hFSZLGb9AziN8HjgV+DlBVtwI7DasoSdL4DRoQ91VV0Q35neTxwytJkrQQDBoQn0nyYeCJSd4IfJl5fniQJGlhmfVdTEkCfBrYH7gb2A94b1VdMOTaJEljNMiQ3ZVkXVWtAAwFSVoiBr3EdFWS5w61EknSgjJoQDwPuCzJ95Jcm+S6JNcOszAtTA4joTmb3Hlsu16+5vwZ2/ydntlmLzEl2auqfgj87ojqkSQtELOdQXwOoKp+AHygqn7Q/5rLDpPsl+TqvtfdSd42rc9hSe7q6/PeuexLkjR3s92kTt/0M+Zjh1X1HeBggCTbAbcA5za6XlJVcx9GVJK0VWY7g6gZpufLS4HvzfVsRJI0PLMFxHO6S0A/A57dTd+d5GdJ7p6H/Z8EnDND2wuSXJPk80meNdMGkqxOsj7J+k2bNs1DSZIkmOUSU1VtN6wdJ3kMvfGd3tlovgrYu6ruSbKS3r2QfWeocS2wFmBiYmIYZzmStCRtyXDf8+1o4Kqq+vH0hqq6u6ru6abXAY9OssuoC5SkpWycAbGKGS4vJXlqN8QHSQ6lV+dPRlibJC15gz5ydF51o8G+HHhT37JTAKrqDOB44M1J7gfuBU7qRpOVJI3IWAKiqn4O/Ma0ZWf0TZ8GnDbquiRJDxnnJSZJ0gJmQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaxhYQSW5Ocl2Sq5Osb7QnyYeS3Jjk2iSHjKNOSVqqxvJM6j6HV9UdM7QdDezbvZ4H/G33VZI0Agv5EtNxwMer5zLgiUl2G3dRkrRUjDMgCvhSkiuTrG607w78qG9+Y7fsYZKsTrI+yfpNmzYNqdTBLV9z/rhL2KaceuIxW9W+mF140T4ztp164jHN9snJySFWBEzuPNztL0Lz+n/CiL//4wyIF1XVIfQuJb0lyUvmspGqWltVE1U1sWzZsvmtUJKWsLEFRFXd0n29HTgXOHRal1uAPfvm9+iWSZJGYCwBkeTxSXaamgaOBDZM63Ye8Nru3UzPB+6qqttGXKokLVnjehfTrsC5SaZq+GRVfSHJKQBVdQawDlgJ3Aj8AnjDmGqVpCVpLAFRVTcBz2ksP6NvuoC3jLIuSdJDFvLbXCVJY2RASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDouH6/Q94cPrUE4/hwov2aXec3HnW9aesOHvFwPufnJwcuO9i8NSvXD2cDc/w89lSg/zsNq65ZF72ta397OfruLUwGRCSpKaRB0SSPZN8Jcm3knwzyX9p9DksyV1Jru5e7x11nZK01I3jmdT3A39SVVcl2Qm4MskFVfWtaf0uqapjxlCfJIkxnEFU1W1VdVU3/TPgemD3UdchSdq8sd6DSLIc+C3g643mFyS5JsnnkzxrpIVJksZyiQmAJE8A/gF4W1XdPa35KmDvqronyUrgc8C+M2xnNbAaYK+99hpixZK0tIzlDCLJo+mFw99V1T9Ob6+qu6vqnm56HfDoJLu0tlVVa6tqoqomli1bNtS6JWkpGce7mAJ8FLi+qj4wQ5+ndv1Icii9On8yuiolSeO4xPRC4DXAdUmmPiH1LmAvgKo6AzgeeHOS+4F7gZOqqsZQqyQtWSMPiKr6GpBZ+pwGnDaaiiRJLX6SujPbcAoztZ96YvujGlNDJpx+ykXzuu/WMB5Tpg9ZMbXvQYf56N/26adc9LAhRmbadsvyNecPtL9BTe1745pL5n3bC92Ks1c8eNyb+9lva6b/Ts51iJGhDdMyQnP5nR7VcRsQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQPTpH8phcnISJndutl+//wEPGxrgwov2eXBIhIfp1u8fsmI204dTmHG4i2m1zafZPvo/1T513FOmf1/mc98zfV9OPfGYLRoG5EHd929L1x14iJHN/XwabeMYQqR/GA/Y8uEuBh0CZrpB/j30/3uaryFGpg8fs7l9Tw2hsyVD2wy078mdH/ydvX7/Ax48xs21L19z/sNq769t2MPPGBCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKlpLAGR5Kgk30lyY5I1jfbHJvl01/71JMtHX6UkLW0jD4gk2wGnA0cDBwKrkhw4rdvJwE+r6jeBDwJ/OdoqJUnjOIM4FLixqm6qqvuATwHHTetzHHB2N/33wEuTZIQ1StKSl6oa7Q6T44GjquqPuvnXAM+rqrf29dnQ9dnYzX+v63NHY3urgdXd7H7Ad7agnF2AR2xzCfC4lxaPe2nZ0uPeu6qWtRq2n596xqeq1gJr57JukvVVNTHPJS14HvfS4nEvLfN53OO4xHQLsGff/B7dsmafJNsDOwM/GUl1kiRgPAFxBbBvkqcneQxwEnDetD7nAa/rpo8HLqpRXwuTpCVu5JeYqur+JG8FvghsB5xVVd9M8n5gfVWdB3wU+ESSG4E76YXIMMzp0tQi4HEvLR730jJvxz3ym9SSpG2Dn6SWJDUZEJKkpkUfEEt1WI8BjvvtSb6V5NokFybZexx1DsNsx97X7z8mqSSL4q2Qgxx3khO6n/s3k3xy1DUOwwC/63sl+UqSb3S/7yvHUed8SnJWktu7z4y12pPkQ9335Nokh8xpR1W1aF/0boJ/D3gG8BjgGuDAaX3+GDijmz4J+PS46x7RcR8O7NhNv3kxHPegx9712wm4GLgMmBh33SP6me8LfAN4Ujf/lHHXPaLjXgu8uZs+ELh53HXPw3G/BDgE2DBD+0rg80CA5wNfn8t+FvsZxFId1mPW466qr1TVL7rZy+h9HmUxGORnDvBn9Mb4+n+jLG6IBjnuNwKnV9VPAarq9hHXOAyDHHcB/6Gb3hm4dYT1DUVVXUzvHZ4zOQ74ePVcBjwxyW5bup/FHhC7Az/qm9/YLWv2qar7gbuA3xhJdcMzyHH3O5neXxuLwazH3p1u71lV54+ysCEb5Gf+TOCZSf41yWVJjhpZdcMzyHFPAq9OshFYB/zn0ZQ2Vlv6f0DTNj/UhrZOklcDE8DvjLuWUUjyKOADwOvHXMo4bE/vMtNh9M4YL06yoqr+faxVDd8q4GNVdWqSF9D7jNVBVfXrcRe20C32M4ilOqzHIMdNkpcB7waOrapfjqi2YZvt2HcCDgK+muRmetdnz1sEN6oH+ZlvBM6rql9V1feB79ILjG3ZIMd9MvAZgKq6FNiB3oB2i9lA/wfMZrEHxFId1mPW407yW8CH6YXDYrgWPWWzx15Vd1XVLlW1vKqW07v/cmxVrR9PufNmkN/1z9E7eyDJLvQuOd00yiKHYJDj/iHwUoAkB9ALiE0jrXL0zgNe272b6fnAXVV125ZuZFFfYqqFNazHyAx43H8FPAH4bHdP/odVdezYip4nAx77ojPgcX8RODLJt4AHgD+tqm36bHnA4/4T4Mwk/5XeDevXb+t/BCY5h17Y79LdW3kf8GiAqjqD3r2WlcCNwC+AN8xpP9v490mSNCSL/RKTJGmODAhJUpMBIUlqMiAkSU0GhCSpyYDQkpPknr7plUm+m2TvJJPd6K6/2df+tnGN+Jrk5u7zCv3Ldk3yz0mu6UZlXTet/ZVdvfuPtlotRgaElqwkLwU+BBxdVT/oFl/Hwz8L84fAN0dd22a8H7igqp5TVQcC04e3XgV8rfsqbRUDQktSkpcAZwLHVNX3+po+RzcaaJJ96A3eeEffekcmuTTJVUk+m+QJ3fL3JrkiyYYka6dGBE7y1SR/meTy7kzlxd3yZ3XLru7G6x90yIvd6A2ZAUBVXdtX2xOAF9EbWmKb/8Cnxs+A0FL0WHpB8Mqq+va0truBHyU5iO75IFMN3eWe9wAvq6pDgPXA27vm06rquVV1EPA44Ji+bW5fVYcCb6P3iVeAU4C/rqqD6Q2WuJHBnA58tHsAzruTPK2v7TjgC1X1XeAnSX57wG1KTQaElqJfAf+X3l/aLZ+iFw6vBM7tW/58eg+c+dckV9Mbw2vqSXyHp/dEwuuAI4Bn9a33j93XK4Hl3fSlwLuSvAPYu6ruHaTwqvoivYfjnAnsD3wjybKueVVX+9QxeJlJW8WA0FL0a+AE4NAk72q0/zPwGnrjU93dtzz0rv8f3L0OrKqTk+wA/A1wfFWtoPef9w59602NlPsA3fhnVfVJ4FjgXmBdkiMGLb6q7qyqT1bVa+gNVveSJE+mF0wf6Uap/VPghEXw8CuNkQGhJal7mt4rgFclObnR9g7gz6etdhnwwql3OSV5fJJn8lAY3NHdBzh+tv0neQZwU1V9CPgn4NmD1J3kiCQ7dtM7AfvQG630eOATVbV3N1LtnsD3gRcPsl2pZVGP5iptTlXd2T1V7eIkm6a1farRf1OS1wPnJHlst/g9VfXdJGcCG4B/o/dX/WxOAF6T5FfdOn8xQ79rk0w92OYzwG3AaUnup/cH3keq6ook/4PeI1T7/QO9y0wXD1CP9AiO5ipJavISkySpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJavr/sUxWpaiX9BIAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-D6kBtPxfvAp"
},
"source": [
"### Splitting Data"
]
},
{
"cell_type": "code",
"metadata": {
"id": "UQkD5Q8Pfwgk",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "ccc044e5-c9fa-42e8-d85c-b8c9aa340301"
},
"source": [
"# Import train_test_split function\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Split dataset into training set and test set\n",
"X_train, X_test, y_train, y_test = train_test_split(XLSA, kmeans_lsa, test_size=0.3,random_state=109) # 70% training and 30% test\n",
"X_train.shape, X_test.shape, y_train.shape, y_test.shape"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"((14, 93), (6, 93), (14, 93), (6, 93))"
]
},
"metadata": {
"tags": []
},
"execution_count": 58
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "h16t0wJF9J6n"
},
"source": [
"y_train = np.argmax(y_train, axis=1)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "J6OHM0e09PPP"
},
"source": [
"### Model Classifier"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Np1oA6x99M6m"
},
"source": [
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.svm import SVC\n",
"from sklearn.svm import SVR\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"# Logistic Regression\n",
"clf = LogisticRegression(penalty='l2', C=0.1)\n",
"clf.fit(X_train, y_train)\n",
"y_predLR = clf.predict(X_test)\n",
"\n",
"# Decision Tree\n",
"Dt = DecisionTreeClassifier()\n",
"Dt.fit(X_train, y_train)\n",
"y_predDT = Dt.predict(X_test)\n",
"\n",
"# Decision Tree Regressor\n",
"Dtr = DecisionTreeRegressor()\n",
"Dtr.fit(X_train, y_train)\n",
"y_predDTR = Dtr.predict(X_test)\n",
"\n",
"# Gaussian Naive Bias\n",
"gnb = GaussianNB()\n",
"gnb.fit(X_train, y_train)\n",
"y_predGNB = gnb.predict(X_test)\n",
"\n",
"# random forest classifier\n",
"rfc = RandomForestClassifier()\n",
"rfc.fit(X_train, y_train)\n",
"y_predRFC = rfc.predict(X_test)\n",
"\n",
"# random forest regressor\n",
"rfr = RandomForestRegressor(n_estimators = 100)\n",
"rfr.fit(X_train,y_train)\n",
"y_predRFR = rfr.predict(X_test)\n",
"\n",
"# Support vector classifier\n",
"ppn = SVC(C=1, random_state = 0)\n",
"ppn.fit(X_train,y_train)\n",
"y_predSVC = ppn.predict(X_test)\n",
"\n",
"# support vector regression\n",
"svm = SVR(C = 2, kernel = 'rbf', degree = 2)\n",
"svm.fit(X_train, y_train)\n",
"y_predSVR = svm.predict(X_test)\n",
"\n",
"# k neearest neighbor classifier\n",
"Knn = KNeighborsClassifier(n_neighbors = 2, p =2, metric = 'minkowski')\n",
"Knn = Knn.fit(X_train,y_train)\n",
"y_predKNN = Knn.predict(X_test)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "WuVUlR2W9Vc9",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 176
},
"outputId": "3408ff96-df5a-45d4-cfda-3a47b3455a36"
},
"source": [
"print(\"prediksi logistic regression\", y_predLR)\n",
"print(\"prediksi Decision Tree\", y_predDT)\n",
"print(\"prediksi Decision Tree Regression\", y_predDTR)\n",
"print(\"prediksi Gaussian Naive Bias\", y_predGNB)\n",
"print(\"prediksi random forest classifer\", y_predRFC)\n",
"print(\"prediksi random forest regressor\", y_predRFR)\n",
"print(\"prediksi support vector classifer\", y_predSVC)\n",
"print(\"prediksi support vector regression\", y_predSVR)\n",
"print(\"prediksi k nearest neighbor classifer\", y_predKNN)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"prediksi logistic regression [ 4 90 57 18 8 24]\n",
"prediksi Decision Tree [24 90 57 6 57 24]\n",
"prediksi Decision Tree Regression [20. 90. 20. 20. 20. 20.]\n",
"prediksi Gaussian Naive Bias [ 4 6 4 18 18 8]\n",
"prediksi random forest classifer [24 20 57 2 24 2]\n",
"prediksi random forest regressor [30.86 45.34 36.68 34.82 35.57 37.15]\n",
"prediksi support vector classifer [18 8 4 1 4 4]\n",
"prediksi support vector regression [13. 13. 13. 13. 13. 13.]\n",
"prediksi k nearest neighbor classifer [8 6 2 1 4 4]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "iFr617BpfxB2"
},
"source": [
"### Model Evaluation"
]
},
{
"cell_type": "code",
"metadata": {
"id": "LVrgc7Zsf0Mz",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 516
},
"outputId": "dc30ea4b-867d-4632-8491-62b47c14fbde"
},
"source": [
"# Python script for confusion matrix creation. \n",
"from sklearn.metrics import confusion_matrix \n",
"from sklearn.metrics import accuracy_score \n",
"from sklearn.metrics import classification_report \n",
"\n",
"# actual = [6, 36, 17, 10, 19, 19]\n",
"# actual = [4, 14, 12, 12, 8, 6]\n",
"actual = [4, 90, 20, 18, 8, 4]\n",
"\n",
"# predicted = y_predLR\n",
"# predicted = y_predDT\n",
"# predicted = y_predDTR\n",
"# predicted = y_predGNB\n",
"# predicted = y_predRFC\n",
"# predicted = y_predRFR\n",
"predicted = y_predSVC\n",
"# predicted = y_predSVR\n",
"# predicted = y_predKNN\n",
"\n",
"results = confusion_matrix(actual, predicted) \n",
"\n",
"print ('Confusion Matrix :')\n",
"print(results,'\\n') \n",
"print ('Accuracy Score :',accuracy_score(actual, predicted)) \n",
"\n",
"print ('Report : ')\n",
"print (classification_report(actual, predicted)) "
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Confusion Matrix :\n",
"[[0 0 0 0 0 0]\n",
" [0 1 0 1 0 0]\n",
" [0 1 0 0 0 0]\n",
" [1 0 0 0 0 0]\n",
" [0 1 0 0 0 0]\n",
" [0 0 1 0 0 0]] \n",
"\n",
"Accuracy Score : 0.16666666666666666\n",
"Report : \n",
" precision recall f1-score support\n",
"\n",
" 1 0.00 0.00 0.00 0\n",
" 4 0.33 0.50 0.40 2\n",
" 8 0.00 0.00 0.00 1\n",
" 18 0.00 0.00 0.00 1\n",
" 20 0.00 0.00 0.00 1\n",
" 90 0.00 0.00 0.00 1\n",
"\n",
" accuracy 0.17 6\n",
" macro avg 0.06 0.08 0.07 6\n",
"weighted avg 0.11 0.17 0.13 6\n",
"\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "le3PaCqcelnk"
},
"source": [
"# Latent Dirichlet Allocation"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "WtYTI_miu-eZ"
},
"source": [
"### Import Library"
]
},
{
"cell_type": "code",
"metadata": {
"id": "hXRRo6ZqewvI"
},
"source": [
"from time import time\n",
"from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n",
"from sklearn.decomposition import NMF, LatentDirichletAllocation\n",
"from sklearn.datasets import fetch_20newsgroups"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "CTcPMI7Zu8Cl"
},
"source": [
"### Load dataset"
]
},
{
"cell_type": "code",
"metadata": {
"id": "exIf94NSe2wi",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 90
},
"outputId": "913cbf73-26b8-468b-d89d-b20cb1367a8c"
},
"source": [
"n_samples = 2000\n",
"n_features = 1000\n",
"n_components = 10\n",
"n_top_words = 20\n",
"\n",
"\n",
"def print_top_words(model, feature_names, n_top_words):\n",
" for topic_idx, topic in enumerate(model.components_):\n",
" message = \"Topic #%d: \" % topic_idx\n",
" message += \" \".join([feature_names[i]\n",
" for i in topic.argsort()[:-n_top_words - 1:-1]])\n",
" print(message)\n",
" print()\n",
"\n",
"\n",
"# Load the 20 newsgroups dataset and vectorize it. We use a few heuristics\n",
"# to filter out useless terms early on: the posts are stripped of headers,\n",
"# footers and quoted replies, and common English words, words occurring in\n",
"# only one document or in at least 95% of the documents are removed.\n",
"\n",
"print(\"Loading dataset...\")\n",
"t0 = time()\n",
"data = cleaned_text\n",
"# data, _ = fetch_20newsgroups(shuffle=True, random_state=1,\n",
"# remove=('headers', 'footers', 'quotes'),\n",
"# return_X_y=True)\n",
"data_samples = data[:n_samples]\n",
"print(data_samples)\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading dataset...\n",
"['user creat new diari', 'user add titl diari', 'user add weather diari', 'user add date diari', 'user add hour diari', 'user add second diari', 'user save diari', 'user add photo diari', 'user read diari ha creat', 'user share diari form postcard', 'user save postcard', 'user chang postcard background color', 'user chang color post postcard', 'user delet diari', 'user edit diari ha creat', 'user open calendar contain diari', 'system open diari editor', 'applic ha high level avail oper continu seven day per week hour per day without stop', 'applic must high degre flexibilitywhich must abl run devic use kitkat version android oper system abov', 'thi applic must high level integr data secur includ secur form password user', 'thi applic ha high usabl aspect easi use interfac applic introduc two hoursth new user creat diari less three minut', 'thi applic must respons time valu quit good fast access extend perform result toler three second', 'applic must high level interoperabilitybecaus one need thi applic connect social media lineinstagramand facebook', 'document must includ system sourc code develop reread chang addit sourc code made high level maintain', 'applic must high degre flexibilityand use user age seven year abov', 'modul system design program structur way continu use system developmentso reusabl level high']\n",
"done in 0.000s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "5YgFnWv9fEqU",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 958
},
"outputId": "02e9eefa-0bd2-4cb4-8a00-585f1bd013b6"
},
"source": [
"# Use tf-idf features for NMF.\n",
"print(\"Extracting tf-idf features for NMF...\")\n",
"tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2,\n",
" max_features=n_features,\n",
" stop_words='english')\n",
"t0 = time()\n",
"tfidf = tfidf_vectorizer.fit_transform(data_samples)\n",
"print(tfidf)\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Extracting tf-idf features for NMF...\n",
" (0, 8)\t0.3288983191694328\n",
" (0, 15)\t0.6903417501591423\n",
" (0, 6)\t0.580044757687454\n",
" (0, 24)\t0.280717371592919\n",
" (1, 1)\t0.7611076249228439\n",
" (1, 8)\t0.49335826879486444\n",
" (1, 24)\t0.4210852667762693\n",
" (2, 1)\t0.7611076249228439\n",
" (2, 8)\t0.49335826879486444\n",
" (2, 24)\t0.4210852667762693\n",
" (3, 1)\t0.7611076249228439\n",
" (3, 8)\t0.49335826879486444\n",
" (3, 24)\t0.4210852667762693\n",
" (4, 12)\t0.7193414627020884\n",
" (4, 1)\t0.5287084722779107\n",
" (4, 8)\t0.3427146017708732\n",
" (4, 24)\t0.29250968037350394\n",
" (5, 20)\t0.7193414627020884\n",
" (5, 1)\t0.5287084722779107\n",
" (5, 8)\t0.3427146017708732\n",
" (5, 24)\t0.29250968037350394\n",
" (6, 19)\t0.8474770401085553\n",
" (6, 8)\t0.40376201202105216\n",
" (6, 24)\t0.344614138040728\n",
" (7, 1)\t0.7611076249228439\n",
" :\t:\n",
" (20, 15)\t0.3901562820147368\n",
" (20, 6)\t0.32782039621579584\n",
" (20, 24)\t0.15865134329829308\n",
" (21, 22)\t0.5682222250880776\n",
" (21, 2)\t0.4688079114938321\n",
" (21, 20)\t0.6762711327838453\n",
" (22, 22)\t0.4384477883362798\n",
" (22, 14)\t0.40869104530446076\n",
" (22, 11)\t0.34251499216786635\n",
" (22, 2)\t0.7234767767739462\n",
" (23, 13)\t0.5900358567968614\n",
" (23, 14)\t0.4621181329475218\n",
" (23, 11)\t0.38729106131805385\n",
" (23, 3)\t0.5369452045769898\n",
" (24, 0)\t0.4572583733680385\n",
" (24, 23)\t0.38420148038226076\n",
" (24, 7)\t0.4572583733680385\n",
" (24, 21)\t0.4572583733680385\n",
" (24, 11)\t0.3001378283680209\n",
" (24, 2)\t0.31698283815443346\n",
" (24, 24)\t0.18593742690651238\n",
" (25, 23)\t0.5066552840915535\n",
" (25, 5)\t0.6029970806763238\n",
" (25, 14)\t0.47226940851983695\n",
" (25, 11)\t0.39579862250172293\n",
"done in 0.009s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Q6TFBfw5fH0V",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 958
},
"outputId": "9bd9f3c4-9b11-44a2-ac6f-d0a943e56afe"
},
"source": [
"# Use tf (raw term count) features for LDA.\n",
"print(\"Extracting tf features for LDA...\")\n",
"tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2,\n",
" max_features=n_features,\n",
" stop_words='english')\n",
"t0 = time()\n",
"tf = tf_vectorizer.fit_transform(data_samples)\n",
"print(tf)\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Extracting tf features for LDA...\n",
" (0, 24)\t1\n",
" (0, 6)\t1\n",
" (0, 15)\t1\n",
" (0, 8)\t1\n",
" (1, 24)\t1\n",
" (1, 8)\t1\n",
" (1, 1)\t1\n",
" (2, 24)\t1\n",
" (2, 8)\t1\n",
" (2, 1)\t1\n",
" (3, 24)\t1\n",
" (3, 8)\t1\n",
" (3, 1)\t1\n",
" (4, 24)\t1\n",
" (4, 8)\t1\n",
" (4, 1)\t1\n",
" (4, 12)\t1\n",
" (5, 24)\t1\n",
" (5, 8)\t1\n",
" (5, 1)\t1\n",
" (5, 20)\t1\n",
" (6, 24)\t1\n",
" (6, 8)\t1\n",
" (6, 19)\t1\n",
" (7, 24)\t1\n",
" :\t:\n",
" (20, 11)\t1\n",
" (20, 23)\t1\n",
" (20, 22)\t1\n",
" (21, 20)\t1\n",
" (21, 2)\t1\n",
" (21, 22)\t1\n",
" (22, 2)\t2\n",
" (22, 11)\t1\n",
" (22, 14)\t1\n",
" (22, 22)\t1\n",
" (23, 3)\t1\n",
" (23, 11)\t1\n",
" (23, 14)\t1\n",
" (23, 13)\t1\n",
" (24, 24)\t1\n",
" (24, 2)\t1\n",
" (24, 11)\t1\n",
" (24, 21)\t1\n",
" (24, 7)\t1\n",
" (24, 23)\t1\n",
" (24, 0)\t1\n",
" (25, 11)\t1\n",
" (25, 14)\t1\n",
" (25, 5)\t1\n",
" (25, 23)\t1\n",
"done in 0.003s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "aShjRL7dfKeQ",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 303
},
"outputId": "1b9f92c3-6a7e-4d4a-b29c-0515765a805b"
},
"source": [
"# Fit the NMF model\n",
"print(\"Fitting the NMF model (Frobenius norm) with tf-idf features, \"\n",
" \"n_samples=%d and n_features=%d...\"\n",
" % (n_samples, n_features))\n",
"t0 = time()\n",
"nmf = NMF(n_components=n_components, random_state=1,\n",
" alpha=.1, l1_ratio=.5).fit(tfidf)\n",
"print(\"done in %0.3fs.\" % (time() - t0))\n",
"\n",
"print(\"\\nTopics in NMF model (Frobenius norm):\")\n",
"tfidf_feature_names = tfidf_vectorizer.get_feature_names()\n",
"print_top_words(nmf, tfidf_feature_names, n_top_words)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Fitting the NMF model (Frobenius norm) with tf-idf features, n_samples=2000 and n_features=1000...\n",
"done in 0.022s.\n",
"\n",
"Topics in NMF model (Frobenius norm):\n",
"Topic #0: add diari user hour second form thi applic chang color continu creat degre seven ha save high use includ level\n",
"Topic #1: abov degre use applic high oper seven user open second add thi chang color continu creat diari new form ha\n",
"Topic #2: color chang postcard user high add applic continu creat degre diari form ha hour use includ level new open oper\n",
"Topic #3: creat ha diari user new use applic add chang color continu degre form hour high includ level open oper postcard\n",
"Topic #4: open diari user high add applic chang color continu creat degre form ha hour use includ level new oper postcard\n",
"Topic #5: applic thi second high level new form user add chang color continu creat degre diari hour ha use includ open\n",
"Topic #6: save user postcard diari ha add applic chang color continu creat degre form hour high use includ level new open\n",
"Topic #7: includ level high chang form thi applic user add color continu creat degre diari hour ha use new open oper\n",
"Topic #8: continu level high use hour seven oper ha applic diari add chang color creat degre user form includ new open\n",
"Topic #9: form postcard user diari high add applic chang color continu creat degre ha hour use includ level new open oper\n",
"\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Bq4D3luyVDBo",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 214
},
"outputId": "b3efd811-6e74-4837-96b1-38aa03049ef8"
},
"source": [
"model_nmf = nmf\n",
"feature_names = tfidf_feature_names\n",
"n_top_words = n_top_words\n",
"\n",
"for topic_idx, topic in enumerate(model_nmf.components_):\n",
" message = \"Topic #%d: \" % topic_idx\n",
" message += \" \".join([feature_names[i]\n",
" for i in topic.argsort()[:-n_top_words - 1:-1]])\n",
" print(message)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Topic #0: add diari user hour second form thi applic chang color continu creat degre seven ha save high use includ level\n",
"Topic #1: abov degre use applic high oper seven user open second add thi chang color continu creat diari new form ha\n",
"Topic #2: color chang postcard user high add applic continu creat degre diari form ha hour use includ level new open oper\n",
"Topic #3: creat ha diari user new use applic add chang color continu degre form hour high includ level open oper postcard\n",
"Topic #4: open diari user high add applic chang color continu creat degre form ha hour use includ level new oper postcard\n",
"Topic #5: applic thi second high level new form user add chang color continu creat degre diari hour ha use includ open\n",
"Topic #6: save user postcard diari ha add applic chang color continu creat degre form hour high use includ level new open\n",
"Topic #7: includ level high chang form thi applic user add color continu creat degre diari hour ha use new open oper\n",
"Topic #8: continu level high use hour seven oper ha applic diari add chang color creat degre user form includ new open\n",
"Topic #9: form postcard user diari high add applic chang color continu creat degre ha hour use includ level new open oper\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "6fN35oRZV0qO",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "c788caeb-6b54-4b17-99f5-e3b87c9e0261"
},
"source": [
"plt.hist(model_nmf.components_)\n",
"plt.xlabel('Model NMF Components')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 69
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEGCAYAAABsLkJ6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAUP0lEQVR4nO3de7RedX3n8ffHRAdQRC2xTr00QK2ABhXiFS8odFS0YJUpUHFGl4Whd3pZbbQuPbara9WlaGsXnYGhjojUG6LDFJwqBJTaQgyQIUAUL1AbsTV2WrHICMJ3/tj74CEkOfs5Oftc8nu/1jorz97P3vv3/T3n5Pk8+/L8dqoKSVJ7HrLYBUiSFocBIEmNMgAkqVEGgCQ1ygCQpEatXOwCZtp///1r9erVi12GJC0b11577XeqatVc1l1SAbB69Wo2bty42GVI0rKR5O/nuq6HgCSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVF7TACsXncJTO13//RZp6/nzBNfxdZ1VzE1NQXA467Y9IB1thx8CMD9y02budya89aMWLUkLZ49JgAkSZMxACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNWrUAEjym0luSnJjkg8n2WvM9iRJw40WAEkeD/w6sLaqngasAE4aqz1J0mTGPgS0Etg7yUpgH+D2kduTJA00WgBU1TeBdwPfAL4FfLeqPrP9cklOS7IxycZt27bNS9tbDj5kp8+tXncJAGvOWwPAWaevv/+5qampBy43td+s25Ok5WrMQ0CPBo4HDgB+Anh4klO2X66qzqmqtVW1dtWqVWOVI0nazpiHgI4Bbq2qbVV1D3AR8PwR25MkTWDMAPgG8Nwk+yQJcDSwZcT2JEkTGPMcwDXAhcB1wOa+rXPGak+SNJmVY268qt4OvH3MNiRJc+M3gSWpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRe1wArDlvzW6tf/n6g+apEkla2va4AJAkDWMASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElq1KgBkORRSS5M8qUkW5I8b8z2JEnDrRx5+38K/O+qOiHJw4B9Rm5PkjTQaAGQZD/gRcAbAKrqbuDusdqTJE1mzENABwDbgP+R5Pok5yZ5+PYLJTktycYkG7dt2zZiOTNM7bcw7UjSEjZmAKwEDgf+a1U9E7gTWLf9QlV1TlWtraq1q1atGrEcSdJMYwbAVmBrVV3TT19IFwiSpCVgtACoqn8E/iHJU/pZRwM3j9WeJGkyY18F9GvABf0VQF8H3jhye5KkgUYNgKraBKwdsw1J0twMOgSUZM3YhUiSFtbQcwB/nmRDkl/ur++XJC1zgwKgql4IvA54InBtkr9M8jOjViZJGtXgq4Cq6ivAW4HfA14MvK8f4+c1YxUnSRrP0HMAhyV5L7AFeCnws1V1SP/4vSPWJ0kaydCrgP4MOBd4S1XdNT2zqm5P8tZRKpMkjWpoALwSuKuq7gVI8hBgr6r6flWdP1p1kqTRDD0HcBmw94zpffp5kqRlamgA7FVV/zY90T92bH9JWsaGBsCdSe4fyC3JEcBdu1hekrTEDT0HcAbw8SS3AwEeB5w4WlWSpNENCoCq+mKSg4HpkT2/XFX3jFeWJGlskwwG9yxgdb/O4Umoqg+OUpUkaXSDAiDJ+cBBwCbg3n52AQaAJC1TQ/cA1gKHVlWNWYwkaeEMvQroRroTv5KkPcTQPYD9gZuTbAB+MD2zqo4bpSpJ0uiGBsDUmEUshMvXHwT5xGKXIUlLxtDLQD+X5CeBJ1fVZUn2AVaMW5okaUxDh4M+FbgQOLuf9XjgU2MVJUka39CTwL8CHAncAfffHOaxYxUlSRrf0AD4QVXdPT2RZCXd9wAkScvU0AD4XJK3AHv39wL+OPC/xitLkjS2oQGwDtgGbAb+C3Ap3f2BJUnL1NCrgO4D/nv/I0naAwwdC+hWdnDMv6oOnPeKJEkLYpKxgKbtBfxH4DHzX44kaaEMOgdQVf884+ebVfUndDeKlyQtU0MPAR0+Y/IhdHsEk9xLQJK0xAx9Ez9zxuMfArcBPz/v1UiSFszQq4BeMnYhkqSFNfQQ0G/t6vmqes/8lCNJWiiTXAX0LODifvpngQ3AV8YoSpI0vqEB8ATg8Kr6HkCSKeCSqjplrMIkSeMaOhTEjwN3z5i+u58nSVqmhu4BfBDYkOST/fSrgfPGKUmStBCGXgX0R0k+Dbywn/XGqrp+vLIkSWMbeggIYB/gjqr6U2BrkgOGrJRkRZLrk/zVnCqUJI1i6C0h3w78HvDmftZDgQ8NbOM3gC2TlyZJGtPQPYCfA44D7gSoqtuBfWdbKckT6MYMOneuBUqSxjE0AO6uqqIfEjrJwweu9yfA7wL37WyBJKcl2Zhk47Zt2wZuduGddfr6xS5BkubV0AD4WJKzgUclORW4jFluDpPkVcC3q+raXS1XVedU1dqqWrtq1aqB5UiSdtesVwElCfBR4GDgDuApwNuq6rOzrHokcFySY+nuIfDIJB/yy2OStDTMGgBVVUkurao1wGxv+jPXezP9SeMkRwG/45u/JC0dQw8BXZfkWaNWIklaUEO/Cfwc4JQkt9FdCRS6nYPDhqxcVVcCV86hPknSSHYZAEmeVFXfAF62QPVIkhbIbHsAn6IbBfTvk3yiql67EEVJksY32zmAzHh84JiFSJIW1mwBUDt5LEla5mY7BPT0JHfQ7Qns3T+GH50EfuSo1UmSRrPLAKiqFQtViCRpYU0yHLQkaQ9iAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAGgXTrr9PWLXYKkkRgAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqNGC4AkT0xyRZKbk9yU5DfGakuSNLmVI277h8BvV9V1SfYFrk3y2aq6ecQ2JUkDjbYHUFXfqqrr+sffA7YAjx+rPUnSZBbkHECS1cAzgWt28NxpSTYm2bht27aFKGdJmpqa2m7GfotSx2gWuD+Xrz9oQduTlqPRAyDJI4BPAGdU1R3bP19V51TV2qpau2rVqrHLkST1Rg2AJA+le/O/oKouGrMtSdJkxrwKKMBfAFuq6j1jtSNJmpsx9wCOBF4PvDTJpv7n2BHbkyRNYLTLQKvqb4CMtX1J0u7xm8CS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANgHq05b80O51++/iAAVq+7BKb22+ly01avu+QB02edvn5Q+1sOPmSnbQMPavvME1+14+V2sb0dma0/29tZf2b2e8vBh+yy39u/RvCj/kxNTQ1ue0f9nsTO6nzcFZse1J/t7U7bk77mQ7c39G9tpq3rrgJ2/7Uc03SN0P1uJja13zxWs3QYAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjRg2AJC9P8uUkX02ybsy2JEmTGS0AkqwAzgJeARwKnJzk0LHakyRNZsw9gGcDX62qr1fV3cBHgONHbE+SNIFU1TgbTk4AXl5Vv9hPvx54TlX96nbLnQac1k8+BfjyBM3sD3xnHspdSvbEPoH9Wm7s1/LxlKrady4rrpzvSiZVVecA58xl3SQbq2rtPJe0qPbEPoH9Wm7s1/KRZONc1x3zENA3gSfOmH5CP0+StASMGQBfBJ6c5IAkDwNOAi4esT1J0gRGOwRUVT9M8qvAXwMrgPdX1U3z3MycDh0tcXtin8B+LTf2a/mYc59GOwksSVra/CawJDXKAJCkRi35AJhtOIkk/y7JR/vnr0myeuGrnNyAfv1WkpuT3JDk8iQ/uRh1Tmro8B9JXpukkiyLS/KG9CvJz/e/s5uS/OVC1zgXA/4On5TkiiTX93+Lxy5GnZNI8v4k305y406eT5L39X2+IcnhC13jpAb06XV9XzYn+dskTx+04apasj90J4+/BhwIPAz4P8Ch2y3zy8B/6x+fBHx0seuep369BNinf/xLe0q/+uX2BT4PXA2sXey65+n39WTgeuDR/fRjF7vueerXOcAv9Y8PBW5b7LoH9OtFwOHAjTt5/ljg00CA5wLXLHbN89Cn58/423vF0D4t9T2AIcNJHA+c1z++EDg6SRawxrmYtV9VdUVVfb+fvJruexRL3dDhP/4QeCfw/xayuN0wpF+nAmdV1b8AVNW3F7jGuRjSrwIe2T/eD7h9Aeubk6r6PPB/d7HI8cAHq3M18Kgk/35hqpub2fpUVX87/bfHBO8XSz0AHg/8w4zprf28HS5TVT8Evgv82IJUN3dD+jXTm+g+sSx1s/ar391+YlVdspCF7aYhv6+fBn46yReSXJ3k5QtW3dwN6dcUcEqSrcClwK8tTGmjmvT/33Iz+P1i0YeC0K4lOQVYC7x4sWvZXUkeArwHeMMilzKGlXSHgY6i+/T1+SRrqupfF7Wq3Xcy8IGqOjPJ84Dzkzytqu5b7ML0YEleQhcALxiy/FLfAxgynMT9yyRZSbeb+s8LUt3cDRomI8kxwO8Dx1XVDxaott0xW7/2BZ4GXJnkNrrjrxcvgxPBQ35fW4GLq+qeqroVuIUuEJayIf16E/AxgKr6O2AvugHVlrM9cpiaJIcB5wLHV9Wg98ClHgBDhpO4GPjP/eMTgPXVnwlZwmbtV5JnAmfTvfkvh+PJMEu/quq7VbV/Va2uqtV0xyqPq6o5D2a1QIb8HX6K7tM/SfanOyT09YUscg6G9OsbwNEASQ6hC4BtC1rl/LsY+E/91UDPBb5bVd9a7KJ2R5InARcBr6+qWwavuNhntwec/T6W7tPU14Df7+f9Ad0bB3R/kB8HvgpsAA5c7JrnqV+XAf8EbOp/Ll7smuejX9steyXL4Cqggb+v0B3euhnYDJy02DXPU78OBb5Ad4XQJuA/LHbNA/r0YeBbwD10e2ZvAk4HTp/xuzqr7/Pm5fA3OKBP5wL/MuP9YuOQ7ToUhCQ1aqkfApIkjcQAkKRGGQCS1CgDQJIaZQBIUqMMAM1ZP5rnh2ZMr0yyLclfTbid2/pr5ydepp//iRnTJyT5QP/4DX2Nx8x4/tX9vBP66Sv70TA39T8n7KCNRyQ5O8nXklzbr/OcSfq41CR5xnIY2VPjcigI7Y47gacl2buq7gJ+hsX5RuURSQ6tqpt38Nxmui84XdZPn0x3TftMr6tdfxntXOBW4MlVdV+SA+iuj1/OnkE3xMili12IFo97ANpdlwKv7B+fTPeFFQCSPCbJp/pxyq/uv6pOkh9L8pl+3Pxz6b6YM73OKUk29J/Gz06yYkANZ9INmbEjVwHPTvLQJI8AforuizKDJDkIeA7w1urHv6mqW6sfzC7dfRtu7H/O6OetTvKlJB9IckuSC5Ic0w8U95Ukz+6Xm0pyfpK/6+ef2s9Pknf129yc5MR+/lH93seF/fYvmB75NskRST7X76H8dfrRLfvl39m/prckeWH/rd8/AE7sX+cTk7x4xl7Q9Un2HfoaafkyALS7PgKclGQv4DDgmhnPvQO4vqoOA94CfLCf/3bgb6rqqcAngSfB/UMNnAgcWVXPAO4FXjegho8Bhyf5qR08V3Sf/l9GNwzw9kMdAFww481v+5Fknwpsqqp7t18pyRHAG+kC4rnAqemG8IAuaM4EDu5/foFugK7foXstph0GvBR4HvC2JD8BvIbuE/rTgWOAd+VHwxU/EziDbg/kQODIJA8F/gw4oaqOAN4P/NGMNlZW1bP79d5e3dDPb6O7x8QzquqjfV2/0r/uLwTu2sHrpD2Mh4C0W6rqhnR3YTuZBx9OeAHw2n659f0n/0fS3dziNf38S5JMj2N+NHAE8MX+g+3ewJBxkO4F3gW8mR0Pg/sR4NfpBgr8bR74BgyzHwLamRcAn6yqOwGSXET35nkxcGtVbe7n3wRcXlWVZDOwesY2/md/+OyuJFfQjdH/AuDDfej8U5LPAc8C7gA2VNXWfrub+m39K90ge5/tX7cVdMMGTLuo//fa7dqe6QvAe5JcAFw03Yb2bAaA5sPFwLvpBkPbnXsxBDivqt48h3XPpwuAB90yr6o2JFkDfL+qbslk9wu6CXh6khU72gvYhZmjt943Y/o+Hvj/bvuxWGYbm2Xmdu/ttxXgpqp63izrTC//IFX1x0kuoRsb6AtJXlZVX5qlFi1zHgLSfHg/8I7pT7wzXEV/CCfJUcB3quoOuttB/kI//xXAo/vlLwdOSPLY/rnHZOC9kKvqHuC9wG/uZJF1PPiT/5Dtfg3YCLxjxvH21Ule2ffv1Un2SfJw4Of6eZM4Psle/aGno+hG6LyK7vj8iiSr6PaYNuxiG18GVqUbr5/+fMdTZ2n3e3TDc9Ovc1BVba6qd/Y1HDxhP7QMGQDabVW1taret4Onpuiu0LkB+GN+NGz3O4AX9YdGXkM35DD9VTxvBT7Tr/NZYJJb9f0FO/+E++mqumKCbc30i8CPA19Nd1PuDwDfrqrr+scb6M59nFtV10+47RuAK+iGxv7Dqrqd7rzIDXRXK60Hfreq/nFnG+iP6Z8AvDPJ9Kidz5+l3SuAQ6dPAgNn9Cedb6AbcXI53IFOu8nRQKVFkmQK+Leqevdi16I2uQcgSY1yD0CSGuUegCQ1ygCQpEYZAJLUKANAkhplAEhSo/4/d73cO7GwC6cAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "F-pcLVhVXz-c",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 300
},
"outputId": "e184357e-4bd3-4a6f-ba10-e54bb30c8042"
},
"source": [
"plt.hist(model_nmf.fit_transform(tfidf_vectorizer.fit_transform(cleaned_text)))\n",
"plt.xlabel('model nmf fit transorm')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 70
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEJCAYAAACT/UyFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAT5UlEQVR4nO3dfZQldX3n8fdHIItENOKMhCjY6iECRyKSkcQ1KA8mi0+gESEsKuxhmZhgTjyarMS4OrubnMV1wY05RMXAGeKiC4aIGEhEAUU3CziDIwOMxqchCyKMhhXwiafv/nGrJ5ehZ6ammbp3un/v1zn39K9+VXXr29Xdn65bt+6vUlVIktrxuGkXIEmaLINfkhpj8EtSYwx+SWqMwS9JjTH4JakxgwV/kr2TXJ3kliQ3J/n9rn9FktuTrOkeLx+qBknSo2Wo6/iT7AXsVVU3JNkdWA28GjgOuK+q/vsgG5YkbdHOQz1xVd0B3NG1702yDnjafJ5ryZIlNTMzsx2rk6TFb/Xq1d+rqqWb9g8W/OOSzADPB64DXgS8OckbgVXA26rq7i2tPzMzw6pVq4YuU5IWlSS3ztU/+Ju7SZ4AXAy8paruAT4APBs4iNErgjM3s97yJKuSrNqwYcPQZUpSMwYN/iS7MAr9C6rqbwCq6s6qeqiqHgY+DBwy17pVdU5VLauqZUuXPuqViiRpnoa8qifAucC6qjprrH+vscVeA9w0VA2SpEcb8hz/i4A3AGuTrOn63gGckOQgoID1wG8PWIMkaRNDXtXzRSBzzLp8qG1KkrbOT+5KUmMMfklqjMEvSY0x+CWpMRP55O5CddvpX9jYfvoZh06xEknafjzil6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMW1dx7/iSQAc+Mx9NnatPWktAGe/6aqNfad98IhHr7pixSO+StJC5RG/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTFtfYBrO7jyqmdvbJ+YiwH47uEHTascSdpmHvFLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGtN88K/bb3/W7bf/I/rOPP6VnHn8K6dUkSQNq/ngl6TWGPyS1BiDX5IaM1jwJ9k7ydVJbklyc5Lf7/r3SPKZJF/vvj55qBokSY825BH/g8DbquoA4FeB05IcAJwOXFlV+wJXdtOSpAkZLPir6o6quqFr3wusA54GHAOc3y12PvDqoWqQJD3aRM7xJ5kBng9cB+xZVXd0s74L7DmJGiRJI4MHf5InABcDb6mqe8bnVVUBtZn1lidZlWTVhg0bhi5TkpoxaPAn2YVR6F9QVX/Tdd+ZZK9u/l7AXXOtW1XnVNWyqlq2dOnSIcuUpKYMeVVPgHOBdVV11tisS4GTuvZJwCeHqkGS9GhD3nP3RcAbgLVJ1nR97wDOAC5KcgpwK3DcgDVIkjYxWPBX1ReBbGb2kUNtV5K0ZX5yV5IaY/BLUmMMfklqjMEvSY0Z8qqeHcLM6ZdtbK/fdYqFSNIOwiN+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGDBb8Sc5LcleSm8b6ViS5Pcma7vHyobYvSZpbr+BPcuA8nnslcNQc/e+rqoO6x+XzeF5J0mPQ94j/L5Jcn+R3kzypzwpVdQ3wz/MvTZI0hF7BX1WHAicCewOrk3w0ya/Pc5tvTnJjdyroyfN8DknSPPU+x19VXwfeCbwdeAnw/iRfTfKb27C9DwDPBg4C7gDO3NyCSZYnWZVk1YYNG7ZhE5KkLel7jv+XkrwPWAccAbyqqvbv2u/ru7GqurOqHqqqh4EPA4dsYdlzqmpZVS1bunRp301Ikrai7xH/nwM3AM+rqtOq6gaAqvoOo1cBvSTZa2zyNcBNm1tWkjSMnXsu9wrgx1X1EECSxwG7VtWPquojc62Q5GPAYcCSJLcB7wYOS3IQUMB64LcfW/mSpG3VN/g/C7wUuK+b3g24AvjXm1uhqk6Yo/vcbapOkrTd9T3Vs2tVzYY+XXu3YUqSJA2pb/D/MMnBsxNJfhn48TAlSZKG1PdUz1uAjyf5DhDg54HjB6tKkjSYXsFfVV9Ksh/wnK7ra1X1wHBlSZKG0veIH+AFwEy3zsFJqKq/GqQqSdJgegV/ko8w+sTtGuChrrsAg1+SFpi+R/zLgAOqqoYsRpI0vL5X9dzE6A1dSdIC1/eIfwlwS5LrgZ/OdlbV0YNUJUkaTN/gXzFkEZKkyel7OefnkzwD2LeqPptkN2CnYUuTJA2h77DMpwJ/DXyo63oacMlQRUmShtP3zd3TgBcB98DGm7I8daiiJEnD6Rv8P62q+2cnkuzM6Dp+SdIC0zf4P5/kHcDju3vtfhz41HBlSZKG0jf4Twc2AGsZ3TzlcrbhzluSpB1H36t6Zu+R++Fhy5EkDa3vWD3fZo5z+lX1rO1ekSRpUNsyVs+sXYHXAXts/3IkSUPrdY6/qr4/9ri9qv4HoxuwS5IWmL6neg4em3wco1cA2zKWvyRpB9E3vM8caz8IrAeO2+7VSJIG1/eqnsOHLkSSNBl9T/W8dUvzq+qs7VOOJGlo23JVzwuAS7vpVwHXA18foihJ0nD6Bv/TgYOr6l6AJCuAy6rq9UMVJkkaRt8hG/YE7h+bvr/rkyQtMH2P+P8KuD7JJ7rpVwPnD1OSJGlIfa/q+dMkfwcc2nX9u6r68nBlSZKG0vdUD8BuwD1V9WfAbUmeOVBNkqQB9b314ruBtwN/1HXtAvzPoYqSJA2n7xH/a4CjgR8CVNV3gN2HKkqSNJy+wX9/VRXd0MxJfna4kiRJQ+ob/Bcl+RDwc0lOBT6LN2WRpAVpq1f1JAlwIbAfcA/wHOBdVfWZgWuTJA1gq8FfVZXk8qo6EOgd9knOA14J3FVVz+369mD0T2SGboTPqrp7HnVLkuap76meG5K8YBufeyVw1CZ9pwNXVtW+wJXdtCRpgvoG/68A1yb5ZpIbk6xNcuOWVqiqa4B/3qT7GP7lE7/nM/oEsCRpgrZ4qifJPlX1T8C/2U7b27Oq7uja38XxfiRp4rZ2jv8SRqNy3prk4qp67fbacPfeQW1ufpLlwHKAffbZZ3ttVpKat7VTPRlrP2s7bO/OJHsBdF/v2tyCVXVOVS2rqmVLly7dDpuWJMHWg782056vS4GTuvZJwCe3w3NKkrbB1k71PC/JPYyO/B/ftemmq6qeuLkVk3wMOAxYkuQ24N3AGYw+DHYKcCvesF2SJm6LwV9VO833iavqhM3MOnK+zylJeuy2ZVhmSdIiYPBLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1Jjdp7GRpOsB+4FHgIerKpl06hDklo0leDvHF5V35vi9iWpSZ7qkaTGTCv4C7giyeoky6dUgyQ1aVqnen6tqm5P8lTgM0m+WlXXjC/Q/UNYDrDPPvtMo0ZJWpSmcsRfVbd3X+8CPgEcMscy51TVsqpatnTp0kmXKEmL1sSDP8nPJtl9tg38BnDTpOuQpFZN41TPnsAnksxu/6NV9fdTqEOSmjTx4K+qbwHPm/R2JUkjXs4pSY0x+CWpMQa/JDXG4JcWiZnTL2Pm9MumXYYWAINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGrPztAtQf1de9eyN7RNzMQDfPfygaZWzQxq/A9X6M14xxUo0a91++29s7//VdRPZ5oHnHwjA2pPWbuw7+01XAXDaB4/Y4rqzf2dHHvHN/htc8aSx9g/6rzclHvFLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYr+OfsNnriwEu+q8PAnDVYWdv7PvJ3WcB8LYL/3Zj34oVKwA49MXz3OjsNcYTur54PtfSn3n8K4FHft9z+fmr1wDz//zC7DXl49eT9932YzXXtucy+/Oe/TqUSX3fW9r28c98+8a+p59x6MTruO30L0xt27PGf86z7dnfcxjmszoe8UtSYwx+SWqMwS9JjTH4JakxUwn+JEcl+VqSbyQ5fRo1SFKrJh78SXYCzgZeBhwAnJDkgEnXIUmtmsYR/yHAN6rqW1V1P/C/gGOmUIckNWkawf804P+OTd/W9UmSJiBVNdkNJscCR1XVv++m3wD8SlW9eZPllgPLu8nnAF/bhs0sAb63HcpdyNwH7oNZ7od298Ezqmrppp3T+OTu7cDeY9NP7/oeoarOAc6ZzwaSrKqqZfMrb3FwH7gPZrkf3Aebmsapni8B+yZ5ZpKfAX4LuHQKdUhSkyZ+xF9VDyZ5M/BpYCfgvKq6edJ1SFKrpjJIW1VdDlw+4CbmdYpokXEfuA9muR/cB48w8Td3JUnT5ZANktSYBR38Wxv6Icm/SnJhN/+6JDOTr3JYPfbBW5PckuTGJFcmecY06hxS3yFAkrw2SSVZdFd39NkHSY7rfhduTvLRSdc4CT3+HvZJcnWSL3d/Ey+fRp1TV1UL8sHojeFvAs8Cfgb4CnDAJsv8LvDBrv1bwIXTrnsK++BwYLeu/Tst7oNuud2Ba4BrgWXTrnsKvwf7Al8GntxNP3XadU9pP5wD/E7XPgBYP+26p/FYyEf8fYZ+OAY4v2v/NXBkkkywxqFtdR9U1dVV9aNu8lpGn5tYTPoOAfJfgPcAP5lkcRPSZx+cCpxdVXcDVNVdE65xEvrshwKe2LWfBHxngvXtMBZy8PcZ+mHjMlX1IPAD4CkTqW4ytnX4i1OAvxu0osnb6j5IcjCwd1VdxuLU5/fgF4FfTPK/k1yb5KiJVTc5ffbDCuD1SW5jdGXh702mtB2L99xtRJLXA8uAl0y7lklK8jjgLODkKZcybTszOt1zGKNXfdckObCq/t9Uq5q8E4CVVXVmkhcCH0ny3Kp6eNqFTdJCPuLvM/TDxmWS7Mzopd33J1LdZPQa/iLJS4E/Bo6uqp9OqLZJ2do+2B14LvC5JOuBXwUuXWRv8Pb5PbgNuLSqHqiqbwP/yOgfwWLSZz+cAlwEUFX/B9iV0Tg+TVnIwd9n6IdLgZO69rHAVdW9q7NIbHUfJHk+8CFGob8Yz+tucR9U1Q+qaklVzVTVDKP3OY6uqlXTKXcQff4WLmF0tE+SJYxO/XxrkkVOQJ/98E/AkQBJ9mcU/BsmWuUOYMEGf3fOfnboh3XARVV1c5L/nOTobrFzgack+QbwVmBR3e2r5z54L/AE4ONJ1iRZVOMi9dwHi1rPffBp4PtJbgGuBv6wqhbTq9++++FtwKlJvgJ8DDh5kR0M9uIndyWpMQv2iF+SND8GvyQ1xuCXpMYY/JLUGINfkhpj8GtBSLK+u/78MS0zz22/Lsm6JFfPMe+93WiX703ypiRv7PpPTvILm3m+zc6TJsEhG6StOwU4taq+OMe85cAeVfXQJv0nAzcx9yBgm52XZKc5nmswk96edgwe8WsQSWaSfDXJyiT/mOSCJC/tBgn7epJDuuX2SHJJNzb6tUl+qet/SpIruqPpvwQy9tyvT3J994G0DyXZaSu13JfkT5N8pdvGnl3/yiQf6Pq+leSwJOd1R/cru2XeBfwacG6S927yvJcy+nDc6iTHJ1mR5A+SHMtoXKQLuhofP7bOo+Z1r1Tek+QG4HVJTk3ypa7ei5PsNlbv+5P8Q1fvsV3/Xkmu6Z7vpiSHdv0nJFnb9b1nk/1xZvchphd207OvXD6b5JAkn+u20cSH4Joz7XGhfSzOBzADPAgcyOgAYzVwHqMAPwa4pFvuz4F3d+0jgDVd+/3Au7r2KxgNp7sE2B/4FLBLN+8vgDd27fXAkjlqKeBVXfu/Ae/s2isZDd07W9M9m9R7ULfc59jMGP7AfWPtFcAf9FjnEfO6uv/D2PRTxtp/AvzeWL0f7+o7gNEQxDD6NOofd+2dGI1P9AuMhidYyuiV/VXAq8f2x3Gb7J+Xde1PAFcAuwDPm/15+FhcD0/1aEjfrqq1AEluBq6sqkqyltE/BhgdTb8WoKqu6o70nwi8GPjNrv+yJHd3yx8J/DLwpYxurfB4YGtjEN0P/G3XXg38+ti8T43VdOcm9c4Aa+bzjc/DhWPt5yb5E+DnGL2i+PTYvEtqNJLkLbOvXBiNUXNekl26+WuSHAF8rqo2ACS5gNE+vQR4CLh47DnvB/6+a68FflpVD2zyc9IiYvBrSOMjgT48Nv0w8//dC3B+Vf3RNqzzQFXNjk3y0CbbHq9p03on+ffxw7H2SkZH519JcjLd4Gqd8RoDUFXXJHkxo1dGK5OcxejeE5vzk3rkef3x/bNxP1TVwxmNaqtFxnP8mrYvACcCJDkM+F5V3cPoNon/tut/GfDkbvkrgWOTPLWbt0d2zPsI38volMu2zqObd0d3BH/i1jbUff93VtWHgb8EDgauB16SZEn3HsgJwOe3oX4tYv4317StYHSa4kbgR/zLMNr/CfhYd8rlHxidr6aqbknyTuCKjG6y8gBwGnDrpAvfipXAB5P8GHhhVf14c/PmWPc/AtcxGi74Orb8TwJGrwj+MMkDwH2M3vO4I6ObjV/N6JXBZVX1yfl/O1pMHJ1TkhrjqR5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSY/4/aNj79QVqXH8AAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "0iFlXuGXfNmz",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 570
},
"outputId": "3e330fc4-8751-4238-b495-d6bd7a63a9d3"
},
"source": [
"# Fit the NMF model\n",
"print(\"Fitting the NMF model (generalized Kullback-Leibler divergence) with \"\n",
" \"tf-idf features, n_samples=%d and n_features=%d...\"\n",
" % (n_samples, n_features))\n",
"t0 = time()\n",
"nmf = NMF(n_components=n_components, random_state=1,\n",
" beta_loss='kullback-leibler', solver='mu', max_iter=1000, alpha=.1,\n",
" l1_ratio=.5).fit(tfidf)\n",
"print(\"done in %0.3fs.\" % (time() - t0))\n",
"\n",
"print(\"\\nTopics in NMF model (generalized Kullback-Leibler divergence):\")\n",
"tfidf_feature_names = tfidf_vectorizer.get_feature_names()\n",
"print_top_words(nmf, tfidf_feature_names, n_top_words)\n",
"\n",
"print(\"Fitting LDA models with tf features, \"\n",
" \"n_samples=%d and n_features=%d...\"\n",
" % (n_samples, n_features))\n",
"lda = LatentDirichletAllocation(n_components=n_components, max_iter=5,\n",
" learning_method='online',\n",
" learning_offset=50.,\n",
" random_state=0)\n",
"t0 = time()\n",
"lda.fit(tf)\n",
"print(\"done in %0.3fs.\" % (time() - t0))\n",
"\n",
"print(\"\\nTopics in LDA model:\")\n",
"tf_feature_names = tf_vectorizer.get_feature_names()\n",
"print_top_words(lda, tf_feature_names, n_top_words)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Fitting the NMF model (generalized Kullback-Leibler divergence) with tf-idf features, n_samples=2000 and n_features=1000...\n",
"done in 0.093s.\n",
"\n",
"Topics in NMF model (generalized Kullback-Leibler divergence):\n",
"Topic #0: diari add user high applic chang color continu creat degre form ha hour use includ level new open oper postcard\n",
"Topic #1: use high applic oper abov degre seven continu level form add chang color creat diari user ha includ new open\n",
"Topic #2: chang postcard color user high add applic continu creat degre diari form ha hour use includ level new open oper\n",
"Topic #3: creat ha diari new user high add applic chang color continu degre form hour use includ level open oper postcard\n",
"Topic #4: open diari user high add applic chang color continu creat degre form ha hour use includ level new oper postcard\n",
"Topic #5: applic thi second user ha add chang color continu creat degre diari form hour high use includ level new open\n",
"Topic #6: save abov degre seven user oper use applic second diari add chang color continu creat thi form postcard ha high\n",
"Topic #7: level high includ user form ha add applic chang color continu creat degre diari hour use new open oper postcard\n",
"Topic #8: hour continu level seven ha oper applic add chang color creat degre diari form user high use includ new open\n",
"Topic #9: form postcard user high add applic chang color continu creat degre diari ha hour use includ level new open oper\n",
"\n",
"Fitting LDA models with tf features, n_samples=2000 and n_features=1000...\n",
"done in 0.042s.\n",
"\n",
"Topics in LDA model:\n",
"Topic #0: applic high level oper abov ha thi seven continu degre second use open diari hour add creat includ form user\n",
"Topic #1: use second level includ continu seven hour creat new color postcard user applic oper diari abov degre open form high\n",
"Topic #2: ha seven high includ chang save level add degre new use continu open applic postcard color hour abov diari second\n",
"Topic #3: diari user add open second hour level save creat thi abov color chang degre form includ ha use continu oper\n",
"Topic #4: user diari ha creat applic high postcard new use save seven degre form abov thi level second add continu open\n",
"Topic #5: second thi new applic open color chang ha save hour user oper high diari postcard abov includ level add creat\n",
"Topic #6: open form color high includ save creat continu seven add postcard chang degre thi applic second new level use ha\n",
"Topic #7: color postcard user chang new save level creat add diari form degre applic hour continu use second seven includ ha\n",
"Topic #8: thi diari level add second open degre save form user use high ha new hour abov creat continu color applic\n",
"Topic #9: high level includ applic thi user use chang form second new seven continu add degre ha hour open creat postcard\n",
"\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "04kLtbmIWVUl",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 214
},
"outputId": "d775844b-9b7b-45c9-da91-429d97766717"
},
"source": [
"model_lda = lda\n",
"feature_names = tfidf_feature_names\n",
"n_top_words = n_top_words\n",
"\n",
"for topic_idx, topic in enumerate(model_lda.components_):\n",
" message = \"Topic #%d: \" % topic_idx\n",
" message += \" \".join([feature_names[i]\n",
" for i in topic.argsort()[:-n_top_words - 1:-1]])\n",
" print(message)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Topic #0: applic high level oper abov ha thi seven continu degre second use open diari hour add creat includ form user\n",
"Topic #1: use second level includ continu seven hour creat new color postcard user applic oper diari abov degre open form high\n",
"Topic #2: ha seven high includ chang save level add degre new use continu open applic postcard color hour abov diari second\n",
"Topic #3: diari user add open second hour level save creat thi abov color chang degre form includ ha use continu oper\n",
"Topic #4: user diari ha creat applic high postcard new use save seven degre form abov thi level second add continu open\n",
"Topic #5: second thi new applic open color chang ha save hour user oper high diari postcard abov includ level add creat\n",
"Topic #6: open form color high includ save creat continu seven add postcard chang degre thi applic second new level use ha\n",
"Topic #7: color postcard user chang new save level creat add diari form degre applic hour continu use second seven includ ha\n",
"Topic #8: thi diari level add second open degre save form user use high ha new hour abov creat continu color applic\n",
"Topic #9: high level includ applic thi user use chang form second new seven continu add degre ha hour open creat postcard\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "kWpWWELtXrSs",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "3693fc03-56b8-4ca5-e029-10fd78ceee12"
},
"source": [
"plt.hist(model_lda.components_)\n",
"plt.xlabel('Model LDA Components')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 73
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAEGCAYAAABvtY4XAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAATNUlEQVR4nO3de7SmdV338fcHZpQZ1DGbXSigW1k8AjkYNB7STB/NlaeUEh/gAXtwrSQqS8tWjeaSbafHVmFpoYlEjYcMREQSNJRB43lKcICR02SwRG2Ucmslisbx2x/Xtcc92z2z75m9r3348X6tda91nX/fa1+zP3Pt333dvztVhSSpPfstdQGSpGEY8JLUKANekhplwEtSowx4SWrUqqUuYLr169fX+Pj4UpchSSvGNddc87WqGptt3bIK+PHxcbZu3brUZUjSipHki7tbZxeNJDXKgJekRhnwktQoA16SGmXAS1KjDHhJapQBL0mNMuAlqVEGvCQ1qq2An1i3y+yZJ7yIHZuunHXTDZs3sP2II3duBzAxMQHAQVdsG65GSVokbQW8JGknA16SGmXAS1KjDHhJapQBL0mNMuAlqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjTLgJalRBrwkNcqAl6RGGfCS1CgDXpIaZcBLUqMGDfgkv5rkpiQ3Jnl/kgOGbE+S9F2DBXySg4FfATZW1ROA/YETh2pPkrSrobtoVgFrkqwC1gJfGbg9SVJvsICvqi8DfwR8Cbgd+EZVXTZzuySnJdmaZOvk5OQ+tze+6ZKd09uPOJKzTt+yc35iYoLLtxw20nGmtpt+PElaiYbsovk+4CXAY4FHAQcmOWXmdlV1dlVtrKqNY2NjQ5UjSQ84Q3bR/ARwW1VNVtU9wIXA0wZsT5I0zZAB/yXgqUnWJgnwHGD7gO1JkqYZsg/+KuAC4Frghr6ts4dqT5K0q1VDHryqzgDOGLINSdLs/CSrJDXKgJekRhnwktQoA16SGmXAS1KjDHhJapQBL0mNMuAlqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjTLgJalRBrwkNaq5gN+wecNu1x10xTbGN10CE+t2Ljvr9C0A7Nh05ffuMLFuj8eTpOWsuYCXJHUMeElqlAEvSY0y4CWpUQa8JDXKgJekRhnwktQoA16SGmXAS1KjDHhJapQBL0mNMuAlqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjRo04JM8PMkFSf4pyfYkPzpke5Kk71o18PHfCnysqo5P8iBg7cDtSZJ6gwV8knXAjwOnAlTV3cDdQ7UnSdrVkF00jwUmgb9Mcl2Sc5IcOHOjJKcl2Zpk6+Tk5IDl7LvtRxzJWadvWeoyJGmvDBnwq4BjgXdU1THAncCmmRtV1dlVtbGqNo6NjQ1YjiQ9sAwZ8DuAHVV1VT9/AV3gS5IWwWABX1X/CvxLksf3i54D3DxUe5KkXQ39FM0vA+/rn6D5PPCKgduTJPUGDfiq2gZsHLINSdLsRuqiSbJh6EIkSQtr1D74tye5Oskv9s+3S5KWuZECvqqeAZwMHApck+Svkzx30MokSfMy8lM0VXUL8AbgN4FnAm/rx5j5maGKkyTtu1H74I9O8sfAduDZwE9V1ZH99B8PWJ8kaR+N+hTNnwLnAK+vqu9MLayqryR5wyCVSZLmZdSAfyHwnaq6DyDJfsABVfXtqnrPYNVJkvbZqH3wnwDWTJtf2y+TJC1Towb8AVX1ramZftqx3SVpGRs14O9MsnOgsCQ/AnxnD9tLkpbYqH3wrwE+kOQrQICDgBMGq0qSNG8jBXxVfSbJEcDUyJCfq6p7hitLkjRfezPY2JOA8X6fY5NQVe8epCpJ0ryNFPBJ3gMcBmwD7usXF2DAS9IyNeod/EbgqKqqIYuRJC2cUZ+iuZHujVVJ0gox6h38euDmJFcDd00trKoXD1KVJGneRg34iSGLkCQtvFEfk/xUkscAh1fVJ5KsBfYftjRJ0nyMOlzwK4ELgHf2iw4GLhqqKEnS/I36JusvAU8H7oCdX/7xA0MVJUmav1ED/q6quntqJskquufgJUnL1KgB/6kkrwfW9N/F+gHgb4crS5I0X6MG/CZgErgB+HngUrrvZ5UkLVOjPkVzP/Cu/iVJWgFGHYvmNmbpc6+qxy14RZKkBbE3Y9FMOQB4GfCIhS9HkrRQRuqDr6qvT3t9uar+hO6LuCVJy9SoXTTHTpvdj+6Ofm/GkpckLbJRQ/rMadP3Al8A/teCVyNJWjCjPkXzP4cuRJK0sEbtovm1Pa2vqrcsTDmSpIWyN0/RPAm4uJ//KeBq4JYhipIkzd+oAX8IcGxVfRMgyQRwSVWdMlRhkqT5GXWogh8E7p42f3e/TJK0TI16B/9u4OokH+rnjwM2D1OSJGkhjPoUze8l+SjwjH7RK6rquuHKkiTN16hdNABrgTuq6q3AjiSPHWWnJPsnuS7JR/apQknSPhn1K/vOAH4TeF2/aDXw3hHbeDWwfe9LkyTNx6h38D8NvBi4E6CqvgI8dK6dkhxCN2bNOftaoCRp34wa8HdXVdEPGZzkwBH3+xPgN4D7d7dBktOSbE2ydXJycsTDSpLmMmrAn5/kncDDk7wS+ARzfPlHkhcBX62qa/a0XVWdXVUbq2rj2NjYiOVIkuYy51M0SQKcBxwB3AE8HnhjVX18jl2fDrw4yQvoxpB/WJL3+uEoSVoccwZ8VVWSS6tqAzBXqE/f73X0b8omeRbw64a7JC2eUbtork3ypEErkSQtqFE/yfoU4JQkX6B7kiZ0N/dHj7JzVX0S+OQ+1CdJ2kd7DPgkj66qLwE/uUj1SJIWyFx38BfRjSL5xSQfrKqXLkZRkqT5m6sPPtOmHzdkIZKkhTVXwNdupiVJy9xcXTRPTHIH3Z38mn4avvsm68MGrU6StM/2GPBVtf9iFSJJWlh7M1ywJGkFMeAlqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjTLgJalRBrwkNcqAXyQHXbFt73aYWMeGzRvYfsSRwxQkqXkGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjTLgJalRBrwkNcqAl6RGGfCS1CgDXpIaZcBLUqMMeElqlAEvSY0y4CWpUQa8JDXKgJekRhnwktSowQI+yaFJrkhyc5Kbkrx6qLYkSd9r1YDHvhd4bVVdm+ShwDVJPl5VNw/YpiSpN9gdfFXdXlXX9tPfBLYDBw/VniRpV4vSB59kHDgGuGqWdacl2Zpk6+Tk5GKUs6gmJiZ2To9vumTWbXZsunKX+d1tN6TLtxw2bAMT63ZOnnX6lmHbkgQsQsAneQjwQeA1VXXHzPVVdXZVbayqjWNjY0OXI0kPGIMGfJLVdOH+vqq6cMi2JEm7GvIpmgB/AWyvqrcM1Y4kaXZD3sE/HXg58Owk2/rXCwZsT5I0zWCPSVbV/wMy1PElSXvmJ1klqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjTLgJalRBrwkNcqAl6RGGfCS1CgDXpIaZcBLUqMMeElqlAEvSY0y4CWpUQb8AhjfdMnO6e1HHMlZp2/hzBNetNvtN2zeAMBZp28Z6fh72m6q7Q2bN+xse7cm1u2sEeDME17Ejk1XMjExAcBBV2zbZfM5j9ebOp/lYMemK3eZn35tFtTEumV13svB5VsOm9f+o/4+DNH2UprPec/FgJekRhnwktQoA16SGmXAS1KjDHhJapQBL0mNMuAlqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjTLgJalRBrwkNcqAl6RGGfCS1CgDXpIaNWjAJ3leks8luTXJpiHbkiTtarCAT7I/cBbwfOAo4KQkRw3VniRpV0PewT8ZuLWqPl9VdwN/A7xkwPYkSdOkqoY5cHI88Lyq+rl+/uXAU6rqVTO2Ow04rZ99PPC5QQpaHtYDX1vqIgbmObbBc1w5HlNVY7OtWLXYlcxUVWcDZy91HYshydaq2rjUdQzJc2yD59iGIbtovgwcOm3+kH6ZJGkRDBnwnwEOT/LYJA8CTgQuHrA9SdI0g3XRVNW9SV4F/B2wP3BuVd00VHsrxAOhK8pzbIPn2IDB3mSVJC0tP8kqSY0y4CWpUQb8AktybpKvJrlxN+uT5G398A3XJzl2sWucrxHO8VlJvpFkW/9642LXOF9JDk1yRZKbk9yU5NWzbLNir+WI59fCdTwgydVJPtuf55tm2ebBSc7rr+NVScYXv9KBVJWvBXwBPw4cC9y4m/UvAD4KBHgqcNVS1zzAOT4L+MhS1znPc3wkcGw//VDgn4GjWrmWI55fC9cxwEP66dXAVcBTZ2zzi8Cf99MnAuctdd0L9fIOfoFV1d8D/76HTV4CvLs6nwYenuSRi1PdwhjhHFe8qrq9qq7tp78JbAcOnrHZir2WI57fitdfm2/1s6v718wnS14CbO6nLwCekySLVOKgDPjFdzDwL9Pmd9DgLxbwo/2fxR9N8kNLXcx89H+yH0N39zddE9dyD+cHDVzHJPsn2QZ8Ffh4Ve32OlbVvcA3gO9f3CqHYcBrCNfSjY/xROBPgYuWuJ59luQhwAeB11TVHUtdz0Kb4/yauI5VdV9V/TDdp+mfnOQJS13TYjHgF1/zQzhU1R1TfxZX1aXA6iTrl7isvZZkNV34va+qLpxlkxV9Lec6v1au45Sq+k/gCuB5M1btvI5JVgHrgK8vbnXDMOAX38XAz/ZPYDwV+EZV3b7URS2kJAdN9WEmeTLdv7MV9QvT1/8XwPaqestuNlux13KU82vkOo4leXg/vQZ4LvBPMza7GPg//fTxwJbq33Fd6ZZ8NMnWJHk/3dMH65PsAM6ge2OHqvpz4FK6py9uBb4NvGJpKt13I5zj8cAvJLkX+A5w4gr8hXk68HLghr7/FuD1wKOhiWs5yvm1cB0fCWzuv4BoP+D8qvpIkt8GtlbVxXT/0b0nya10Dw+cuHTlLiyHKpCkRtlFI0mNMuAlqVEGvCQ1yoCXpEYZ8JLUKANe+yxJJXnvtPlVSSaTfGQvj/OFuT5As7ttZlue5NS+juuS3JLk75I8bcY265Pck+T0PbS5Osmb+2Ncm+Qfkzx/b85tuUkynuR/L3UdWhwGvObjTuAJ/QdIoPsQyXL5JOd5VXVMVR0OvBm4MMmR09a/DPg0cNIejvE7dM9RP6GqjgWOoxt5cSUbBwz4BwgDXvN1KfDCfvok4P1TK5I8IslF/Vjpn05ydL/8+5Nc1o/PfQ7dkK5T+5zSj9+9Lck7+w+ozEtVXUH3/ZunTVt8EvBa4OAkh8zcJ8la4JXAL1fVXf1x/q2qzu/Xn5TkhiQ3JvmDaft9K8kf9uf2iSRPTvLJJJ9P8uJ+m1OTfLhffkuSM6bt/2v9MW9M8pp+2XiS7Une1R/3sqn/VJMcluRjSa5JcmWSI/rlf5VurPp/6Ns+vm/izcAz+p/vryb5oWk/7+uTHD7fn7eWkaUer9jXyn0B3wKOphti9QBgG9PGEKcboOqMfvrZwLZ++m3AG/vpF9IN37oeOBL4W2B1v+7twM/2018A1s9Sw/csB04F/mzGsuOAj/bThwK39NO/D7x2luMeDVy3m/N+FPAlYIzu0+BbgOP6dQU8v5/+EHAZ3ad8nzjt/E8FbqcbsXANcCOwEfgR4AbgQOAhwE10ozyOA/cCP9zvfz5wSj99OXB4P/0Uuo/ZA/wV8AG6m7ijgFv75Tuvz7RrdHI//SBgzVL/u/K1cC+HKtC8VNX16YabPYnubn66HwNe2m+3pb9zfxjdF4b8TL/8kiT/0W//HLqQ+0w/BMoauiFeF8L08b1PoAtJgL8BzgXO3ItjPQn4ZFVNAiR5H905XQTcDXys3+4G4K6quifJDXRBPeXjVfX1fv8L6X5WBXyoqu6ctvwZdGOl3FZVU0MKXAOMpxsJ8mnAB/Ld4csfPK2Ni6rqfuDmJD+4m3P5R+C3+r9iLqyqW/bi56BlzoDXQrgY+CO6u8P5jKMdYHNVvW4hiprhGLovtYDuP6ODkpzczz8qyeEzwu1W4NFJHlZ7N0zwPVU1Nf7H/cBU98796UYqnDJzjJC5xgy5a9r0fXT/+e0H/Gd1Q+HOtc+sX2BRVX+d5Cq6v6QuTfLzVbVljlq0QtgHr4VwLvCmqrphxvIrgZOh+35P4Gt9WP49/Rt9/VMp39dvfzlwfJIf6Nc9Islj5ltckmfS9b+/K8n/oPsKt4OraryqxoH/y4w3W6vq23SDUL01yYP644wleRlwNfDM/kmc/ft9P7WXZT23P781dN1H/5/u53VckrVJDgR+ul82q/5neVtf09R3xD5xjna/ybQ3ipM8Dvh8Vb0N+DBd15Qa4R285q2qdtD1q880AZyb5Hq60RanhmR9E/D+JDcB/0DXn01V3ZzkDcBlSfYD7gF+CfjiHCVcn+T+fvp84HrghCQ/BqwFbgNeWlXb+zc0PzRj/w8C5wG/PWP5G4Dfpevi+C+6p4beWFW3J9lEN7Z4gEuq6sNz1DjT1X27hwDvraqt0L052q8DOKeqrsuevwT6ZOAd/c9tNV2X02f3sP31wH1JPkvXT/9g4OVJ7gH+le49CTXC0SSlRZbkVGBjVb1qqWtR2+yikaRGeQcvSY3yDl6SGmXAS1KjDHhJapQBL0mNMuAlqVH/DU8IgE6ViUT0AAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "peB6ZuEMYeuH",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "a2cbb8e4-f148-4343-cc50-f1c18351d088"
},
"source": [
"plt.hist(model_lda.fit_transform(tfidf_vectorizer.fit_transform(cleaned_text)))\n",
"plt.xlabel('MOdel LDA Fit Transform')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 74
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAUpElEQVR4nO3de7hldX3f8fdH0CJIEcMEqaAHeYCByEUcjK1gwUuCWkWjBaZqSR4javDWWAMxNk6atpICkhtNgkrFO15CRDFGuSjaVnHAkQHGiBdsQS6jjULQisC3f6x1hu3hzJx9hrP2PsPv/Xqe/Zy1fvu39/qeNfA5a//W2r+VqkKS1I6HTLsASdJkGfyS1BiDX5IaY/BLUmMMfklqzPbTLmAcu+22W83MzEy7DEnaplx55ZXfr6oVc9u3ieCfmZlh7dq10y5DkrYpSb47X7tDPZLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1Jht4pu7S2bNLv3PH22x25nH/ysAjt/7lE1te552JACXXLrPprZnPP1bS1ygJA3PI35JaozBL0mNMfglqTEGvyQ1ZrDgT7JXksuSXJfk2iSv79vXJLkpybr+8ZyhapAk3d+QV/XcDbyxqq5KsjNwZZLP9s+dVVVnDLhtSdJmDBb8VXUzcHO/fEeSDcBjhtqeJGk8ExnjTzIDPBH4ct/0miRXJzk3ya6bec1JSdYmWbtx48Ylreeg8w7a9HggHn3ZOh592bolqkqSJmPw4E/yCOBjwBuq6nbgL4B9gEPpPhGcOd/rquqcqlpVVatWrLjfLSMlSVtp0OBP8lC60H9/Vf01QFXdWlX3VNW9wDuAJw9ZgyTp5w15VU+AdwEbqurtI+17jHR7IXDNUDVIku5vyKt6ngq8DFifZHYg/M3A6iSHAgXcALxywBokSXMMeVXPF4HM89SnhtqmJGlhfnNXkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1Jjhrzn7jbl7FddusXn16xZA8CRT5tAMZI0II/4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4JekxgwW/En2SnJZkuuSXJvk9X37o5J8Nsn1/c9dh6pBknR/Qx7x3w28saoOBJ4CnJzkQOBU4JKq2he4pF+XJE3IYMFfVTdX1VX98h3ABuAxwLHAeX2384AXDFWDJOn+JjLGn2QGeCLwZWD3qrq5f+oWYPdJ1CBJ6gw+H3+SRwAfA95QVbcn2fRcVVWS2szrTgJOAnjsYx+71dufOfWiTcs37LDVbyNJDxqDHvEneShd6L+/qv66b741yR7983sAt8332qo6p6pWVdWqFStWDFmmJDVlyKt6ArwL2FBVbx956kLgxH75RODjQ9UgSbq/IYd6ngq8DFifZF3f9mbgNODDSV4OfBc4bsAaJElzDBb8VfVFIJt5+hlDbVeStGV+c1eSGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDVmrOBPctDQhUiSJmPcI/7/luSKJL+VZJdBK5IkDWqs4K+qI4GXAHsBVyb5QJJnDVqZJGkQY4/xV9X1wFuAU4B/Cfxpkq8n+bWhipMkLb1xx/gPTnIWsAF4OvC8qjqgXz5rwPoGt2HlAWxYecC0y5Ckidl+zH5/BrwTeHNV/WS2saq+l+Qtg1QmSRrEuMH/XOAnVXUPQJKHADtU1Y+r6r2DVSdJWnLjjvFfDDx8ZH3Hvk2StI0ZN/h3qKp/nF3pl3ccpiRJ0pDGDf47kxw2u5LkScBPttCfJOcmuS3JNSNta5LclGRd/3jO1pUtSdpa447xvwH4SJLvAQEeDRy/wGveDfw58J457WdV1RmLKVKStHTGCv6q+kqSlcD+fdPfV9XPFnjN5UlmHlh5kqSltphJ2g4HDgYOA1Yn+bdbuc3XJLm6HwradXOdkpyUZG2StRs3btzKTUmS5hr3C1zvBc4AjqD7A3A4sGortvcXwD7AocDNwJmb61hV51TVqqpatWLFiq3YlCRpPuOO8a8CDqyqeiAbq6pbZ5eTvAP45AN5P0nS4o071HMN3QndByTJHiOrL+zfV5I0QeMe8e8GXJfkCuCns41V9fzNvSDJB4GjgN2S3Ai8FTgqyaFAATcAr9y6siVJW2vc4F+z2DeuqtXzNL9rse8jSVpa417O+fkkjwP2raqLk+wIbDdsaZKkIYx7Vc8rgI8Cf9U3PQb4m6GKkiQNZ9yTuycDTwVuh003ZfnFoYqSJA1n3OD/aVXdNbuSZHu6E7SSpG3MuMH/+SRvBh7e32v3I8AnhitLkjSUcYP/VGAjsJ7uEsxP0d1/V5K0jRn3qp57gXf0D0nSNmys4E/yHeYZ06+qxy95RZKkQS1mrp5ZOwD/GnjU0pcjSRraWGP8VfWDkcdNVfXHdDdglyRtY8Yd6jlsZPUhdJ8Axv20IElaRsYN79F58++mm2DtuCWvRpI0uHGv6jl66EIkSZMx7lDPb2/p+ap6+9KUI0ka2mKu6jkcuLBffx5wBXD9EEVJkoYzbvDvCRxWVXcAJFkDXFRVLx2qMEnSMMadsmF34K6R9bv6NknSNmbcI/73AFckuaBffwFw3jAlSZKGNO5VPf85yd8CR/ZNv1FVXx2uLEnSUMYd6gHYEbi9qv4EuDHJ3gPVJEka0Li3XnwrcArwu33TQ4H3DVWUJGk44x7xvxB4PnAnQFV9D9h5qKIkScMZN/jvqqqin5o5yU7DlSRJGtK4wf/hJH8FPDLJK4CL8aYskrRNWvCqniQBzgdWArcD+wO/X1WfHbg2SdIAFgz+qqokn6qqgwDDXpK2ceMO9VyV5PBBK5EkTcS439z9ZeClSW6gu7IndB8GDh6qMEnSMLYY/EkeW1X/G/jVCdUjSRrYQkf8f0M3K+d3k3ysql40iaIkScNZaIw/I8uPH7IQSdJkLBT8tZnlBSU5N8ltSa4ZaXtUks8mub7/ueti3lOS9MAtFPyHJLk9yR3Awf3y7UnuSHL7Aq99N3DMnLZTgUuqal/gkn5dkjRBWxzjr6rttvaNq+ryJDNzmo8FjuqXzwM+Rzf5myRpQhYzLfNS2L2qbu6Xb2ELd/FKclKStUnWbty4cTLVSVIDJh38m4xO+raZ58+pqlVVtWrFihUTrEySHtwmHfy3JtkDoP9524S3L0nNm3TwXwic2C+fCHx8wtuXpOYNFvxJPgj8L2D/JDcmeTlwGvCsJNcDz+zXJUkTNO5cPYtWVas389QzhtqmJGlhUzu5K0maDoNfkhoz2FCPJE3bhpUHbFo+4OsbpljJ8uIRvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhqz/TQ2muQG4A7gHuDuqlo1jTokqUVTCf7e0VX1/SluX5Ka5FCPJDVmWsFfwGeSXJnkpPk6JDkpydokazdu3Djh8iRpYTOnXsTMqRdNu4xFm1bwH1FVhwHPBk5O8rS5HarqnKpaVVWrVqxYMfkKJelBairBX1U39T9vAy4AnjyNOiSpRRMP/iQ7Jdl5dhn4FeCaSdchSa2axlU9uwMXJJnd/geq6tNTqEOSmjTx4K+qbwOHTHq7kqSOl3NKUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL6kJZ7/qUs5+1aXTLmNZMPglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWrMNO65K0nLypo1a+ZdfrBu2yN+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5Ia43X8kga1YeUBm5YP+PqGJX//mVMvAuCG0567pO/76MvWAXDL0Ycu6nUHnXcQAOtPXL+pbfY+ACf/5dOXqLoHxiN+SWqMwS9JjTH4JakxBr8kNWYqwZ/kmCR/n+SbSU6dRg2S1KqJB3+S7YCzgWcDBwKrkxw46TokqVXTOOJ/MvDNqvp2Vd0FfAg4dgp1SFKTUlWT3WDyYuCYqvrNfv1lwC9X1Wvm9DsJOKlf3R/4AfD9Sda6lXbDOpeSdS4t61xay73Ox1XVirmNy/YLXFV1DnDO7HqStVW1aooljcU6l5Z1Li3rXFrbSp1zTWOo5yZgr5H1Pfs2SdIETCP4vwLsm2TvJA8DTgAunEIdktSkiQ/1VNXdSV4D/B2wHXBuVV07xkvPWbjLsmCdS8s6l5Z1Lq1tpc6fM/GTu5Kk6fKbu5LUGINfkhqz7IJ/oekckvyTJOf3z385yczkqxyrzqcluSrJ3f13F6ZijDp/O8l1Sa5OckmSxy3TOl+VZH2SdUm+OK1ve4873UiSFyWpJFO51G+M/fnrSTb2+3Ndkt9cjnX2fY7r/xu9NskHJl1jX8NC+/OskX35jSQ/nEadY6uqZfOgO9n7LeDxwMOArwEHzunzW8Bf9ssnAOcv0zpngIOB9wAvXsb782hgx3751ct4f/7TkeXnA59ejnX2/XYGLge+BKxajnUCvw78+aRr24o69wW+Cuzar//icqxzTv/X0l20MrV9u9BjuR3xjzOdw7HAef3yR4FnJMkEa4Qx6qyqG6rqauDeCdc2apw6L6uqH/erX6L7XsWkjVPn7SOrOwHTuCph3OlG/hD4I+D/TbK4EdvKtCjj1PkK4Oyq+geAqrptwjXC4vfnauCDE6lsKy234H8M8H9G1m/s2+btU1V3Az8CfmEi1c1TQ2++OpeDxdb5cuBvB61ofmPVmeTkJN8C/ivwugnVNmrBOpMcBuxVVRdNsrA5xv13f1E/xPfRJHvN8/zQxqlzP2C/JP8jyZeSHDOx6u4z9v9H/VDp3sClE6hrqy234NeUJHkpsAo4fdq1bE5VnV1V+wCnAG+Zdj1zJXkI8HbgjdOuZQyfAGaq6mDgs9z3KXq52Z5uuOcouiPpdyR55FQr2rITgI9W1T3TLmRLllvwjzOdw6Y+SbYHdqGbwG2StpVpJ8aqM8kzgd8Dnl9VP51QbaMWuz8/BLxg0Irmt1CdOwNPAD6X5AbgKcCFUzjBu+D+rKofjPxbvxN40oRqGzXOv/uNwIVV9bOq+g7wDbo/BJO0mP8+T2CZD/MAy+7k7vbAt+k+Ks2eRPmlOX1O5udP7n54OdY50vfdTO/k7jj784l0J672Xeb/7vuOLD8PWLsc65zT/3NM5+TuOPtzj5HlFwJfWqZ1HgOc1y/vRjfk8gvLrc6+30rgBvovxi7nx9QLmGfnPYfur/q3gN/r2/4j3dEowA7AR4BvAlcAj1+mdR5Od7RyJ90nkmuXaZ0XA7cC6/rHhcu0zj8Bru1rvGxLgTvNOuf0nUrwj7k/39bvz6/1+3PlMq0zdMNn1wHrgROWY539+hrgtGnUt9iHUzZIUmOW2xi/JGlgBr8kNcbgl6TGGPyS1BiDX5IaY/BrSfUzUr5vZH37fhbIT460vaCfKmBDP+PmvF/GSjKT5JoFtjdvny20vzvJd5J8rZ9F8T1J9pzT5wX977FyC9u9Z2Q2xnX99v7nyLb/zTyvOWik///t61iX5OIt/Y5bK8mR/YyW65I8fIhtaNtk8Gup3Qk8YSRonsXItxyTHAKcARxbVQfQzbR5RpKDJ1jjm6rqEGB/upkfL013/+dZq4Ev9j835ydVdejI44aq+hf9czPA/YK/qtbP9qe7z/Sb+vVnzvbpv42+VF4CvK3fxk8W6rzE29YyZvBrCJ8Cntsvz52p8N8D/6W6r9/T/3wb8CaAJE/qj8a/Rvctbfr27ZKcnuQr/aeFVz7QIqtzFnAL8Ox+O48AjqCbsO6Exbxfkn/sF08DjuyPtP/dGK/7XJI/TrIWeH2S56W718RXk1ycZPe+35ok5/b9v53kdX37Tkku6vfbNUmO7+fXPw74wyTvT+f0/vn1SY7vX3tUki8kuRC4rl//fJKP99s4LclLklzRv26fxewTLU8Gv4bwIeCEJDvQ3ZPgyyPP/RJw5Zz+a/t2gP8OvLY/Ih/1cuBHVXU43beiX5Fk7yWq9yq6r9tDN93up6vqG8APkmxuDpuHjwzbXDDnuVOBL/RH2meNWcPDqmpVVZ1J92njKVX1RLp9+Tsj/VYCv0o3VfBbkzyUblqD71XVIVX1hL7+d3Lfp4qXAL8GHAocAjwTOD3JHv17Hga8vqr269cPAV4FHAC8DNivqp5MN6fPa8f8fbSM+dFOS66qrk53Z7TVdEf/Y+lnXXxkVV3eN72X/kgc+BXg4Nx3N7Nd6Cbr+sYSlDx6P4fVdNNDQBe6q7n/Hyroh3qWYNuzzh9Z3hM4vw/mhwHfGXnuouomV/tpktuA3emmMjgzyR8Bn6yqL8zz/kcAH6xu1shbk3ye7g/o7cAVs5/Ael+pqpsB0k2D/Zm+fT3djXu0jfOIX0O5kG4sf+5Mhddx/5kgn0Q3b8yWhO6TwOyY+t5V9ZkFXjOuJwIbkjwKeDrwzn52zTcBxyUTudHPnSPLf0Z3d6yDgFfSzU81a3T21HuA7ftPJ4fRBfN/SvL7D2Dbc7dx78j6vXiw+KBg8Gso5wJ/UFXr57SfAfxu/4mA/uebgTOr6ofAD5Mc0fd9ycjr/g54dT+0QZL9kuz0QArsx71fB+wBfBp4MfDeqnpcVc1U1V50R9tHLvKt76Cbonlr7cJ9J8RPXKhzkn8G/Liq3kd3P4XD5un2BeD4/lzJCuBpdJMcqkH+9dYgqupG4E/naV+X5BTgE32I/wz4napa13f5DeDcJMV9QwzQjS/PAFf1R+AbWXhO/v2T3DiyPnui9fQk/wHYke52k0dX1V1JVtPdMnHUx+iGey5nfFcD9/QnqN+9iHH+WWuAjyT5B7o7OS10LuMgut/pXrr9+ep5+lwA/HO62TiLbp/fsqVLVvXg5eycktQYh3okqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWrM/wfqFIoRG0MWCAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "wfWD2YJzWgID"
},
"source": [
"# len(model.components_)\n",
"# len(data_samples)\n",
"# len(model.fit_transform(vectorizer.fit_transform(cleaned_text)))\n",
"# len(model.fit_transform(vectorizer.fit_transform(model.components_)))\n",
"# len(model.fit_transform(vectorizer.fit_transform(data_samples)))\n",
"\n",
"# len(nmf.components_)\n",
"# len(model.fit_transform(nmf.components_))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "syVx1hqKVKh5"
},
"source": [
"### Topic Visualisation"
]
},
{
"cell_type": "code",
"metadata": {
"id": "GxypzGJcVNNI",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 385
},
"outputId": "a38d1034-e8a4-4d92-fe8b-088d4eaecb72"
},
"source": [
"import umap\n",
"\n",
"print(\"Loading Topic Visualisation...\")\n",
"t0 = time()\n",
"\n",
"topic_lda = tf\n",
"embedding = umap.UMAP(n_neighbors=3, min_dist=0.1, random_state=4).fit_transform(topic_lda)\n",
"plt.figure(figsize=(7,5))\n",
"plt.scatter(embedding[:, 0], embedding[:, 1], c = None, s = 50 ,edgecolors=None)\n",
"plt.title('Topic LDA Visualization')\n",
"plt.xlabel('widht')\n",
"plt.ylabel('Height')\n",
"plt.show()\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Topic Visualisation...\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAbAAAAFNCAYAAABhQjrtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAf+klEQVR4nO3dfXRcd33n8c8n40hEckLwAwE5mKF0T1hK7QRk89SwkJjHCihbiqFAsbfdlB6eskvLFkN5SuHQbraEwiltCtihCQQ2wC6VCZvwnBweFDnEBuy0PQQ5JEqI7RBiSVSOlO/+ce84sjySRtLcuffOvF/n6Hg0d2bu91rW/fj3u7/7+zkiBABA2ZySdwEAACwFAQYAKCUCDABQSgQYAKCUCDAAQCkRYACAUiLA0JZs/73tv8i7juWwvd72mO1Khvt4tu07Znz/Y9vPzmA/Y7Z/rdmfi85GgCF36cmt9vWg7V/N+P7VS/nMiHh9RFyyhFpGbG+p8/yz09pqdd1h+3O2N9V57ePS135snv08zPZ9ti+os+1Dtq+JiNsjYmVETC/2OJYqIn4jIr65nM+w/U3bfzTrc1dGxG3LKg6YhQBD7tKT28qIWCnpdkkvnvHcVXnXN8NoWuPpkp4m6VZJN9i+cNbr/kDSLyRttd1d74Mi4t8lfTZ97XFpa+tVkq5ocu1A2yHAUFi2u21fZns0/bqsFgi1ri/bO2wfTltOr57x3l22/3LG9y+1fYvt+23/xPYLllpXJO6IiHdJ+rikv5qxHysJpXdKekDSi+f5qCsk/a7tnhnPPV/J7+W1tqu2w/aK9LO32b7N9lHbP60dr+332L5yRg2z37fd9oH0fbfZ/uO5CprZAk1biLUW53j6mVXbj7A9aPuQ7V+kj89O3/N+SedL+mj6vo+mz4ftX08fP9z2p9L3H7T9TtunzDjGG21fmn72T22/sIEfCzoQAYYie4eSls65kjZK2qwkGGoeJWmNpHWSXifpctvnzP4Q25slfUrSn0k6U9KzJI00qcYvSHqy7d70+9+SdLakqyV9Lq2rroj4jqS7JP3nGU+/VtKnI2Jq5mvTz/9bSS+MiNMlPUPSLQ3WeI+kAUlnSNou6UO2n7zQmyLizBkt4w9LukHSnUrOGzslPVbSekm/kvTR9D3vSF/3xvS9b6zz0R+R9HBJvybpPykJ/O0ztj9V0r8o+dn+taRPpP8xAE5AgKHIXi3pfRFxT0QckvReJSf4mf4iIiYj4luSdkt6RZ3P+UNJn4yI6yPiwYi4MyJubVKNo5KsJBilJLCujYhfSPq0pBfYfuQ87/+U0m5E22dIeqnm7j58UNKTbJ8WEXdFxI8bKTAidkfET9KW47ckXaekldQQ21sl/b6k342IByLiSER8PiImIuKopPcrCaJGPqsi6ZWS3h4RRyNiRNL/0ok/14MR8Y/ptb8rJD1a0lmN1ovOQYChyPokHZzx/cH0uZpfRMT4PNtrHiPpJ80vT1LS+gtJ99k+TdLvSbpKkiLiu0qu6f3+PO//J0nPsd0n6eWSfhIRP5j9ovQ4t0p6vaS7bO+2/YRGCrT9Qtvfs32v7fskvUhJ66aR956npHX1svQ/EbLdY/sf0u6/+yV9W9KZbmy05BpJp+rkn+u6Gd/fXXsQERPpw5WN1IvOQoChyEaVdFPVrE+fq3nEjK67ettrfibp8c0vT5L0Mkk3pwHzMiXddH9n+27bd+uh7s26IuKgki631yhphcw5eCMi/l9EPFdJi+RWSf+YbhqXNPM62qNqD9Jrhp+XdKmksyLiTElfVtJqnFfacvw/kt4wK1TfKukcSU+NiDOUdMlqxmfOt8TFYSXXBmf/XO9cqB5gNgIMRfYZSe+0vdb2GknvknTlrNe813aX7fOVXOf533U+5xOSttu+0PYpttct0Ho5NR3mXvtaMXOjE+tsv1vSH0nakW56naRPSvpNJdftzpX0TEkbbf/mPPu7QtIb09fWHXVp+6x0IEqvpElJY0q6FKXkWtiznNw39nBJb5/x1i5J3ZIOSZpKB0Q8b55aavtbIekaSVdGxOdmbT5dyXWv+2yvkvTuWdt/ruT61knSbsHPSXq/7dNtP1bSf9fJP1dgQQQYiuwvJQ1L2ifph5JuTp+ruVvJcPVRJSf+19e7thURQ0oHL0j6paRv6cQWwGxfVnKCrn29J32+z/aYkvC4SUlQPTsirrO9TtKFki6LiLtnfO2R9BXN0wpT0kJaJelrEXHXHK85RcmJflTSvUquOf1JenzXKxmSv0/SHkmDM479qKQ3KwmNXyjpzvzSPLXUnK3kOtnFPvE+vfWSLpN0mpLW1PfS45vpw5Jeno4i/Ns6n/0mJa3G2yTdqORa4ScbqAk4gVnQEmXkZLaIKyPi7LxrAZAPWmAAgFIiwAAApUQXIgCglGiBAQBKiQADAJTSioVf0jpr1qyJarWadxkAgILYs2fP4YhYW29boQKsWq1qeHg47zIAAAVh++Bc2+hCBACUEgEGACglAgwAUEoEGACglAgwAEApEWAAgFIq1DD6ZhmbnNLg3lGNHBlXdXWvBjb2aWV3Wx4qAHSstjur3zRyr7btHFKENHFsWj1dFV2ye792bd+sTdVVeZcHAGiStulCHJuc0q7vjOhVl39X45PTmjg2LSkJsfHJaW3bOaTxyamcqwQANEtbtMBqra4Hph7U1IP1XxMhDe4b1dZN61tbHAAgE6UPsLHJqbR1NT3v6yaOTWvk8ESLqgIAZK30XYiDe0fVyJJmPV0VVdf0ZF8QAKAlSh9gI0fGj1/vmo8tDWzoa0FFAIBWKH2AVVf3qqerMuf2rorV213Rru2b1ctQegBoG6UPsIGNfbLrbzu1Yr1z4D9qaMcWhtADQJspfYCt7F6Rtq4qx1tiPV0V9XZX9On/+jT9wdMfR8sLANpQW5zZN1VXaWjHFg3uG9XI4QlV1/RoYEMfwQUAbaxtzvC93Su4xwsAOkjpuxABAJ2JAAMAlBIBBgAoJQIMAFBKmQWY7XNs3zLj637bF2e1PwBAZ8lsFGJE/IukcyXJdkXSnZK+mNX+AACdpVVdiBdK+klEHGzR/gAAba5V94G9UtJnWrSvBY1NTmlw76hGjoyrurpXAxv7tJKbngGgVByNrEWynB3YXZJGJf1GRPy8zvaLJF0kSevXr3/KwYPZNtJqi19GJGuE9XRVZEu7tm9mvkQAKBjbeyKiv962VnQhvlDSzfXCS5Ii4vKI6I+I/rVr12ZayMzFL2tLsEwcm9b45HT6/FSm+wcANE8rAuxVKkj34XyLX0ZIg/tGW1sQAGDJMg0w272SnivpC1nup1HzLX45cWxaI4cnWlwRAGCpMh25EBHjklZnuY/FqC1+WS/Eeroqqq7pyaEqAMBSdNRMHPMtfmlLAxv6WlsQAGDJOirA5lv8MnmeofQAUBYdd8Zm8UsAaA8dedZm8UsAKL+O6kIEALQPAgwAUEoEGACglAgwAEApEWAAgFIiwAAApUSAAQBKqe3uA2OxSgDoDG11Zq+3WOUlu/ezWCUAtKG26UJksUoA6CxtE2AsVgkAnaVtAozFKgGgs7RNgNUWq6yHxSoBoP20TYAtdrHKsckpXT10uz547QFdPXS7xrhGBgCl0jajEGuLVc4ehWjrpMUqGa0IAOXnmGvkQw76+/tjeHh4WZ8xPjk172KVY5NTeuoHvqrxyZOvl/V2VzS0YwuLWwJAQdjeExH99ba13Zl6ocUqGxmtyGKXAFB8bXMNrFGMVgSA9tBxAcZoRQBoDx0XYIsdrQgAKKaOC7DaaMXe7srxllhPV0W93ZWTRisCAIqrI8/Wm6qrNLRjy7yjFQEAxdaxZ+yFRisCAIqt47oQAQDtgQADAJQSAQYAKCUCDABQSgQYAKCUCDAAQCllGmC2z7R9je1bbR+w/fQs9wcA6BxZ3wf2YUlfiYiX2+6SxESDAICmyCzAbD9c0rMkbZOkiDgm6VhW+wMAdJYsuxAfJ+mQpJ22f2D747Z7M9wfAKCDZBlgKyQ9WdLHIuI8SeOS/nz2i2xfZHvY9vChQ4cyLAcA0E6yDLA7JN0REd9Pv79GSaCdICIuj4j+iOhfu3ZthuUAANpJZgEWEXdL+pntc9KnLpS0P6v9AQA6S9ajEN8k6ap0BOJtkrZnvD8AQIfINMAi4hZJ/Vnuo5nGJqc0uHdUI0fGVV3dq4GNfVrJGmEAUEicnVM3jdyrbTuHFCFNHJtWT1dFl+zer13bN2tTdVXe5QEAZmEqKSUtr207hzQ+Oa2JY9OSkhAbn5xOn5/KuUIAwGwEmKTBvaOKqL8tQhrcN9raggAACyLAJI0cGT/e8ppt4ti0Rg5PtLgiAMBCCDBJ1dW96umq1N3W01VRdQ1TOAJA0RBgkgY29smuv82WBjb0tbYgAMCCCDBJK7tXaNf2zertrhxvifV0VdTbXUmfZ7AmABQNZ+bUpuoqDe3YosF9oxo5PKHqmh4NbOgjvACgoDg7z9DbvUJbN63PuwwAQAPoQgQAlBIBBgAoJQIMAFBKBBgAoJQIMABAKRFgAIBSIsAAAKVEgAEASokAAwCUEjNxzGNsckqDe0c1cmRc1dW9GtjYp5VMLQUAhcDZeA43jdyrbTuHFJGsCdbTVdElu/dr1/bN2lRdlXd5ANDx6EKsY2xyStt2Dml8cvr4QpcTx6Y1PjmdPj+Vc4UAAAKsjsG9o4qovy1CGtw32tqCAAAnIcDqGDkyfrzlNdvEsWmNHJ5ocUUAgNkIsDqqq3uPL2w5W09XRdU1PS2uCAAwGwFWx8DGPtn1t9nSwIa+1hYEADgJAVbHyu4V2rV9s3q7K8dbYj1dFfV2V9LnGbwJAHnjTDyHTdVVGtqxRYP7RjVyeELVNT0a2NBHeAFAQXA2nkdv9wpt3bQ+7zIAAHXQhQgAKCUCDABQSh3fhch8hwBQTh19pma+QwAor47tQmS+QwAot44NMOY7BIByy7QL0faIpKOSpiVNRUR/lvtbDOY7BIBya8U1sOdExOEW7GdRavMd1gsx5jsEgOLr2C5E5jsEgHLLOsBC0nW299i+KON9LQrzHQJAuTnmGsnQjA+310XEnbYfKel6SW+KiG/Pes1Fki6SpPXr1z/l4MGDmdVTz/jkFPMdAkBB2d4z1/iJTANsVhHvkTQWEZfO9Zr+/v4YHh5uST0AgOKbL8Ay60K03Wv79NpjSc+T9KOs9gcA6CxZ9pWdJemLTkZKrJD06Yj4Sob7AwB0kMwCLCJuk7Qxq88HAHS2jh1GDwAoNwIMAFBKBBgAoJQIMABAKTUUYLa/1shzAAC0yryjEG0/TFKPpDW2HyGpNnvgGZLWZVwbAABzWmgY/R9LulhSn6Q9eijA7pf00QzrKpSxySkN7h3VyJFxVVf3amBjn1Yy3RQA5KqhqaRsvykiPpJ1MUWcSuqmkXu1beeQIpJ1wnq6KrKlXds3a1N1Vd7lAUBbm28qqYaaERHxEdvPkFSd+Z6I+FRTKiyosckpbds5pPHJh9YMq60ftm3nkIZ2bGHiXwDISaODOP5J0qWSfkvSpvSrMKsrZ2Vw76jmaqBGSIP7RltbEADguEabD/2Snhitmrq+IEaOjNddsVlKWmIjhydaXBEAoKbR+8B+JOlRWRZSRNXVvccXu5ytp6ui6pqeFlcEAKhZaBj9PytZVfl0SfttD0marG2PiJdkW16+Bjb26ZLd++tus6WBDX0trggAULNQF+Kci092gpXdK7Rr++Y5RyEygAMA8jPvGTgivtWqQopqU3WVhnZs0eC+UY0cnlB1TY8GNvQRXgCQs4bOwraPKulKnOmXkoYlvTVd+6tt9Xav0NZN6/MuAwAwQ6PNiMsk3SHp00pm43ilpMdLulnSJyU9O4viAACYS6OjEF8SEf8QEUcj4v6IuFzS8yPis5IekWF9AADU1WiATdh+he1T0q9XSPr3dFtH3RsGACiGRgPs1ZJeK+keST9PH7/G9mmS3phRbQAAzKnRuRBvk/TiOTbf2LxyAABozEI3Mr8tIv7a9kdUp6swIt6cWWUAAMxjoRbYgfTPYq1xAgDoeAvdyPzP6Z9XSJLtnohgBlsAQO4aXU7l6bb3S7o1/X6j7b/LtDIAAObR6CjEyyQ9X9IRSYqIvZKelVVRAAAspNEAU0T8bNZT9RfKAgCgBRqdSupntp8hKWyfKuktemiABwAALddoC+z1kt4gaZ2kOyWdm34PAEAuGr2R+bCS2TgAACiEhW5krnsDcw03MgMA8rJQC2zmDczvlfTuDGsBAKBhC93IfEXtse2LZ34PAOhsY5NTGtw7qpEj46qu7tXAxj6tbOFq9YvZ05KWTbFdUdKSuzMiBpbyGQCAfM0Oq74zT9OfXLVHEdLEsWn1dFV0ye792rV9szZVV7WkplZEZW3I/Rkt2BcAoMluGrlX23YOHQ+r0049Rb964METXjNxLLk1eNvOIQ3t2KLeFrTE5h1Gb/uo7ftt3y9pQ+1x7fmFPtz22ZJ+W9LHm1QvAKCFxiantG3nkMYnp4+H1OzwmilCGtw32pLaFroGdvoyP/8ySW+TtNzPAQDkYHDvqGIRF5Amjk1r5HBr5nxveCqpxbI9IOmeiNizwOsusj1se/jQoUNZlQMAWIKRI+PHW16N6OmqqLqmJ8OKHpJZgEl6pqSX2B6RdLWkC2xfOftFEXF5RPRHRP/atWszLAcAsFjV1b3q6ao0/HpbGtjQl2FFD8kswCLi7RFxdkRUJb1S0tcj4jVZ7Q8A0HwDG/tkz739tFOTcOvpqqi3u6Jd2ze3ZACH1JpRiACAklrZvUK7tm8+YRRiT1dFtvSx1zxFd933K40cnlB1TY8GNvS1LLwkybGYq3MZ6+/vj+Hh4YVfCABoqfHJKQ3uG215WNneExH99bbRAgMALKi3e4W2blqfdxknyHIQBwAAmSHAAAClRBciAKBpWjnBLwEGAGiK2XMmZj3BL12IAIBlqzdn4sSxaY1PTqfPTzV9nwQYAGDZ5pszMasJfgkwAMCyzTdnYlYT/BJgAIBlm2/OxKwm+CXAAADLNt+ciVlN8EuAAQCWrTZnYm935XhLLOsJfhlGDwBoik3VVRrasaVlcyYSYACApmnlnIl0IQIASokAAwCUEl2IANCmWjkvYR7a50gAAMe1el7CPNCFCABtJo95CfNAgAFAm8ljXsI8EGAA0GbymJcwDwQYALSZPOYlzAMBBgBtJo95CfNAgAFAm8ljXsI8tMdRAABO0Op5CfPQPkcCADhBK+clzANdiACAUiLAAAClRIABAEqJAAMAlBIBBgAoJQIMAFBKDKNvoXZfmwcAWomzZ4t0wto8ANBKdCG2QKeszQMArZRZgNl+mO0h23tt/9j2e7PaV9F1yto8ANBKWXYhTkq6ICLGbJ8q6Ubb10bE9zLcZyF1yto8ANBKmbXAIjGWfntq+jVHO6S9dcraPADQSpleA7NdsX2LpHskXR8R389yf0XVKWvzAEArZRpgETEdEedKOlvSZttPmv0a2xfZHrY9fOjQoSzLyU2nrM0DAK3kmGt0QbN3ZL9L0kREXDrXa/r7+2N4eLgl9eRhfHLqhLV5nnPOI/X1W+/hvjAAx3G/6Ils74mI/rrbsgow22slPRAR99k+TdJ1kv4qIgbnek+7B9hM9e4Ls8V9YUCbWUwgcV44WV4BtkHSFZIqSroqPxcR75vvPZ0SYGOTU3rqB76q8cmTRyb2dlc0tGML3YpAG1hMIHFeqG++AMtyFOK+iDgvIjZExJMWCq9Own1hQPtb7AQGnBcWj5k4csB9YUD7W2wgcV5YPAIsB9wXBrS/xQYS54XFI8BywH1hQPtbbCBxXlg8AiwH3BcGtL/FBhLnhcVr2X1gjeiUUYg1s+8LG9jQxz9SoI0sZVg854UT5TKMfik6LcAAtD8CaXnmCzD+FguKu/GB9tDbvUJbN63Pu4y2xBmxgFi9GQAWxiCOgmH1ZgBoDAFWMNyNDwCNoQuxYLgbHygPrlXni7/pgqnd/FgvxLgbHygOrlXnjy7EguFufKD4uFZdDARYwXA3PlB8XKsuBs6GBbSpukpDO7Zw8yNQUFyrLgbOiAXFzY9AcZX5WnU7DTwpZ9UAkKOBjX26ZPf+utuKfK263QaecA0MABapjNeq23HgSfH+lgGgBOa7Vl3EbrpGBp6U7bIFAQYAS1TvWnVRu+naceAJXYgA0CRF7qZb7ArRZUCAAUCTFPn+sHacJIEAA4AmKXI3XTMGnoxNTunqodv1wWsP6Oqh2zWW88AProEBQJMU/f6w5UySUMRre7TAAKBJytBNVxt48j9e+ARt3bS+4ZZXEa/tEWAA0CRlvD+sEUW9tlfOv00AKKh2nMu0qNf2yvs3CgAF1W5zmRb12h5diACAeRX12h4BBgCYV1Gv7dGFCABtIOv5F4t4bc8x19CSHPT398fw8HDeZQBAqdS7R8tW7vMvNoPtPRHRX28bXYgAUGJFvUerFTILMNuPsf0N2/tt/9j2W7LaFwB0qqLeo9UKWXZeTkl6a0TcbPt0SXtsXx8R9ZcxBQAsWlHv0WqFzAIsIu6SdFf6+KjtA5LWSSLAAKBJmn2PVhEX45xLS6qyXZV0nqTvt2J/ANApBjb26ZLd9dsFi71Hq4gT9s4n80EctldK+rykiyPi/jrbL7I9bHv40KFDWZcDAG2lWfdolXEwSKYtMNunKgmvqyLiC/VeExGXS7pcSobRZ1kPALSjZtyj1chgkKJNj5VZgNm2pE9IOhARf5PVfgAAy59/sYyDQbLsQnympNdKusD2LenXizLcHwBgiWqDQeopwmKc9WQWYBFxY0Q4IjZExLnp15ez2h8AYOmKOmHvfJiJAwBQ2Al751O8igAAuSjihL3zKWZVAIBclGkxTroQAQClRAsMADpUmaaNqqc8lQIAmqZs00bVQxciAHSYMk4bVQ8BBgAdpl3WECPAAKDDlHHaqHoIMADoMGWcNqoeAgwA2sTY5JSuHrpdH7z2gK4eul1jc1zLKuO0UfUwChEA2sBiRhXWpo2a/XpbhZ02qh7HXFfyctDf3x/Dw8N5lwEApTI2OaWnfuCrGp88+bpWb3dFQzu21A2l8cmpwk8bZXtPRPTX21asSgEAi7bUxSjLNG1UPVwDA4CSa5dRhYtFgAFAybXLqMLFIsAAoOTaZVThYhFgAFByZVyMshna86gAoMOUbTHKZmjfIwOADlP2UYWLRRciAKCUCDAAQCkRYACAUiLAAAClRIABAEqJAAMAlBIBBgAopUItp2L7kKSDedexTGskHc67iCbgOIqjHY5B4jiKpEzH8NiIWFtvQ6ECrB3YHp5r7Zoy4TiKox2OQeI4iqQdjkGiCxEAUFIEGACglAiw5rs87wKahOMojnY4BonjKJJ2OAaugQEAyokWGACglAiwJrH9GNvfsL3f9o9tvyXvmpbKdsX2D2wP5l3LUtk+0/Y1tm+1fcD20/OuaSls/7f039OPbH/G9sPyrqkRtj9p+x7bP5rx3Crb19v+t/TPR+RZ40LmOIb/mf6b2mf7i7bPzLPGRtQ7jhnb3mo7bK/Jo7blIsCaZ0rSWyPiiZKeJukNtp+Yc01L9RZJB/IuYpk+LOkrEfEESRtVwuOxvU7SmyX1R8STJFUkvTLfqhq2S9ILZj3355K+FhH/QdLX0u+LbJdOPobrJT0pIjZI+ldJb291UUuwSycfh2w/RtLzJN3e6oKahQBrkoi4KyJuTh8fVXLCXJdvVYtn+2xJvy3p43nXslS2Hy7pWZI+IUkRcSwi7su3qiVbIek02ysk9UgazbmehkTEtyXdO+vpl0q6In18haTfaWlRi1TvGCLiuoiYSr/9nqSzW17YIs3xs5CkD0l6m6TSDoQgwDJguyrpPEnfz7eSJblMyT/qB/MuZBkeJ+mQpJ1pV+jHbffmXdRiRcSdki5V8j/kuyT9MiKuy7eqZTkrIu5KH98t6aw8i2mC/yLp2ryLWArbL5V0Z0TszbuW5SDAmsz2Skmfl3RxRNyfdz2LYXtA0j0RsSfvWpZphaQnS/pYRJwnaVzF7646SXqN6KVKArlPUq/t1+RbVXNEMvy5tP/zt/0OJZcNrsq7lsWy3SNph6R35V3LchFgTWT7VCXhdVVEfCHvepbgmZJeYntE0tWSLrB9Zb4lLckdku6IiFoL+BolgVY2WyT9NCIORcQDkr4g6Rk517QcP7f9aElK/7wn53qWxPY2SQOSXh3lvA/p8Ur+U7Q3/V0/W9LNth+Va1VLQIA1iW0rueZyICL+Ju96liIi3h4RZ0dEVclgga9HROn+xx8Rd0v6me1z0qculLQ/x5KW6nZJT7Pdk/77ulAlHIwyw5ckvS59/DpJ/zfHWpbE9guUdLG/JCIm8q5nKSLihxHxyIiopr/rd0h6cvp7UyoEWPM8U9JrlbRabkm/XpR3UR3sTZKusr1P0rmSPpBzPYuWtiCvkXSzpB8q+X0txQwKtj8j6buSzrF9h+0/lPRBSc+1/W9KWpcfzLPGhcxxDB+VdLqk69Pf8b/PtcgGzHEcbYGZOAAApUQLDABQSgQYAKCUCDAAQCkRYACAUiLAAAClRIABObL95Xozmtt+j+0/TR9/03Z/ndecy60a6GQEGJCjiHjRMiYaPlcSAYaORYABGbL9Z7bfnD7+kO2vp48vsH2V7ZHaWky232H7X23fKOmcWR/1e7aH0u3n2+6S9D5JW9Mbare28riAIiDAgGzdIOn89HG/pJXpnJnnS/p27UW2n6Jk+q5aq2rTrM9ZERGbJV0s6d0RcUzJZKyfjYhzI+Kz2R4GUDwEGJCtPZKeYvsMSZNKpvTpVxJgN8x43fmSvhgRE+kqBl+a9Tm1yaH3SKpmWjFQEivyLgBoZxHxgO2fStom6TuS9kl6jqRf1+Im5p1M/5wWv7eAJFpgQCvcIOlPlXQZ3iDp9ZJ+MGspjm9L+h3bp9k+XdKLG/jco0omlgU6EgEGZO8GSY+W9N2I+Lmkf9eJ3YeKiJslfVbSXiWr/N7UwOd+Q9ITGcSBTsVs9ACAUqIFBgAoJQIMAFBKBBgAoJQIMABAKRFgAIBSIsAAAKVEgAEASokAAwCU0v8HJUyxlDEPudgAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 504x360 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
},
{
"output_type": "stream",
"text": [
"done in 0.806s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZR1yz5SgPUbr"
},
"source": [
"## Kmeans Clustering LDA"
]
},
{
"cell_type": "code",
"metadata": {
"id": "T2j2ZT8dPLbs"
},
"source": [
"import numpy as np\n",
"XLDA = np.array(model_lda.components_)\n",
"yLDA = np.array(model_lda.fit_transform(model_lda.components_))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "HvD9RFGOPbJE",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "cc37c70b-ec04-4b02-a636-d3065cfcbbb3"
},
"source": [
"print(\"Loading Data X & Y...\")\n",
"t0 = time()\n",
"\n",
"print(\"X\", XLDA, \"\\n\")\n",
"print(\"Y\", yLDA, \"\\n\")\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Data X & Y...\n",
"X [[1.13924605 0.8834537 1.24858232 0.68218711 0.74324957 0.92721692\n",
" 0.80725571 1.01261206 0.89262673 0.83145245 0.95676322 1.05289823\n",
" 0.69093688 0.85540123 1.10791895 0.65036905 0.95625662 1.09999268\n",
" 0.68827518 0.61648041 0.91613008 1.07665834 0.77859659 0.89589489\n",
" 0.8634334 ]\n",
" [0.71942444 0.63997389 0.73540432 0.68775008 0.77909644 0.77846923\n",
" 0.78218557 0.70571175 0.72475793 0.69337689 0.63270695 0.65847644\n",
" 0.78475209 0.80481943 0.78343423 0.78024404 0.70221961 0.72836256\n",
" 0.75478443 0.66953312 0.81795132 0.78496336 0.64501827 0.86455628\n",
" 0.7377681 ]\n",
" [0.72188201 0.84294267 0.77683419 0.95038401 0.75993562 0.7802179\n",
" 0.66251075 0.823229 0.72116528 0.69789107 0.89461667 0.92457791\n",
" 0.73145992 0.97224079 0.92285554 0.81959204 0.77832303 0.67393852\n",
" 0.77252999 0.8511185 0.719634 0.89311744 0.69634768 0.7904796\n",
" 0.70562698]\n",
" [0.80082112 1.76384661 0.72823224 0.76284712 0.79769123 0.68656457\n",
" 0.83468914 0.74576771 1.94665113 0.80749889 0.7036918 0.70478701\n",
" 0.91673652 0.74132829 0.94034999 0.66550394 1.16302158 0.68062933\n",
" 0.67621618 1.05192408 0.93577483 0.67832871 0.84483248 0.69404551\n",
" 1.53855969]\n",
" [0.72535567 0.65560033 0.94117306 0.64073553 0.67360546 0.75448764\n",
" 1.17542527 0.77022075 1.15575047 0.81220636 1.33320481 0.90742293\n",
" 0.72428474 0.73349717 0.8333343 1.00306665 0.74894603 0.69853774\n",
" 0.8332386 0.74361725 0.85759665 0.77392573 0.78816635 0.79244518\n",
" 1.23985637]\n",
" [0.74764282 0.71764245 0.80932737 0.78948172 0.79773925 0.65235912\n",
" 0.71555191 0.71207991 0.81888121 0.67013132 0.80878859 0.78203076\n",
" 0.80086094 0.74111737 0.729344 0.83743921 0.8317783 0.78911041\n",
" 0.73506724 0.80197202 0.83564276 0.64197372 0.82392196 0.70835664\n",
" 0.79124558]\n",
" [0.6192943 0.76447613 0.74562272 0.81693046 0.96685607 0.77933322\n",
" 0.78707106 0.74232162 0.63330059 0.93563986 0.70171466 0.88411573\n",
" 0.6775884 0.86686346 0.72558653 0.7164088 0.97268528 0.6765899\n",
" 0.81242026 0.79376679 0.72214931 0.77784214 0.76303316 0.70253696\n",
" 0.71185104]\n",
" [0.65745455 0.80215024 0.7392082 0.95836202 1.07041545 0.72317494\n",
" 0.78623641 0.74624506 0.7962408 0.84845508 0.68805863 0.67005629\n",
" 0.72831332 0.70083931 0.82121772 0.84230552 0.66828209 0.63080163\n",
" 1.21824859 1.03140545 0.71835222 0.70200315 0.66626471 0.71922809\n",
" 0.95468925]\n",
" [0.72459914 0.79974719 0.71309036 0.6514595 0.7044665 0.78131889\n",
" 0.71651015 0.79658169 0.84474558 0.79919044 0.74956753 0.82235703\n",
" 0.73147939 0.69559448 0.88491606 0.73277901 0.79832352 0.6427745\n",
" 0.63452474 0.7791277 0.75350713 0.63719132 0.89661358 0.81957116\n",
" 0.7558214 ]\n",
" [0.63979681 0.81466649 1.04548043 0.70819415 0.67283206 0.66136784\n",
" 0.82582351 0.81133018 0.78281342 0.65088382 0.8291177 0.97817116\n",
" 0.80143616 0.8665057 0.97777469 0.87853807 0.79371411 0.74131466\n",
" 0.75753523 0.71023808 0.84652979 0.81549846 0.92952519 0.85711209\n",
" 0.77484846]] \n",
"\n",
"Y [[0.00427929 0.004279 0.96148845 0.00427905 0.00427903 0.00427904\n",
" 0.00427905 0.00427899 0.00427901 0.00427907]\n",
" [0.00515713 0.00515666 0.95358939 0.0051567 0.00515667 0.0051567\n",
" 0.00515671 0.00515665 0.00515665 0.00515673]\n",
" [0.00478998 0.00478929 0.95689543 0.00478934 0.00478932 0.00478934\n",
" 0.00478935 0.00478929 0.00478929 0.00478937]\n",
" [0.00420095 0.00420055 0.96219431 0.00420063 0.00420058 0.00420061\n",
" 0.0042006 0.00420057 0.00420058 0.00420062]\n",
" [0.00448221 0.00448191 0.95966228 0.00448195 0.00448195 0.00448195\n",
" 0.00448195 0.00448191 0.00448191 0.00448198]\n",
" [0.00497918 0.00497857 0.95519195 0.00497863 0.0049786 0.00497863\n",
" 0.00497863 0.00497857 0.00497858 0.00497865]\n",
" [0.00492849 0.00492792 0.9556479 0.00492797 0.00492794 0.00492796\n",
" 0.004928 0.00492792 0.00492792 0.00492799]\n",
" [0.0047886 0.00478825 0.95690517 0.0047883 0.00478827 0.00478829\n",
" 0.00478831 0.00478826 0.00478825 0.00478831]\n",
" [0.00503524 0.00503461 0.95468754 0.00503468 0.00503464 0.00503467\n",
" 0.00503468 0.00503462 0.00503463 0.0050347 ]\n",
" [0.00472453 0.00472423 0.95748134 0.00472428 0.00472426 0.00472428\n",
" 0.00472429 0.00472423 0.00472424 0.00472432]] \n",
"\n",
"done in 0.013s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "1EvGNZoYPfJH",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 105
},
"outputId": "fc28cce8-c774-45be-b48a-bf5230f89644"
},
"source": [
"from sklearn.cluster import KMeans\n",
"\n",
"print(\"Loading Kmeans...\")\n",
"t0 = time()\n",
"kmeans = KMeans(n_clusters=2) # You want cluster the passenger records into 2: Survived or Not survived\n",
"print(kmeans.fit(XLDA))\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Kmeans...\n",
"KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
" n_clusters=2, n_init=10, n_jobs=None, precompute_distances='auto',\n",
" random_state=None, tol=0.0001, verbose=0)\n",
"done in 0.023s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "JT9RHbW5Pi1t",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
},
"outputId": "aeda378d-241d-448b-c465-bf7de65ac9b5"
},
"source": [
"print(\"Loading Data Correction...\")\n",
"t0 = time()\n",
"\n",
"correct = 0\n",
"for i in range(len(XLDA)):\n",
" predict_me = np.array(XLDA[i].astype(float))\n",
" predict_me = predict_me.reshape(-1, len(predict_me))\n",
" prediction = kmeans.predict(predict_me)\n",
" if prediction[0] == yLDA[i].all():\n",
" correct += 1\n",
"\n",
"print(correct/len(XLDA))\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Data Correction...\n",
"0.1\n",
"done in 0.005s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "bPxekG1bPo-w",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 941
},
"outputId": "7d01542f-2ad7-4043-fbd1-958c3cfc42fe"
},
"source": [
"from sklearn.preprocessing import MinMaxScaler\n",
"\n",
"\n",
"print(\"Loading X Scaled...\")\n",
"t0 = time()\n",
"\n",
"scaler = MinMaxScaler()\n",
"XLDA_scaled = scaler.fit_transform(XLDA)\n",
"\n",
"print(XLDA_scaled)\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading X Scaled...\n",
"[[1. 0.21664357 1. 0.13050418 0.17711382 1.\n",
" 0.28220093 1. 0.19745387 0.63411694 0.46260851 1.\n",
" 0.05581676 0.57765728 1. 0. 0.58207306 1.\n",
" 0.09208196 0. 0.90964718 1. 0.46950814 1.\n",
" 0.18945878]\n",
" [0.19257583 0. 0.04167002 0.14801837 0.2672757 0.45881948\n",
" 0.23332313 0. 0.06963666 0.14922623 0. 0.\n",
" 0.44810594 0.39481803 0.15130212 0.36823327 0.06859676 0.20793435\n",
" 0.20602154 0.12183599 0.4580899 0.33625286 0. 0.84474261\n",
" 0.0385879 ]\n",
" [0.19730237 0.18059766 0.11903788 0.97488242 0.2190825 0.46518156\n",
" 0. 0.38291668 0.06690118 0.16507901 0.37389083 0.6746622\n",
" 0.22526422 1. 0.51596201 0.47979627 0.22242199 0.09193887\n",
" 0.23642215 0.53884833 0.00589533 0.5823557 0.18041531 0.47775274\n",
" 0. ]\n",
" [0.34912243 1. 0.02827657 0.38445027 0.31404524 0.1244478\n",
" 0.33568632 0.1305178 1. 0.54999735 0.10133485 0.11741381\n",
" 1. 0.16531511 0.5617192 0.04291181 1. 0.10619919\n",
" 0.07142322 1. 1. 0.09360747 0.70231758 0.\n",
" 1. ]\n",
" [0.20398311 0.01390409 0.42593114 0. 0.00194526 0.37156856\n",
" 1. 0.21019529 0.39779927 0.56652895 1. 0.63116819\n",
" 0.19526114 0.13700777 0.28181698 1. 0.16304326 0.14436787\n",
" 0.34042444 0.29197082 0.64043219 0.31113691 0.50314444 0.4874906\n",
" 0.6413836 ]\n",
" [0.24684698 0.06910796 0.17971699 0.46830539 0.31416601 0.\n",
" 0.1034113 0.02074991 0.14130319 0.06759295 0.25136642 0.31325429\n",
" 0.51546524 0.16455268 0.00982777 0.53039816 0.3304693 0.33740794\n",
" 0.17224325 0.42598303 0.53945883 0.01088228 0.62882016 0.07090004\n",
" 0.10279173]\n",
" [0. 0.11077966 0.06075228 0.55472366 0.7395279 0.46196286\n",
" 0.24284808 0.11928911 0. 1. 0.09851238 0.57207611\n",
" 0. 0.61909008 0. 0.18724185 0.61527975 0.09758983\n",
" 0.30475971 0.40713964 0.01746411 0.32004863 0.41480499 0.04206827\n",
" 0.00747247]\n",
" [0.0733919 0.14430134 0.04877355 1. 1. 0.2576453\n",
" 0.2412208 0.1320732 0.12406452 0.6938264 0.07901764 0.02935904\n",
" 0.21210668 0.01895863 0.25012577 0.54419556 0. 0.\n",
" 1. 0.95287881 0. 0.14747826 0.07467809 0.12475926\n",
" 0.29901848]\n",
" [0.2025281 0.14216316 0. 0.03376285 0.07956682 0.46918724\n",
" 0.10527952 0.29608944 0.16099662 0.52081991 0.16682504 0.41549577\n",
" 0.22534562 0. 0.41673037 0.23365613 0.2628483 0.02551812\n",
" 0. 0.37352085 0.16168932 0. 0.88432051 0.62187781\n",
" 0.06026228]\n",
" [0.03943156 0.15543806 0.62071907 0.21238348 0. 0.03277592\n",
" 0.31840151 0.34414571 0.11384077 0. 0.28038737 0.81054021\n",
" 0.51787052 0.61779685 0.65960444 0.64692535 0.25353147 0.23553952\n",
" 0.21073405 0.21531527 0.58953195 0.40573497 1. 0.80786268\n",
" 0.08310572]]\n",
"done in 0.005s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "bR6WehbtPuM0",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "31a803d5-5916-498c-dfaf-2004fa12ad12"
},
"source": [
"print(\"Loading Correction...\")\n",
"t0 = time()\n",
"\n",
"print(\"X\", XLDA, \"\\n\")\n",
"print(\"X scaled\", XLDA_scaled, \"\\n\")\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Correction...\n",
"X [[1.13924605 0.8834537 1.24858232 0.68218711 0.74324957 0.92721692\n",
" 0.80725571 1.01261206 0.89262673 0.83145245 0.95676322 1.05289823\n",
" 0.69093688 0.85540123 1.10791895 0.65036905 0.95625662 1.09999268\n",
" 0.68827518 0.61648041 0.91613008 1.07665834 0.77859659 0.89589489\n",
" 0.8634334 ]\n",
" [0.71942444 0.63997389 0.73540432 0.68775008 0.77909644 0.77846923\n",
" 0.78218557 0.70571175 0.72475793 0.69337689 0.63270695 0.65847644\n",
" 0.78475209 0.80481943 0.78343423 0.78024404 0.70221961 0.72836256\n",
" 0.75478443 0.66953312 0.81795132 0.78496336 0.64501827 0.86455628\n",
" 0.7377681 ]\n",
" [0.72188201 0.84294267 0.77683419 0.95038401 0.75993562 0.7802179\n",
" 0.66251075 0.823229 0.72116528 0.69789107 0.89461667 0.92457791\n",
" 0.73145992 0.97224079 0.92285554 0.81959204 0.77832303 0.67393852\n",
" 0.77252999 0.8511185 0.719634 0.89311744 0.69634768 0.7904796\n",
" 0.70562698]\n",
" [0.80082112 1.76384661 0.72823224 0.76284712 0.79769123 0.68656457\n",
" 0.83468914 0.74576771 1.94665113 0.80749889 0.7036918 0.70478701\n",
" 0.91673652 0.74132829 0.94034999 0.66550394 1.16302158 0.68062933\n",
" 0.67621618 1.05192408 0.93577483 0.67832871 0.84483248 0.69404551\n",
" 1.53855969]\n",
" [0.72535567 0.65560033 0.94117306 0.64073553 0.67360546 0.75448764\n",
" 1.17542527 0.77022075 1.15575047 0.81220636 1.33320481 0.90742293\n",
" 0.72428474 0.73349717 0.8333343 1.00306665 0.74894603 0.69853774\n",
" 0.8332386 0.74361725 0.85759665 0.77392573 0.78816635 0.79244518\n",
" 1.23985637]\n",
" [0.74764282 0.71764245 0.80932737 0.78948172 0.79773925 0.65235912\n",
" 0.71555191 0.71207991 0.81888121 0.67013132 0.80878859 0.78203076\n",
" 0.80086094 0.74111737 0.729344 0.83743921 0.8317783 0.78911041\n",
" 0.73506724 0.80197202 0.83564276 0.64197372 0.82392196 0.70835664\n",
" 0.79124558]\n",
" [0.6192943 0.76447613 0.74562272 0.81693046 0.96685607 0.77933322\n",
" 0.78707106 0.74232162 0.63330059 0.93563986 0.70171466 0.88411573\n",
" 0.6775884 0.86686346 0.72558653 0.7164088 0.97268528 0.6765899\n",
" 0.81242026 0.79376679 0.72214931 0.77784214 0.76303316 0.70253696\n",
" 0.71185104]\n",
" [0.65745455 0.80215024 0.7392082 0.95836202 1.07041545 0.72317494\n",
" 0.78623641 0.74624506 0.7962408 0.84845508 0.68805863 0.67005629\n",
" 0.72831332 0.70083931 0.82121772 0.84230552 0.66828209 0.63080163\n",
" 1.21824859 1.03140545 0.71835222 0.70200315 0.66626471 0.71922809\n",
" 0.95468925]\n",
" [0.72459914 0.79974719 0.71309036 0.6514595 0.7044665 0.78131889\n",
" 0.71651015 0.79658169 0.84474558 0.79919044 0.74956753 0.82235703\n",
" 0.73147939 0.69559448 0.88491606 0.73277901 0.79832352 0.6427745\n",
" 0.63452474 0.7791277 0.75350713 0.63719132 0.89661358 0.81957116\n",
" 0.7558214 ]\n",
" [0.63979681 0.81466649 1.04548043 0.70819415 0.67283206 0.66136784\n",
" 0.82582351 0.81133018 0.78281342 0.65088382 0.8291177 0.97817116\n",
" 0.80143616 0.8665057 0.97777469 0.87853807 0.79371411 0.74131466\n",
" 0.75753523 0.71023808 0.84652979 0.81549846 0.92952519 0.85711209\n",
" 0.77484846]] \n",
"\n",
"X scaled [[1. 0.21664357 1. 0.13050418 0.17711382 1.\n",
" 0.28220093 1. 0.19745387 0.63411694 0.46260851 1.\n",
" 0.05581676 0.57765728 1. 0. 0.58207306 1.\n",
" 0.09208196 0. 0.90964718 1. 0.46950814 1.\n",
" 0.18945878]\n",
" [0.19257583 0. 0.04167002 0.14801837 0.2672757 0.45881948\n",
" 0.23332313 0. 0.06963666 0.14922623 0. 0.\n",
" 0.44810594 0.39481803 0.15130212 0.36823327 0.06859676 0.20793435\n",
" 0.20602154 0.12183599 0.4580899 0.33625286 0. 0.84474261\n",
" 0.0385879 ]\n",
" [0.19730237 0.18059766 0.11903788 0.97488242 0.2190825 0.46518156\n",
" 0. 0.38291668 0.06690118 0.16507901 0.37389083 0.6746622\n",
" 0.22526422 1. 0.51596201 0.47979627 0.22242199 0.09193887\n",
" 0.23642215 0.53884833 0.00589533 0.5823557 0.18041531 0.47775274\n",
" 0. ]\n",
" [0.34912243 1. 0.02827657 0.38445027 0.31404524 0.1244478\n",
" 0.33568632 0.1305178 1. 0.54999735 0.10133485 0.11741381\n",
" 1. 0.16531511 0.5617192 0.04291181 1. 0.10619919\n",
" 0.07142322 1. 1. 0.09360747 0.70231758 0.\n",
" 1. ]\n",
" [0.20398311 0.01390409 0.42593114 0. 0.00194526 0.37156856\n",
" 1. 0.21019529 0.39779927 0.56652895 1. 0.63116819\n",
" 0.19526114 0.13700777 0.28181698 1. 0.16304326 0.14436787\n",
" 0.34042444 0.29197082 0.64043219 0.31113691 0.50314444 0.4874906\n",
" 0.6413836 ]\n",
" [0.24684698 0.06910796 0.17971699 0.46830539 0.31416601 0.\n",
" 0.1034113 0.02074991 0.14130319 0.06759295 0.25136642 0.31325429\n",
" 0.51546524 0.16455268 0.00982777 0.53039816 0.3304693 0.33740794\n",
" 0.17224325 0.42598303 0.53945883 0.01088228 0.62882016 0.07090004\n",
" 0.10279173]\n",
" [0. 0.11077966 0.06075228 0.55472366 0.7395279 0.46196286\n",
" 0.24284808 0.11928911 0. 1. 0.09851238 0.57207611\n",
" 0. 0.61909008 0. 0.18724185 0.61527975 0.09758983\n",
" 0.30475971 0.40713964 0.01746411 0.32004863 0.41480499 0.04206827\n",
" 0.00747247]\n",
" [0.0733919 0.14430134 0.04877355 1. 1. 0.2576453\n",
" 0.2412208 0.1320732 0.12406452 0.6938264 0.07901764 0.02935904\n",
" 0.21210668 0.01895863 0.25012577 0.54419556 0. 0.\n",
" 1. 0.95287881 0. 0.14747826 0.07467809 0.12475926\n",
" 0.29901848]\n",
" [0.2025281 0.14216316 0. 0.03376285 0.07956682 0.46918724\n",
" 0.10527952 0.29608944 0.16099662 0.52081991 0.16682504 0.41549577\n",
" 0.22534562 0. 0.41673037 0.23365613 0.2628483 0.02551812\n",
" 0. 0.37352085 0.16168932 0. 0.88432051 0.62187781\n",
" 0.06026228]\n",
" [0.03943156 0.15543806 0.62071907 0.21238348 0. 0.03277592\n",
" 0.31840151 0.34414571 0.11384077 0. 0.28038737 0.81054021\n",
" 0.51787052 0.61779685 0.65960444 0.64692535 0.25353147 0.23553952\n",
" 0.21073405 0.21531527 0.58953195 0.40573497 1. 0.80786268\n",
" 0.08310572]] \n",
"\n",
"done in 0.011s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "WZfcqRspP0yi",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 385
},
"outputId": "304a0b79-20a5-4950-d894-18fee8cee746"
},
"source": [
"import umap\n",
"\n",
"print(\"Loading Topic Visualisation...\")\n",
"t0 = time()\n",
"\n",
"kmeans_lda = XLDA_scaled\n",
"embedding = umap.UMAP(n_neighbors=3, min_dist=0.1, random_state=4).fit_transform(kmeans_lsa)\n",
"plt.figure(figsize=(7,5))\n",
"plt.scatter(embedding[:, 0], embedding[:, 1], c = None, s = 50 ,edgecolors='Blue')\n",
"plt.title(\"Kemans LDA Visualization\")\n",
"plt.xlabel('widht')\n",
"plt.ylabel('height')\n",
"plt.show\n",
"plt.show()\n",
"\n",
"print(\"done in %0.3fs.\" % (time() - t0))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading Topic Visualisation...\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAbYAAAFNCAYAAABsXEqqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAfaUlEQVR4nO3dfZRk9V3n8feX7hkZmA50pyfZpBEY0yM+xASXFhPOEKMEFzM4iZ5ok5gV3YmsrufEKC5n2eyah1VjXDWH1X2QzQMYEyxljY7bRuGwJtAhT02CCZHItOlkTPM0kxqggYF54Lt/3Dukp+nuKWa66lbder/O6dPdv1tV93urZ+6n7u/+7u9GZiJJUl2cVHUBkiStJYNNklQrBpskqVYMNklSrRhskqRaMdgkSbVisEk1FBH/MSLe2+Z1XBcRv17+fGFE/GMb1vFTEXHTWr+u6s1gU2Ui4qsR8apFv18WEfsi4geqrOt4RMQrI+LrKyy7LiIORMRC+XVXRLwrIk5b5rFvj4iMiO9fZV2Xle9dLGkfjIgHI+LSzPzNzHzTiW9ZazLztsw850ReIyLOLrd9cNHrfigzf/jEK1Q/MdjUFSLicuC/A9sy8+NV19MGv52ZQ8Am4GeBlwGfiIhTjzygDKqfBprl95X8BXA6sPQDwCVAAn+zhnVLPcdgU+Ui4t8Cvwv8q8y8vWw7LSLeFxH3RcR8RPx6RAyUy34mIj4REe+JiIci4isRcUHZ/s/lUcvli15/W0R8PiIeKZe/fdGyI0cJl0fE7ojYGxFvXbT8/IiYKZ/7QET83olsa2Y+kZmfBbYDz6UIuSMuBF4AvBm4LCLWr/QawJ/yzPD7aeDDmXmoPPL743IbTo6IP46Ib5Tv12cj4vnlsqVHzU8/r/z9zyLi/oh4OCJujYjvXq6mxUesETEZEY8u+noyIj5WLlvxbwHcWn5/qHzey8u/6fSi9VxQ1v9w+f2CRcs+FhH/pfy3sRARN0XE6HL1qt4MNlXtF4B3Ahdl5syi9uuAQ8A48L3ADwOLu9a+H/gCRTh8GPgT4PvKx78R+IOI2Fg+9jGKnf7pwDbgFyLitUvq2AqcA1wE/FpEfGfZfg1wTWY+B3gRRaCcsMxcAG6mCLMjLgf+atE6fnSVl7geeF1EbIDig0D5+OuXeezlwGnAt1K8Xz8P7G+x1I8CW4DnAZ8DPnSsJ2RmIzM3ZuZG4IXAV4AbysWr/S1eUX4/vXz+Jxe/bkSMAFPAfyu34/eAqYh47qKHvYHiw8LzgPXAr7a4naoRg01Vuxj4FPDFIw3l0cSrgbdk5mOZ+SDwHuCyRc+by8wPZOZhoEGx035nZj6ZmTcBByhCjsz8WGZ+MTOfyswvUOxkl3bjvSMz92fm3wN/D7y0bD8IjEfEaGY+mpmfWsNtvxcYKbf5FOAnKI64DgI3skp3ZGZ+AngA+LGy6SeBezLzzmUefpAiCMYz83Bm3pGZj7RSYGa+PzMXMvNJ4O3AS5c7N7iciDiJ4kPHxzLzD8vXa+VvsZJtwK7M/GBmHsrMG4Avc/QHgA9k5j2ZuZ/iA8K5Lb62asRgU9V+Afh24L2LBkOcBawD7iu7zh4C/pDiU/gRDyz6eT9AZi5t2wgQEd8fEX8XEXsi4mGKI5alXVT3L/r58SPPBXaU9X257Pq69Di3czljFOfToAioQ8Bfl79/CPiRiNi0yvP/iG+G378uf1/OB4G/Bf4kIu6NiN+OiHXHKi4iBiLityLinyLiEeCr5aJWu/d+Axii6Fo98pqt/C1W8kLga0vavkbxPh6x0t9RfcRgU9UeoOj+uxD4H2XbPwNPAqOZeXr59ZzMXPb8Tgs+DOwEvjUzTwP+FxCrP6WQmbsy8/UUofpu4MbFAz6OV9lN+irgtrLpcoqd8O6IuB/4M4pwf8MqL/NB4KKIeDnFYJRluwkz82BmviMzvwu4ALiUbwbiY8Apix7+Lxb9/AbgNWWdpwFnHym/he27DHg98LryCPSI1f4Wx7rVyL0UH3oWOxOYP1Y96i8GmyqXmfdShNslEfGezLwPuAn43Yh4TkScFBEviuO/DGAIaGbmExFxPquHxVEi4o0RsSkznwIeKpufWuXxJy/5Wjok/1si4jyKkY37gA9ExBjF9l9K0XV2LkVX6LtZvTvyq8A0RXfezZl5/3KPi4gfjIjviWLwzSMUXZNHtuFOioEq6yJiAnjdoqcOUXzA+AZF+P3mSrUsWd/3Ar8PvDYz9yxZvNrfYk9Z17et8NJ/DXx7RLwhiksbJoHvAv5vK3Wpfxhs6gqZuRv4IYoBEe+i2KGvB/6BIgBupBgxeDz+HfDOiFgAfo1nNwDkEuBLEfEoxUCSy8rzN8sZo+gCXfz1onLZVeX6v0HRZXgHcEFmPkbRjXhnZt6Umfcf+aIYJPGSiHjxKvVdT3EUs1I3JBRHYTdShNrdwMcpjvYA/nNZ4z7gHRRHVEf8EUVX3zzF36HV84uvAYaB6UUjIz9aLlvxb5GZj1N0X36i7IJ+2eIXzcxvUIT/lRTv41XApZm5t8W61CfCG41KkurEIzZJUq0YbJKkWjHYJEm1YrBJkmrFYJMk1crgsR9SvdHR0Tz77LOrLkOS1EXuuOOOvZn5jNl5eiLYzj77bGZmZo79QElS34iIpVOsAXZFSpJqxmCTJNWKwSZJqhWDTZJUKwabJKlWDDZJUq30xHB/SVLvW1iARgN2zSZbxoPJSRgaWvv1GGySpLabnoZt2w+zfqzJoeEmg1MjXHnVCFM7B9i6dW3XZbBJktpqYaEItZMvnmHD5m/eF/akuVG2bZ9gfvcAGzeu3fo8xyZJaqtGA9aPNY8KNYANm/eyfqxJo7G26zPYJElttWs2OTTcXHbZweEms7O5pusz2CRJbbVlPBjcN7LssnX7RhgfjzVdn8EmSWqryUk4MD/C/rnRo9r3z41yYH6Eycm1XZ+DRyRJbTU0BFM7B9i2fYKnxpocHG6ybt8IB+aLUZFrOXAEDDZJUgds3QrzuwdoNDYxOzvKeHkd21qHGhhskqQO2bgRduwAWNtzaksZbJKkp3VqdpB2MtgkSUBnZwdpJ4NNktTx2UHayeH+kqSOzw7STgabJKnjs4O0k8EmSer47CDtZLBJkjo+O0g7OXhEktTx2UHayWCTJAGdnR2knQw2SdLTOjU7SDt5jk2SVCsGmySpVgw2SVKtGGySpFox2CRJtWKwSZJqxWCTJNWKwSZJqhWDTZJUK848IqkSCwvFPcB2zSZbyqmbhoaqrkp10LYjtoh4f0Q8GBF3LWr7rxHx5Yj4QkR8JCJOb9f6JXWv6Wk446zDXH3NHq69/R6uvmYPZ5x1mOnpqitTHbSzK/I64JIlbTcDL87MlwD3AFe3cf2SutDCAmzbfpiTL57h1G2f4bQLZjl122c4+eIZtm0/zKOPVl2hel3bgi0zbwWaS9puysxD5a+fAs5o1/oldadGA9aPNdmwee9R7Rs272X9WJNGo6LCVBtVDh75N8BHK1y/pArsmk0ODTeXXXZwuMnsbHa4ItVNJcEWEW8FDgEfWuUxV0TETETM7Nmzp3PFSWqrLePB4L6RZZet2zfC+Hjv3i5F3aHjwRYRPwNcCvxUZq740Swzr83Micyc2LRpU8fqk9Rek5NwYH6E/XOjR7XvnxvlwPwIk5MVFaba6Ohw/4i4BLgK+IHMfLyT65bUHYaGYGrnANu2T/DUWJODw03W7RvhwPwIUzsHeu5uzeo+bQu2iLgBeCUwGhFfB95GMQryW4CbIwLgU5n58+2qQVJ32roV5ncP0GhsYnZ2lPHyOjZDTWuhbcGWma9fpvl97VqfpN6ycSPs2AHgOTWtLWcekTNASKoVg63PTU8XF8uuH2tyaLjJ4NQIV15VnOvYurXq6iTp2TPY+tjiGSAWXyx70two27ZPML/bE/mSeo+z+/cxZ4CQVEcGWx9zBghJdWSw9TFngJBURwZbH3MGCEl15OCRPuYMEOoHXs7Sf2KV6Rq7xsTERM7MzFRdRm09+mjxH392Np0BQrXyjMtZFn1w83KW3hcRd2TmxNJ2j9jkDBCqJS9n6V+eY5NUS17O0r8MNkm15OUs/ctgk1RLXs7Svww2SbXk5Sz9y8EjkmrJy1n6l8Emqba8oWl/Mtgk1ZqXs/Qfz7FJkmrFYJMk1YrBJkmqFYNNklQrBpskqVYMNklSrRhskqRaMdgkSbVisEmSasVgkyTVisEmSaoVg02SVCtOgiytkYUFaDSKOzdvKWeRHxqquiqp/xhs0hqYnoZt2w+zfqzJoeEmg1MjXHlVcd+vrVurrk7qLwabdIIWFopQO/niGTZs3vt0+0lzo2zbPsH8bm9qKXWS59ikE9RowPqx5lGhBrBh817WjzVpNCoqTOpTHrGpUnU4L7VrNjk03Fx22cHhJrOzo3iTS6lzDDZVpi7npbaMB4NTI8suW7dvhPFxQ03qpMjMqms4pomJiZyZmam6DK2hhQU446xnnpfaPzfKEzf31nmpOm2L1Esi4o7MnFja7hGbKrHaeamnxpo0GpvYsaOi4p6loSGY2jnAtu0TPDXW5OBwk3X7RjgwXxx9GmpSZxlsqkTdzktt3QrzuwdoNDYxOzvKeHm+0FCTOs9gUyXqeF5q40bKo8zeq12qk7YN94+I90fEgxFx16K2n4iIL0XEUxHxjH5R9Y/JSTgwP8L+udGj2vfPjXJgfoTJyYoKk9Tz2nnEdh3wB8AfLWq7C/hx4A/buF71AM9LSWqXtgVbZt4aEWcvabsbIMKuGnleSlJ7eI5NlfK8lKS11rVTakXEFRExExEze/bsqbocSVKP6Npgy8xrM3MiMyc2bdpUdTmSpB7RtcEmSdLxaOdw/xuATwLnRMTXI2JHRPxYRHwdeDkwFRF/2671S5L6UztHRb5+hUUfadc6JUmyK1KSVCsGmySpVryOrY/V4SafkrRU3wdbv+7c63KTT0laqq+DrV937gsLxXYvvTHmSXOjbNvujTEl9ba+DbZ+3rnX6SafkrRU3w4eWW3nvn6sSaNRUWEdcOybfGaHK5KktdO3wdbPO/ct48Hgvnrd5FOSjujbYOvnnbs3+ZRUZ317jm1yEq68aoST5kaP6o7sh527N/mUVGd9G2z9vnP3Jp+S6qpvgw3cuXuTT0l11NfBBu7cJalu+nbwiCSpngw2SVKtGGySpFox2CRJtWKwSZJqxWCTJNWKwSZJqhWDTZJUKwabJKlW+n7mEdXDwkJxj71ds8mWcmq0oaGqq5JUBYOtRf2y4+zF7ZyeLu6Gvn6syaHhJoNTI1x5VTGZ9datVVcnqdMis/tvqDkxMZEzMzOVrf8ZO85FdwGo046zF7dzYQHOOOswJ18884zbDz1x8wTzu+t/pwapX0XEHZk5sbTdI7ZjWFgodvZLd5wnzY2ybXt9dpy9up2NBqwfax5VM8CGzXt5aqxJo7GpnORaUr9w8MgxrLbjXD/WpNGoqLA11qvbuWs2OTTcXHbZweEms7Pd3yMhaW21FGwR8cFW2uqoX3acvbqdW8aDwX0jyy5bt2+E8XFvRyT1m1aP2L578S8RMQCct/bldJ9+2XH26nZOTsKB+RH2z40e1b5/bpQD8yNMTlZUmKTKrBpsEXF1RCwAL4mIR8qvBeBB4C87UmHF+mXH2avbOTQEUzsHeOLmCR6bOp+Hbh/nsanzeeLmCaZ2dud5QUnt1dKoyIh4V2Ze3YF6ltVNoyIPDjdZ1wOjBY9HL2/no48W5wlnZ5Px8jIFQ02qt5VGRbY83D8ixoCzWDSSMjNvXbMKV1F1sEH/7Dj7ZTsl9b4TCraI+C3gMuAfgMNlc2bm9jWtcgXdEGySpO5yotex/RhwTmY+ubZlSZK0tlodFfkVYF07C5EkaS2sesQWEb8PJPA4cGdE3AI8fdSWmW9ub3mSJD07x+qKPHJi6w5gZ5trkSTphK0abJl5facKkSRpLbQ0eCQivkjRJbnYwxRHdL+emd9Y68IkSToerQ4e+SgwBfxU+fVXFKF2P3Ddck+IiPdHxIMRcdeitpGIuDkidpXfh0+oekmSlmg12F6VmVdn5hfLr7cCP5CZ7wbOXuE51wGXLGn7D8AtmbkFuKX8XZKkNdNqsA1ExPlHfomI7wMGyl8PLfeEclaSpdPFvwY4ct7ueuC1rZcqSdKxtXqB9puA90fERiCAR4A3RcSpwLuexfqen5n3lT/fDzz/WTxXkqRjainYMvOzwPdExGnl7w8vWvynx7PizMyIWHE+r4i4ArgC4MwzzzyeVUiS+tCxLtB+Y2b+cUT8ypJ2ADLz957l+h6IiBdk5n0R8QKK298sKzOvBa6FYq7IZ7me2lhYKCYl3jWbbCknJR4aqroqSepexzpiO7X8vla70p3A5cBvld/74p5ux2vxbWQODTcZnBrhyqt64zYyklSVlm9b86xfOOIG4JXAKPAA8DbgLyi6Ls8Evgb8ZGYuHWDyDP04u//CApxx1mFOvniGDZv3Pt2+f26UJ26eYH63N9GU1N9Wmt2/pVGREfHtEXHLkWvSIuIlEfGfVntOZr4+M1+Qmesy84zMfF9mfiMzL8rMLZn5qlZCrV81GrB+rHlUqAFs2LyX9WNNGo2KCpOkLtfqcP//DVwNHATIzC9Q3J9NbbJrNjk0vHzuHxxuMjvbt6cdJWlVrQ73PyUzP3Nk0Ehp2evXulEvDsDYMh4MTo0su2zdvhHGx2PZZZLU71o9YtsbES+inC8yIl4H3Lf6U7rD9HRxrurqa/Zw7e33cPU1ezjjrMNMT1dd2eomJ+HA/Aj750aPat8/N8qB+REmJysqTJK6XKtHbL9IMfT+OyJiHpijmDOyqy0sFKMKlw7AOGlulG3bu3sAxtAQTO0cYNv2CZ4aa3JwuMm6fSMcmC9GRXZr3ZJUtVaDbR74APB3wAjFzCOXA+9sU11rYrUBGE+NNWk0NrFjR0XFtWDrVpjfPUCjsYnZ2VHGy27Ubgy1XuzulVRPrQbbXwIPAZ8D7m1fOWvr2AMwRilmCOteGzdShm/31un1dpK6SavBdkZmLp2pv+s5AKP9erm7V1I9tTp45PaI+J62VtIGDsBoP6+3k9RtjjVX5JE7Zw8CPxsRXwGepOgXy8x8SftLPH4OwGi/OnT3SqqXY3VFXtqRKtqolwZg9CK7eyV1m7bNFbmW+nGuyF7hnJaSqrLSXJGtDh6RlmV3r6RuY7DphNndK6mbGGxaE71wvZ2k/tDqcH9JknqCwSZJqhWDTZJUKwabJKlWDDZJUq0YbJKkWjHYJEm1YrBJkmrFYJMk1YrBJkmqFYNNklQrBpskqVYMNklSrRhskqRaMdgkSbVisEmSasVgkyTVisEmSaoVg02SVCsGmySpVgarLkA6XgsL0GjArtlky3gwOQlDQ1VXJalqBpt60vQ0bNt+mPVjTQ4NNxmcGuHKq0aY2jnA1q1VVyepSgabes7CQhFqJ188w4bNe59uP2lulG3bJ5jfPcDGjRUWKKlSnmNTz2k0YP1Y86hQA9iweS/rx5o0GhUVJqkrGGzqObtmk0PDzWWXHRxuMjubHa5IUjepJNgi4pci4q6I+FJEvKWKGtS7towHg/tGll22bt8I4+PR4YokdZOOB1tEvBj4OeB84KXApREx3uk61LsmJ+HA/Aj750aPat8/N8qB+REmJysqTFJXqGLwyHcCn87MxwEi4uPAjwO/XUEt6kFDQzC1c4Bt2yd4aqzJweEm6/aNcGC+GBXpwBGpv1URbHcBvxERzwX2A68GZiqoQz1s61aY3z1Ao7GJ2dlRxsvr2Aw1SR0Ptsy8OyLeDdwEPAbcCRxe+riIuAK4AuDMM8/saI3qDRs3wo4dAJ5Tk/RNlQweycz3ZeZ5mfkKYB9wzzKPuTYzJzJzYtOmTZ0vUpLUkyq5QDsinpeZD0bEmRTn115WRR2SpPqpauaR/1OeYzsI/GJmPlRRHZKkmqkk2DLzwirWq+7jRMaS1ppzRaoyTmQsqR0MNlWilycy9ihT6m7OFalK9OpExtPTcMZZh7n6mj1ce/s9XH3NHs446zDT01VXJukIj9hUiWNPZDxKt12f1stHmVI/8YhNlejFiYx79ShT6jcGmyrRixMZe7scqTfYFalK9OJExlvGg8Gp3jrKlPpRZHb/p8yJiYmcmXGe5Dp69NGii292Nrt+IuOFhWLgyNJzbPvnRnniZs+xSZ0WEXdk5sTSdo/YVKlemsi4F48ypX5ksEnPgrfLkbqfwSY9S710lCn1I0dFSpJqxWCTJNWKwSZJqhWDTZJUKwabJKlWDDZJUq0YbJKkWjHYJEm1YrBJkmrFYJMk1YrBJkmqFYNNklQrBpskqVYMNklSrRhskqRaMdgkSbVisEmSasVgkyTVisEmSaoVg02SVCsGmySpVgw2SVKtGGySpFox2CRJtWKwSZJqxWCTJNWKwSZJqhWDTZJUK5UEW0T8ckR8KSLuiogbIuLkKuqQJNVPx4MtIsaANwMTmfliYAC4rNN1SJLqqaquyEFgQ0QMAqcA91ZUhySpZjoebJk5D/wOsBu4D3g4M29a+riIuCIiZiJiZs+ePZ0uU5LUo6roihwGXgNsBl4InBoRb1z6uMy8NjMnMnNi06ZNnS5TktSjquiKfBUwl5l7MvMg8OfABRXUIUmqoSqCbTfwsog4JSICuAi4u4I6JEk1VMU5tk8DNwKfA75Y1nBtp+uQJNXTYBUrzcy3AW+rYt2SpHpz5hFJUq0YbJKkWjHYJEm1YrBJkmrFYJMk1YrBJkmqFYNNklQrBpskqVYMNklSrVQy80i/WliARgN2zSZbxoPJSRgaqroqSaoXg61Dpqdh2/bDrB9rcmi4yeDUCFdeNcLUzgG2bq26OkmqD4OtAxYWilA7+eIZNmze+3T7SXOjbNs+wfzuATZurLBASaoRz7F1QKMB68eaR4UawIbNe1k/1qTRqKgwSaohg60Dds0mh4abyy47ONxkdjY7XJEk1ZfB1gFbxoPBfSPLLlu3b4Tx8ehwRZJUXwZbB0xOwoH5EfbPjR7Vvn9ulAPzI0xOVlSYJNWQg0dOQKvD94eGYGrnANu2T/DUWJODw03W7RvhwHwxKtKBI5K0diKz+8/vTExM5MzMTNVlHOUZw/cXBdVKw/cffbQIwtnZZLwMQkNNko5PRNyRmRNL2z1iOw7HO3x/40bYsQPAc2qS1C6eYzsODt+XpO5lsB0Hh+9LUvcy2I6Dw/clqXsZbMfB4fuS1L0cPHIcHL4vSd3LYDtOW7fC/O4BGo1NzM6OOnxfkrqEwXYCHL4vSd3Hc2ySpFox2CRJtWKwSZJqxWCTJNWKwSZJqhWDTZJUKwabJKlWeuJ+bBGxB/haRasfBfYe81EC36tnw/eqNb5PrevH9+qszNy0tLEngq1KETGz3I3s9Ey+V63zvWqN71PrfK++ya5ISVKtGGySpFox2I7t2qoL6CG+V63zvWqN71PrfK9KnmOTJNWKR2ySpFox2FYREadHxI0R8eWIuDsiXl51Td0oIs6JiDsXfT0SEW+puq5uFBG/HBFfioi7IuKGiDi56pq6VUT8Uvk+fcl/T0eLiPdHxIMRcdeitpGIuDkidpXfh6ussUoG2+quAf4mM78DeClwd8X1dKXM/MfMPDczzwXOAx4HPlJxWV0nIsaANwMTmfliYAC4rNqqulNEvBj4OeB8iv97l0bEeLVVdZXrgEuWtP0H4JbM3ALcUv7elwy2FUTEacArgPcBZOaBzHyo2qp6wkXAP2VmVRfUd7tBYENEDAKnAPdWXE+3+k7g05n5eGYeAj4O/HjFNXWNzLwVaC5pfg1wffnz9cBrO1pUFzHYVrYZ2AN8ICI+HxHvjYhTqy6qB1wG3FB1Ed0oM+eB3wF2A/cBD2fmTdVW1bXuAi6MiOdGxCnAq4Fvrbimbvf8zLyv/Pl+4PlVFlMlg21lg8C/BP5nZn4v8Bh9fGjfiohYD2wH/qzqWrpRec7jNRQfml4InBoRb6y2qu6UmXcD7wZuAv4GuBM4XGlRPSSL4e59O+TdYFvZ14GvZ+any99vpAg6rexHgM9l5gNVF9KlXgXMZeaezDwI/DlwQcU1da3MfF9mnpeZrwD2AfdUXVOXeyAiXgBQfn+w4noqY7CtIDPvB/45Is4pmy4C/qHCknrB67EbcjW7gZdFxCkRERT/phyQtIKIeF75/UyK82sfrrairrcTuLz8+XLgLyuspVJeoL2KiDgXeC+wHvgK8LOZua/aqrpTef5xN/Btmflw1fV0q4h4BzAJHAI+D7wpM5+stqruFBG3Ac8FDgK/kpm3VFxS14iIG4BXUszo/wDwNuAvgD8FzqS4G8pPZubSASZ9wWCTJNWKXZGSpFox2CRJtWKwSZJqxWCTJNWKwSZJqhWDTepSEfHXEXH6Mu1vj4hfLX/+WERMLPOYcyPi1Z2oU+o2BpvUpTLz1Scw8fa5FPMrSn3HYJMqEhH/PiLeXP78noj4f+XPPxQRH4qIr0bEaNn21oi4JyKmgXOWvNRPRMRnyuUXlnN2vhOYLO+PN9nJ7ZKqZrBJ1bkNuLD8eQLYGBHryrZbjzwoIs6juGvCkaOw71vyOoOZeT7wFuBtmXkA+DWgUd4nr9HezZC6i8EmVecO4LyIeA7wJPBJioC7kCL0jrgQ+Eh5b7JHKOYEXOzPF73e2W2tWOoBg1UXIPWrzDwYEXPAzwC3A18AfhAY59lNjnxkrsnD+H9a8ohNqthtwK9SdD3eBvw88Pk8ehLXW4HXRsSGiBgCfrSF110Ahta6WKkXGGxStW4DXgB8sryP3RMc3Q1JZn4OaAB/D3wU+GwLr/t3wHc5eET9yNn9JUm14hGbJKlWDDZJUq0YbJKkWjHYJEm1YrBJkmrFYJMk1YrBJkmqFYNNklQr/x8D9qKNgaXCfwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 504x360 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
},
{
"output_type": "stream",
"text": [
"done in 0.963s.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "TagocKdfFdVO",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "d52be9d8-ad73-43b3-f322-d0ed7c418712"
},
"source": [
"plt.hist(kmeans_lda)\n",
"plt.xlabel('Kmeans LDA')\n",
"plt.ylabel('Frequency')\n",
"plt.show"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function matplotlib.pyplot.show>"
]
},
"metadata": {
"tags": []
},
"execution_count": 83
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAEGCAYAAABvtY4XAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAASn0lEQVR4nO3dfbRldX3f8fdHhogoJWlnIi4g3khVIA4ROpIHo0ZMLCLBtBqRFWzJsk7J04oxq3ViXfEmWU3r6iImtqRxSFyiqQaNDyVBa5UZA0lFvOAE0YkPQTSjSbnGKhgJT377x9l3uHO5D/vee/Y99/54v9a6i3322Xv/vr9zDp/Z53f2+Z1UFZKk9jxi0gVIkoZhwEtSowx4SWqUAS9JjTLgJalR2yZdwHzbt2+vqampSZchSVvGTTfd9JWq2rHYfZsq4KemppiZmZl0GZK0ZST5wlL3OUQjSY0y4CWpUQa8JDXKgJekRhnwktQoA16SGjXoZZJJbgfuAh4A7q+qXUO2J0l60EZcB//sqvrKBrQjSZrHIRpJatTQAV/A/05yU5Ldi22QZHeSmSQzs7Oz625w55U7132MxVx+6b7lN5g+fsm7rt13ypirkaSVDR3wP1RVZwHPA342yTMXblBVe6tqV1Xt2rFj0ekUJElrMGjAV9WXuv/eAbwHOHvI9iRJDxos4JM8Oslxc8vAc4Fbh2pPknSkIa+ieSzwniRz7bytqv7XgO1JkuYZLOCr6jbge4c6viRpeV4mKUmNMuAlqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjTLgJalRBrwkNcqAl6RGGfCS1CgDXpIaZcBLUqMMeElqlAEvSY0y4CWpUQa8JDXKgJekRhnwktQoA16SGmXAS1KjDHhJapQBL0mNMuAlqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWrU4AGf5KgkH0/yJ0O3JUl60Eacwf8CcHAD2pEkzTNowCc5CXg+8HtDtiNJeqihz+B/C/j3wLeW2iDJ7iQzSWZmZ2cHLmd509PTE21/nA7tuX7SJWy4yy48f9IlSJvKYAGf5Hzgjqq6abntqmpvVe2qql07duwYqhxJetgZ8gz+6cAFSW4H/hA4J8kfDNieJGmewQK+qn65qk6qqingJcC+qrp4qPYkSUfyOnhJatS2jWikqj4MfHgj2pIkjXgGL0mNMuAlqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjTLgJalRBrwkNcqAl6RGGfCS1CgDXpIaZcBLUqMMeElqlAEvSY0y4CWpUQa8JDXKgJekRhnwktQoA16SGmXAS1KjDHhJapQBL0mNMuAlqVEGvCQ1yoCXpEb1CvgkO4cuRJI0Xn3P4H8nyY1JfibJ8YNWJEkai14BX1XPAH4SOBm4KcnbkvzooJVJktal9xh8VX0WeA3wKuBZwBuS/GWSfzlUcZKktes7Bn9GktcDB4FzgB+rqtO65dcvsc8x3bDOXyT5ZJJfHVvVkqQVbeu53X8Ffg94dVXdPbeyqr6c5DVL7HMPcE5VfSPJ0cCfJXl/Vd2wvpIlSX30DfjnA3dX1QMASR4BHFNV36yqty62Q1UV8I3u5tHdX62zXklST33H4D8EPGre7WO7dctKclSSA8AdwAer6qOrL1GStBZ9A/6Yqpo7G6dbPnalnarqgap6KnAScHaSpyzcJsnuJDNJZmZnZ/vW/RBTe66B6Qev4Lz80n1H3H/tvlMe3G4ZR2zXHe/gqaetua7p6elF16/nmPOt1J+Fdl555FcaDu25Hniw3xM33cZVuCfsPzDpEqTeAf/3Sc6au5HknwF3L7P9Earqa8B+4NxF7ttbVbuqateOHTv6HlKStIK+Y/CvAN6Z5MtAgBOAC5fbIckO4L6q+lqSRwE/CrxuPcVKkvrrFfBV9bEkpwJP7lZ9uqruW2G3xwFXJjmK0TuFd1TVn6y9VEnSavQ9gwd4GjDV7XNWEqrqLUttXFW3AGeurzxJ0lr1CvgkbwVOAQ4AD3SrC1gy4CVJk9X3DH4XcHp3bbskaQvoexXNrYw+WJUkbRF9z+C3A59KciOjKQgAqKoLBqlKkrRufQN+esgiJEnj1/cyyT9N8njgiVX1oSTHAkcNW5okaT36Thf8cuCPgDd2q04E3jtUUZKk9ev7IevPAk8H7oTDP/7xnUMVJUlav74Bf09V3Tt3I8k2nPpXkja1vgH/p0leDTyq+y3WdwJ/PFxZkqT16hvwe4BZ4BPAvwXex+j3WSVJm1Tfq2i+BVzR/UmStoC+c9F8nkXG3KvqCWOvSJI0FquZi2bOMcBPAP94/OVIksal1xh8Vf3dvL8vVdVvMfohbknSJtV3iOaseTcfweiMfjVzyUuSNljfkL5s3vL9wO3Ai8dejSRpbPpeRfPsoQuRJI1X3yGaVy53f1X95njKkSSNy2quonkacHV3+8eAG4HPDlGUJGn9+gb8ScBZVXUXQJJp4JqquniowiRJ69N3qoLHAvfOu31vt06StEn1PYN/C3Bjkvd0t38cuHKYkiRJ49D3Kpr/mOT9wDO6VT9VVR8frixJ0nr1HaIBOBa4s6p+GziU5LsHqkmSNAZ9f7LvtcCrgF/uVh0N/MFQRUmS1q/vGfy/AC4A/h6gqr4MHDdUUZKk9esb8PdWVdFNGZzk0cOVJEkah74B/44kbwS+PcnLgQ/hj39I0qa24lU0SQJcBZwK3Ak8GfiVqvrgwLVJktZhxYCvqkryvqraCRjqkrRF9B2iuTnJ0watRJI0Vn2/yfp9wMVJbmd0JU0YndyfMVRhkqT1WTbgk3xXVX0R+OerPXCSkxlNcfBYRlff7O2+JCVJ2gArncG/l9Eskl9I8q6qeuEqjn0/8EtVdXOS44Cbknywqj615molSb2tNAafectPWM2Bq+pvqurmbvku4CBw4urKkySt1UoBX0ssr0qSKeBM4KOL3Lc7yUySmdnZ2bU2cYSDp54GwGUXng/A9PQ0ACfsPzDaYPp4dl65k4Onnsbll+7jsgvP59Ce6xc91s4rd67Y3tSeaw5vO9c2cMQxT9h/4PB2i5mrcTGXX7pvxRoW23b+MRe2vdh2hx+fNZo75qE91y/bn4Wu3XfKutpdr/n9XvgcjsPca2jutbbQcq+LtZj/OgfG3p/WrObxWe9juVIOjNtKAf+9Se5MchdwRrd8Z5K7ktzZp4EkjwHeBbyiqh6yT1XtrapdVbVrx44dq++BJGlRy47BV9VR6zl4kqMZhfv/qKp3r+dYkqTVWc10wavSfQP294GD/ii3JG28wQIeeDrwUuCcJAe6v/MGbE+SNE/fLzqtWlX9GUdehSNJ2kBDnsFLkibIgJekRhnwktQoA16SGmXAS1KjDHhJapQBL0mNMuAlqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjTLgJalRBrwkNcqAl6RGGfCS1CgDXpIaZcBLUqMMeElqlAEvSY0y4CWpUQa8JDXKgJekRhnwktQoA16SGmXAS1KjDHhJapQBL0mNMuAlqVGDBXySNyW5I8mtQ7UhSVrakGfwbwbOHfD4kqRlDBbwVXUd8NWhji9JWt7Ex+CT7E4yk2RmdnZ27Mc/tOf6Ve9zwv4D62rz8kv3cdmF5y9+5/Tx7Lxy5xHbzdV47b5TOGH/Aab2XHN4u4OnnrZyg9PHAxzedsm2F+iz3eWX7lvyvqk91xxeXqzta/ed8uB2i9R4aM/1TE9PA2t/zFd6jOY/lrD0czO/xtVY8flZ0G848jV5uN/zauxjNdvCQ19rc+aew7nHcbnne7FjjsNq+7KiVTyPfdpe6jU09/pd6v/bxbabX+NaHvPVmnjAV9XeqtpVVbt27Ngx6XIkqRkTD3hJ0jAMeElq1JCXSb4d+Ajw5CSHkrxsqLYkSQ+1bagDV9VFQx1bkrQyh2gkqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjTLgJalRBrwkNcqAl6RGGfCS1CgDXpIaZcBLUqMMeElqlAEvSY0y4CWpUQa8JDXKgJekRhnwktQoA16SGmXAS1KjDHhJapQBL0mNMuAlqVEGvCQ1yoCXpEYZ8JLUKANekhplwEtSowx4SWqUAS9JjTLgJalRgwZ8knOTfDrJ55LsGbItSdKRBgv4JEcBlwPPA04HLkpy+lDtSZKONOQZ/NnA56rqtqq6F/hD4AUDtidJmidVNcyBkxcB51bVv+luvxT4vqr6uQXb7QZ2dzefDHx6Fc1sB74yhnK3Gvv98GK/H15W2+/HV9WOxe7YNp561q6q9gJ717Jvkpmq2jXmkjY9+/3wYr8fXsbZ7yGHaL4EnDzv9kndOknSBhgy4D8GPDHJdyf5NuAlwNUDtidJmmewIZqquj/JzwEfAI4C3lRVnxxzM2sa2mmA/X54sd8PL2Pr92AfskqSJstvskpSowx4SWrUpg/4laY7SPLIJFd19380ydTGVzl+Pfr9yiSfSnJLkmuTPH4SdQ6h7xQXSV6YpJI0cSldn34neXH3vH8yyds2usYh9Hitf1eS/Uk+3r3ez5tEneOU5E1J7khy6xL3J8kbusfkliRnramhqtq0f4w+nP0r4AnAtwF/AZy+YJufAX63W34JcNWk696gfj8bOLZb/ukW+t237912xwHXATcAuyZd9wY9508EPg58R3f7Oydd9wb1ey/w093y6cDtk657DP1+JnAWcOsS958HvB8I8P3AR9fSzmY/g+8z3cELgCu75T8CnpMkG1jjEFbsd1Xtr6pvdjdvYPQ9gxb0neLi14HXAf+wkcUNqE+/Xw5cXlX/D6Cq7tjgGofQp98F/KNu+XjgyxtY3yCq6jrgq8ts8gLgLTVyA/DtSR632nY2e8CfCPz1vNuHunWLblNV9wNfB/7JhlQ3nD79nu9ljP61b8GKfe/erp5cVddsZGED6/OcPwl4UpI/T3JDknM3rLrh9On3NHBxkkPA+4Cf35jSJmq1GbCoiU9VoPVJcjGwC3jWpGvZCEkeAfwmcMmES5mEbYyGaX6Y0Tu265LsrKqvTbSq4V0EvLmqLkvyA8Bbkzylqr416cI2u81+Bt9nuoPD2yTZxugt3N9tSHXD6TXNQ5IfAf4DcEFV3bNBtQ1tpb4fBzwF+HCS2xmNT17dwAetfZ7zQ8DVVXVfVX0e+AyjwN/K+vT7ZcA7AKrqI8AxjCbkatlYpnrZ7AHfZ7qDq4F/3S2/CNhX3acUW9iK/U5yJvBGRuHewljsnGX7XlVfr6rtVTVVVVOMPn+4oKpmJlPu2PR5rb+X0dk7SbYzGrK5bSOLHECffn8ReA5AktMYBfzshla58a4G/lV3Nc33A1+vqr9Z7UE29RBNLTHdQZJfA2aq6mrg9xm9Zfscow8tXjK5isejZ7//C/AY4J3dZ8pfrKoLJlb0mPTse3N69vsDwHOTfAp4APh3VbWl36327PcvAVck+UVGH7hestVP4pK8ndE/1tu7zxZeCxwNUFW/y+izhvOAzwHfBH5qTe1s8cdJkrSEzT5EI0laIwNekhplwEtSowx4SWqUAS9JjTLgtWUk+ca85fOSfGazzqKZ5PbuWvX56y5JMtvNivjZJB9I8oMLttme5L4kl25sxWqRAa8tJ8lzgDcAz6uqL0y6nlW6qqrOrKonAv8ZeHf35Z05P8Hoy1sXTaQ6NcWA15aS5JnAFcD5VfVX3bo3J/nv3QRctyX54W6+7YNJ3jxv3+cm+UiSm5O8M8ljuvW/kuRjSW5NsnduNtIkH07yuiQ3du8WntGt/55u3YFuru41TRdQVfsZTYW7e97qixh9sefEJK3MEKoJMeC1lTyS0df1f7yq/nLBfd8B/ADwi4y+5v164HuAnUme2g2XvAb4kao6C5gBXtnt+9+q6mlV9RTgUcD58467rarOBl7B6NuGAJcCv11VT2U00duhdfTpZuBUgCQnA4+rqhsZzb1y4TqOKxnw2lLuA/4Po8mnFvrj7uvrnwD+b1V9optt8JPAFKNJyU4H/jzJAUbzF82N3z87o18D+wRwDqN/GOa8u/vvTd1xAD4CvDrJq4DHV9Xd6+jT/N8uuJBuUi1G86I7TKN1MeC1lXwLeDFwdpJXL7jvnnnb3LNgn22MgvSDVfXU7u/0qnpZkmOA3wFeVFU7GQ3/HLPIcR/ojkNVvQ24ALgbeF+Sc9bRpzOBg93yRcAl3SyZVwNnrHX4RwIDXltM9ytWzwd+MsliZ/JLuQF4epJ/CpDk0UmexINh/pVuTP5FKx0oyROA26rqDcD/BM5YTR/mHedZjMbfr+hqeUxVnThvpsz/hGfxWodNPZuktJiq+mr3a0bXJek1bWxVzSa5BHh7kkd2q19TVZ9JcgVwK/C3jKavXcmLgZcmua/b5zeW2O6WJHM/SvEO4BbgwiQ/BBwLfB54YVUdTPJa4D0L9n8XcBXwa336KC3kbJKS1CiHaCSpUQa8JDXKgJekRhnwktQoA16SGmXAS1KjDHhJatT/Bx5zNCChdVD8AAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UI3VZFQPb9uM"
},
"source": [
"### Splitting Data"
]
},
{
"cell_type": "code",
"metadata": {
"id": "tY8o51BDcAXT",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "fbc14a8a-ad6b-46b1-8ba1-e19458decad2"
},
"source": [
"# Import train_test_split function\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Split dataset into training set and test set\n",
"X_train, X_test, y_train, y_test = train_test_split(XLDA, kmeans_lda, test_size=0.3,random_state=109) # 70% training and 30% test\n",
"X_train.shape, X_test.shape, y_train.shape, y_test.shape"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"((7, 25), (3, 25), (7, 25), (3, 25))"
]
},
"metadata": {
"tags": []
},
"execution_count": 84
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "vudGhJJhcxn8",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "4d4a458d-01df-45c4-97b9-d18e9b35d16c"
},
"source": [
"print(\"X train\", X_train)\n",
"print(\"X test\", X_test)\n",
"print(\"y train\", y_train)\n",
"print(\"X test\", y_test)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"X train [[0.71942444 0.63997389 0.73540432 0.68775008 0.77909644 0.77846923\n",
" 0.78218557 0.70571175 0.72475793 0.69337689 0.63270695 0.65847644\n",
" 0.78475209 0.80481943 0.78343423 0.78024404 0.70221961 0.72836256\n",
" 0.75478443 0.66953312 0.81795132 0.78496336 0.64501827 0.86455628\n",
" 0.7377681 ]\n",
" [0.80082112 1.76384661 0.72823224 0.76284712 0.79769123 0.68656457\n",
" 0.83468914 0.74576771 1.94665113 0.80749889 0.7036918 0.70478701\n",
" 0.91673652 0.74132829 0.94034999 0.66550394 1.16302158 0.68062933\n",
" 0.67621618 1.05192408 0.93577483 0.67832871 0.84483248 0.69404551\n",
" 1.53855969]\n",
" [1.13924605 0.8834537 1.24858232 0.68218711 0.74324957 0.92721692\n",
" 0.80725571 1.01261206 0.89262673 0.83145245 0.95676322 1.05289823\n",
" 0.69093688 0.85540123 1.10791895 0.65036905 0.95625662 1.09999268\n",
" 0.68827518 0.61648041 0.91613008 1.07665834 0.77859659 0.89589489\n",
" 0.8634334 ]\n",
" [0.65745455 0.80215024 0.7392082 0.95836202 1.07041545 0.72317494\n",
" 0.78623641 0.74624506 0.7962408 0.84845508 0.68805863 0.67005629\n",
" 0.72831332 0.70083931 0.82121772 0.84230552 0.66828209 0.63080163\n",
" 1.21824859 1.03140545 0.71835222 0.70200315 0.66626471 0.71922809\n",
" 0.95468925]\n",
" [0.72459914 0.79974719 0.71309036 0.6514595 0.7044665 0.78131889\n",
" 0.71651015 0.79658169 0.84474558 0.79919044 0.74956753 0.82235703\n",
" 0.73147939 0.69559448 0.88491606 0.73277901 0.79832352 0.6427745\n",
" 0.63452474 0.7791277 0.75350713 0.63719132 0.89661358 0.81957116\n",
" 0.7558214 ]\n",
" [0.74764282 0.71764245 0.80932737 0.78948172 0.79773925 0.65235912\n",
" 0.71555191 0.71207991 0.81888121 0.67013132 0.80878859 0.78203076\n",
" 0.80086094 0.74111737 0.729344 0.83743921 0.8317783 0.78911041\n",
" 0.73506724 0.80197202 0.83564276 0.64197372 0.82392196 0.70835664\n",
" 0.79124558]\n",
" [0.6192943 0.76447613 0.74562272 0.81693046 0.96685607 0.77933322\n",
" 0.78707106 0.74232162 0.63330059 0.93563986 0.70171466 0.88411573\n",
" 0.6775884 0.86686346 0.72558653 0.7164088 0.97268528 0.6765899\n",
" 0.81242026 0.79376679 0.72214931 0.77784214 0.76303316 0.70253696\n",
" 0.71185104]]\n",
"X test [[0.72188201 0.84294267 0.77683419 0.95038401 0.75993562 0.7802179\n",
" 0.66251075 0.823229 0.72116528 0.69789107 0.89461667 0.92457791\n",
" 0.73145992 0.97224079 0.92285554 0.81959204 0.77832303 0.67393852\n",
" 0.77252999 0.8511185 0.719634 0.89311744 0.69634768 0.7904796\n",
" 0.70562698]\n",
" [0.72535567 0.65560033 0.94117306 0.64073553 0.67360546 0.75448764\n",
" 1.17542527 0.77022075 1.15575047 0.81220636 1.33320481 0.90742293\n",
" 0.72428474 0.73349717 0.8333343 1.00306665 0.74894603 0.69853774\n",
" 0.8332386 0.74361725 0.85759665 0.77392573 0.78816635 0.79244518\n",
" 1.23985637]\n",
" [0.63979681 0.81466649 1.04548043 0.70819415 0.67283206 0.66136784\n",
" 0.82582351 0.81133018 0.78281342 0.65088382 0.8291177 0.97817116\n",
" 0.80143616 0.8665057 0.97777469 0.87853807 0.79371411 0.74131466\n",
" 0.75753523 0.71023808 0.84652979 0.81549846 0.92952519 0.85711209\n",
" 0.77484846]]\n",
"y train [[0.19257583 0. 0.04167002 0.14801837 0.2672757 0.45881948\n",
" 0.23332313 0. 0.06963666 0.14922623 0. 0.\n",
" 0.44810594 0.39481803 0.15130212 0.36823327 0.06859676 0.20793435\n",
" 0.20602154 0.12183599 0.4580899 0.33625286 0. 0.84474261\n",
" 0.0385879 ]\n",
" [0.34912243 1. 0.02827657 0.38445027 0.31404524 0.1244478\n",
" 0.33568632 0.1305178 1. 0.54999735 0.10133485 0.11741381\n",
" 1. 0.16531511 0.5617192 0.04291181 1. 0.10619919\n",
" 0.07142322 1. 1. 0.09360747 0.70231758 0.\n",
" 1. ]\n",
" [1. 0.21664357 1. 0.13050418 0.17711382 1.\n",
" 0.28220093 1. 0.19745387 0.63411694 0.46260851 1.\n",
" 0.05581676 0.57765728 1. 0. 0.58207306 1.\n",
" 0.09208196 0. 0.90964718 1. 0.46950814 1.\n",
" 0.18945878]\n",
" [0.0733919 0.14430134 0.04877355 1. 1. 0.2576453\n",
" 0.2412208 0.1320732 0.12406452 0.6938264 0.07901764 0.02935904\n",
" 0.21210668 0.01895863 0.25012577 0.54419556 0. 0.\n",
" 1. 0.95287881 0. 0.14747826 0.07467809 0.12475926\n",
" 0.29901848]\n",
" [0.2025281 0.14216316 0. 0.03376285 0.07956682 0.46918724\n",
" 0.10527952 0.29608944 0.16099662 0.52081991 0.16682504 0.41549577\n",
" 0.22534562 0. 0.41673037 0.23365613 0.2628483 0.02551812\n",
" 0. 0.37352085 0.16168932 0. 0.88432051 0.62187781\n",
" 0.06026228]\n",
" [0.24684698 0.06910796 0.17971699 0.46830539 0.31416601 0.\n",
" 0.1034113 0.02074991 0.14130319 0.06759295 0.25136642 0.31325429\n",
" 0.51546524 0.16455268 0.00982777 0.53039816 0.3304693 0.33740794\n",
" 0.17224325 0.42598303 0.53945883 0.01088228 0.62882016 0.07090004\n",
" 0.10279173]\n",
" [0. 0.11077966 0.06075228 0.55472366 0.7395279 0.46196286\n",
" 0.24284808 0.11928911 0. 1. 0.09851238 0.57207611\n",
" 0. 0.61909008 0. 0.18724185 0.61527975 0.09758983\n",
" 0.30475971 0.40713964 0.01746411 0.32004863 0.41480499 0.04206827\n",
" 0.00747247]]\n",
"X test [[0.19730237 0.18059766 0.11903788 0.97488242 0.2190825 0.46518156\n",
" 0. 0.38291668 0.06690118 0.16507901 0.37389083 0.6746622\n",
" 0.22526422 1. 0.51596201 0.47979627 0.22242199 0.09193887\n",
" 0.23642215 0.53884833 0.00589533 0.5823557 0.18041531 0.47775274\n",
" 0. ]\n",
" [0.20398311 0.01390409 0.42593114 0. 0.00194526 0.37156856\n",
" 1. 0.21019529 0.39779927 0.56652895 1. 0.63116819\n",
" 0.19526114 0.13700777 0.28181698 1. 0.16304326 0.14436787\n",
" 0.34042444 0.29197082 0.64043219 0.31113691 0.50314444 0.4874906\n",
" 0.6413836 ]\n",
" [0.03943156 0.15543806 0.62071907 0.21238348 0. 0.03277592\n",
" 0.31840151 0.34414571 0.11384077 0. 0.28038737 0.81054021\n",
" 0.51787052 0.61779685 0.65960444 0.64692535 0.25353147 0.23553952\n",
" 0.21073405 0.21531527 0.58953195 0.40573497 1. 0.80786268\n",
" 0.08310572]]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "PO0KkwX1cDhI"
},
"source": [
"y_train = np.argmax(y_train, axis=1)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "CZjD769scHhn"
},
"source": [
"### Model Classifier"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ExLYF2jXcKvX"
},
"source": [
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.svm import SVC\n",
"from sklearn.svm import SVR\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"# Logistic Regression\n",
"clf = LogisticRegression(penalty='l2', C=0.1)\n",
"clf.fit(X_train, y_train)\n",
"y_predLR = clf.predict(X_test)\n",
"\n",
"# Decision Tree\n",
"Dt = DecisionTreeClassifier()\n",
"Dt.fit(X_train, y_train)\n",
"y_predDT = Dt.predict(X_test)\n",
"\n",
"# Decision Tree Regressor\n",
"Dtr = DecisionTreeRegressor()\n",
"Dtr.fit(X_train, y_train)\n",
"y_predDTR = Dtr.predict(X_test)\n",
"\n",
"# Gaussian Naive Bias\n",
"gnb = GaussianNB()\n",
"gnb.fit(X_train, y_train)\n",
"y_predGNB = gnb.predict(X_test)\n",
"\n",
"# random forest classifier\n",
"rfc = RandomForestClassifier()\n",
"rfc.fit(X_train, y_train)\n",
"y_predRFC = rfc.predict(X_test)\n",
"\n",
"# random forest regressor\n",
"rfr = RandomForestRegressor(n_estimators = 100)\n",
"rfr.fit(X_train,y_train)\n",
"y_predRFR = rfr.predict(X_test)\n",
"\n",
"# Support vector classifier\n",
"ppn = SVC(C=1, random_state = 0)\n",
"ppn.fit(X_train,y_train)\n",
"y_predSVC = ppn.predict(X_test)\n",
"\n",
"# support vector regression\n",
"svm = SVR(C = 2, kernel = 'rbf', degree = 2)\n",
"svm.fit(X_train, y_train)\n",
"y_predSVR = svm.predict(X_test)\n",
"\n",
"# k neearest neighbor classifier\n",
"Knn = KNeighborsClassifier(n_neighbors = 2, p =2, metric = 'minkowski')\n",
"Knn = Knn.fit(X_train,y_train)\n",
"y_predKNN = Knn.predict(X_test)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "4Yn41p0TcN-8",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 176
},
"outputId": "19c562ca-95bd-4031-e1a7-c174d5c8e387"
},
"source": [
"print(\"prediksi logistic regression\", y_predLR)\n",
"print(\"prediksi Decision Tree\", y_predDT)\n",
"print(\"prediksi Decision Tree Regression\", y_predDTR)\n",
"print(\"prediksi Gaussian Naive Bias\", y_predGNB)\n",
"print(\"prediksi random forest classifer\", y_predRFC)\n",
"print(\"prediksi random forest regressor\", y_predRFR)\n",
"print(\"prediksi support vector classifer\", y_predSVC)\n",
"print(\"prediksi support vector regression\", y_predSVR)\n",
"print(\"prediksi k nearest neighbor classifer\", y_predKNN)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"prediksi logistic regression [22 22 22]\n",
"prediksi Decision Tree [22 9 1]\n",
"prediksi Decision Tree Regression [23. 18. 22.]\n",
"prediksi Gaussian Naive Bias [22 22 22]\n",
"prediksi random forest classifer [22 22 23]\n",
"prediksi random forest regressor [17.94 15.85 15.72]\n",
"prediksi support vector classifer [22 22 22]\n",
"prediksi support vector regression [18.60724939 17.45024725 18.76016895]\n",
"prediksi k nearest neighbor classifer [ 9 22 22]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "zh_1we3ncTFj"
},
"source": [
"### Model Evaluation"
]
},
{
"cell_type": "code",
"metadata": {
"id": "10K9fzIZcUW8",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 410
},
"outputId": "e42bcf8e-1923-4b16-9989-f35dd94e0d51"
},
"source": [
"# Python script for confusion matrix creation. \n",
"from sklearn.metrics import confusion_matrix \n",
"from sklearn.metrics import accuracy_score \n",
"from sklearn.metrics import classification_report \n",
"\n",
"# actual = [3, 0, 1]\n",
"# actual = [2 , 0, 0]\n",
"actual = [0 , 0, 0]\n",
"\n",
"# predicted = y_predLR\n",
"# predicted = y_predDT\n",
"# predicted = y_predDTR\n",
"# predicted = y_predGNB\n",
"# predicted = y_predRFC\n",
"# predicted = y_predSVC\n",
"predicted = y_predKNN\n",
"results = confusion_matrix(actual, predicted) \n",
"\n",
"print ('Confusion Matrix :')\n",
"print(results,'\\n') \n",
"print ('Accuracy Score :',accuracy_score(actual, predicted)) \n",
"\n",
"print ('Report : ')\n",
"print (classification_report(actual, predicted)) \n",
"\n"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Confusion Matrix :\n",
"[[0 1 2]\n",
" [0 0 0]\n",
" [0 0 0]] \n",
"\n",
"Accuracy Score : 0.0\n",
"Report : \n",
" precision recall f1-score support\n",
"\n",
" 0 0.00 0.00 0.00 3.0\n",
" 9 0.00 0.00 0.00 0.0\n",
" 22 0.00 0.00 0.00 0.0\n",
"\n",
" accuracy 0.00 3.0\n",
" macro avg 0.00 0.00 0.00 3.0\n",
"weighted avg 0.00 0.00 0.00 3.0\n",
"\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n"
],
"name": "stderr"
}
]
}
]
}