asyrofist/tracereq

View on GitHub
notebook/modul_traceability.ipynb

Summary

Maintainability
Test Coverage
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "modul_traceability.ipynb",
      "provenance": [],
      "collapsed_sections": [
        "XE2oNhk3bDQy",
        "xImrib3hVC5-",
        "P7DNbxIebJT3",
        "le3PaCqcelnk"
      ],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/asyrofist/Simple-Traceability-SRS-Document/blob/main/modul_traceability.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "VzavsoBC3f-3",
        "outputId": "73fc3001-db6b-4ea3-bbc7-4d4c7efc57db"
      },
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive')\n",
        "%cd /content/drive"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Mounted at /content/drive\n",
            "/content/drive\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HobAntwcbqdQ"
      },
      "source": [
        "!pip install py-automl"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "XE2oNhk3bDQy"
      },
      "source": [
        "# Preprocessing"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "1zyZ9Pt_4GWG",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "b58f33e4-b3bd-4191-c4af-88377e6c1a41"
      },
      "source": [
        "import pandas as pd\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "from spacy.lang.en import English\n",
        "from tabulate import tabulate\n",
        "from sklearn.cluster import KMeans\n",
        "from sklearn.preprocessing import MinMaxScaler\n",
        "from pyAutoML.ml import ML,ml, EncodeCategorical\n",
        "from sklearn.ensemble import RandomForestClassifier\n",
        "from sklearn.tree import DecisionTreeClassifier\n",
        "from sklearn.neighbors import KNeighborsClassifier\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.svm import SVC\n",
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "class prosesData:  \n",
        "  def __init__(self, namaFile = '/content/drive/MyDrive/dataset/reqDataset_v1/dataset_baru.xlsx'):\n",
        "      self.__dataFile = namaFile\n",
        "\n",
        "  def fulldataset(self, inputSRS= 'SRS1'):\n",
        "      xl = pd.ExcelFile(self.__dataFile)\n",
        "      dfs = {sh:xl.parse(sh) for sh in xl.sheet_names}\n",
        "      kalimat = dfs[inputSRS]\n",
        "      kalimat_semua = kalimat.head(len(kalimat))\n",
        "      return kalimat_semua\n",
        "\n",
        "  def preprocessing(self):\n",
        "      xl = pd.ExcelFile(self.__dataFile)\n",
        "      for sh in xl.sheet_names:\n",
        "        df = xl.parse(sh)\n",
        "        print('Processing: [{}] ...'.format(sh))\n",
        "        print(df.head())\n",
        "\n",
        "  def apply_cleaning_function_to_list(self, X):\n",
        "      cleaned_X = []\n",
        "      for element in X:\n",
        "          cleaned_X.append(prosesData.clean_text(self, raw_text= element))\n",
        "      return cleaned_X\n",
        "\n",
        "  def clean_text(self, raw_text):\n",
        "      nlp = English()\n",
        "      tokenizer = nlp.Defaults.create_tokenizer(nlp)\n",
        "      tokens = tokenizer(raw_text)\n",
        "      lemma_list = [token.lemma_.lower() for token in tokens if token.is_stop is False and token.is_punct is False and token.is_alpha is True]\n",
        "      joined_words = ( \" \".join(lemma_list))\n",
        "      return joined_words  \n",
        "\n",
        "  def __del__(self):\n",
        "      print ('Destructor called.')    \n",
        "\n",
        "\n",
        "\n",
        "class pengukuranEvaluasi:\n",
        "  def __init__(self, dataPertama, dataKedua):\n",
        "      self.data1 = dataPertama\n",
        "      self.data2 = dataKedua\n",
        "\n",
        "  def kmeans_cluster(self, nilai_cluster= 3):\n",
        "      XVSM = np.array(self.data1)\n",
        "      yVSM = np.array(self.data2)\n",
        "      kmeans = KMeans(n_clusters=nilai_cluster) # You want cluster the passenger records into 2: Survived or Not survived\n",
        "      kmeans.fit(XVSM)\n",
        "      correct = 0\n",
        "      for i in range(len(XVSM)):\n",
        "          predict_me = np.array(XVSM[i].astype(float))\n",
        "          predict_me = predict_me.reshape(-1, len(predict_me))\n",
        "          prediction = kmeans.predict(predict_me)\n",
        "          if prediction[0] == yVSM.all():\n",
        "              correct += 1\n",
        "      scaler = MinMaxScaler()\n",
        "      XVSM_scaled = scaler.fit_transform(yVSM)\n",
        "      print(\"data_correction {}\".format(correct/len(XVSM)))\n",
        "      return (XVSM_scaled)\n",
        "\n",
        "\n",
        "  def ukur_evaluasi(self):\n",
        "      X_train, X_test, y_train, y_test = train_test_split(pengukuranEvaluasi.kmeans_cluster(self), self.data2, test_size=0.3,random_state=109) # 70% training and 30% test\n",
        "      y_train = y_train.argmax(axis= 1)\n",
        "      X = X_train\n",
        "      Y = y_train\n",
        "      Y = EncodeCategorical(Y)\n",
        "      size = 0.4\n",
        "      return ML(X, Y, size, SVC(), RandomForestClassifier(), DecisionTreeClassifier(), KNeighborsClassifier(), LogisticRegression(max_iter = 7000))      \n",
        "\n",
        "  def __del__(self):\n",
        "      print ('Destructor called.')    \n",
        "\n",
        "\n",
        "if __name__ == \"__main__\":\n",
        "    myData = prosesData() # myData.preprocessing()\n",
        "    req = myData.fulldataset() # myData.fulldataset(inputSRS)\n",
        "    text_to_clean = list(req['Requirement Statement'])\n",
        "    cleaned_text = myData.apply_cleaning_function_to_list(text_to_clean)\n",
        "    data_raw = pd.DataFrame([text_to_clean, cleaned_text],index=['ORIGINAL','CLEANED'], columns= req['ID'])\n",
        "    print(tabulate(data_raw, headers = 'keys', tablefmt = 'psql'))  \n",
        "    myData.__del__()\n",
        "\n",
        "    # myUkur= pengukuranEvaluasi(index1, index2)\n",
        "    # myUkur.kmeans_cluster()\n",
        "    # myUkur.ukur_evaluasi()\n"
      ],
      "execution_count": 30,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "+----------+-------------------------------------------+---------------------------------------+-----------------------------------------------------+-----------------------------------------------+-------------------------------------------------------+-------------------------------------------------------------------+-----------------------------------------------+-----------------------------------+----------------------------------+--------------------------------------------------------+--------------------------------------------------------------------+----------------------------------------------------+------------------------------------------------------------------+\n",
            "|          | F01                                       | F02                                   | F03                                                 | F04                                           | F05                                                   | F06                                                               | F07                                           | NF01a                             | NF01b                            | NF01c                                                  | NF02                                                               | NF03                                               | NF04                                                             |\n",
            "|----------+-------------------------------------------+---------------------------------------+-----------------------------------------------------+-----------------------------------------------+-------------------------------------------------------+-------------------------------------------------------------------+-----------------------------------------------+-----------------------------------+----------------------------------+--------------------------------------------------------+--------------------------------------------------------------------+----------------------------------------------------+------------------------------------------------------------------|\n",
            "| ORIGINAL | Users can search for other user accounts. | Users can add other users as friends. | Users can confirm friend requests from other users. | The sender of the message can send a message. | The recipient of the message can receive the message. | The sender of the message can send a message drawn on the canvas. | Message senders can send text-based messages. | A user wants to create an account | The desired username is occupied | The user must be asked to choose a different username. | Reliability that the system gives the right results on the search. | The application must be connected to the Internet. | Applications must be portable with all types of Android devices. |\n",
            "| CLEANED  | users search user accounts                | users add users friends               | users confirm friend requests users                 | sender message send message                   | recipient message receive message                     | sender message send message drawn canvas                          | message senders send text based messages      | user wants create account         | desired username occupied        | user asked choose different username                   | reliability system gives right results search                      | application connected internet                     | applications portable types android devices                      |\n",
            "+----------+-------------------------------------------+---------------------------------------+-----------------------------------------------------+-----------------------------------------------+-------------------------------------------------------+-------------------------------------------------------------------+-----------------------------------------------+-----------------------------------+----------------------------------+--------------------------------------------------------+--------------------------------------------------------------------+----------------------------------------------------+------------------------------------------------------------------+\n",
            "Destructor called.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "xImrib3hVC5-"
      },
      "source": [
        "# Vector Space Model (VSM)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zBBspiPE7NcW",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "2e91d6dd-c584-4974-eba0-712772f77715"
      },
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "import string #allows for format()\n",
        "import math\n",
        "from sklearn.feature_extraction.text import CountVectorizer\n",
        "from sklearn.metrics import pairwise_distances\n",
        "from sklearn.metrics.pairwise import pairwise_kernels\n",
        "from sklearn.cluster import KMeans\n",
        "from sklearn.preprocessing import MinMaxScaler\n",
        "\n",
        "\n",
        "class measurement:\n",
        "  def __init__(selft):\n",
        "      pass\n",
        "  \n",
        "  def bagOfWords(self, data_raw):\n",
        "      b = CountVectorizer(data_raw) # dilakukan vektorisasi\n",
        "      c = b.fit(data_raw) # dilakukan fiting \n",
        "      d = b.get_feature_names() # diambil namanya, sebagai kolom\n",
        "      e = b.transform(data_raw).toarray() #data \n",
        "      f = req.ID # diambil sebagai indeks\n",
        "      bow_df= pd.DataFrame(e, f, d) #data, indeks, kolom\n",
        "      return bow_df\n",
        "\n",
        "  def l2_normalizer(self, vec):\n",
        "      denom = np.sum([el**2 for el in vec])\n",
        "      return [(el / math.sqrt(denom)) for el in vec]\n",
        "\n",
        "  def build_lexicon(self, corpus):\n",
        "      lexicon = set()\n",
        "      for doc in corpus:\n",
        "          lexicon.update([word for word in doc.split()])\n",
        "      return lexicon\n",
        "\n",
        "  def freq(self, term, document):\n",
        "    return document.split().count(term)\n",
        "\n",
        "  def numDocsContaining(self, word, doclist):\n",
        "      doccount = 0\n",
        "      for doc in doclist:\n",
        "          if measurement.freq(self, term= word, document= doc) > 0:\n",
        "              doccount +=1\n",
        "      return doccount \n",
        "\n",
        "  def idf(self, word, doclist):\n",
        "      n_samples = len(doclist)\n",
        "      df = measurement.numDocsContaining(self, word, doclist)\n",
        "      return np.log(n_samples / 1+df)\n",
        "\n",
        "  def build_idf_matrix(self, idf_vector):\n",
        "      idf_mat = np.zeros((len(idf_vector), len(idf_vector)))\n",
        "      np.fill_diagonal(idf_mat, idf_vector)\n",
        "      return idf_mat\n",
        "\n",
        "  def cosine_measurement(self, data):\n",
        "      X = np.array(data)\n",
        "      Y = np.array(data)\n",
        "      cosine_similaritas = pairwise_kernels(X, Y, metric='linear')\n",
        "      frequency_cosine = pd.DataFrame(cosine_similaritas, index=req['ID'],columns=req['ID'])\n",
        "      return frequency_cosine    \n",
        "\n",
        "  def threshold_value(self, threshold, data):\n",
        "      dt = data.values >= threshold\n",
        "      dt1 = pd.DataFrame(dt, index= data.index, columns= data.columns)\n",
        "      mask = dt1.isin([True])\n",
        "      dt3 = dt1.where(mask, other= 0)\n",
        "      mask2 = dt3.isin([False])\n",
        "      th_cosine1 = dt3.where(mask2, other= 1)\n",
        "      return th_cosine1\n",
        "\n",
        "  def __del__(self):\n",
        "      print ('Destructor called.')    \n",
        "\n",
        "\n",
        "if __name__ == \"__main__\":\n",
        "\n",
        "      myVSMMeasurement = measurement()\n",
        "      bow = myVSMMeasurement.bagOfWords(data_raw= cleaned_text)\n",
        "      print(\"\\nBag of Words\")\n",
        "      print(tabulate(bow, headers = 'keys', tablefmt = 'psql'))   \n",
        "\n",
        "      mydoclist = cleaned_text\n",
        "      vocabulary = myVSMMeasurement.build_lexicon(cleaned_text)\n",
        "\n",
        "      # tfidf normal\n",
        "      my_idf_vector = [myVSMMeasurement.idf(word, mydoclist) for word in vocabulary] # vektor idf\n",
        "      my_idf_matrix = myVSMMeasurement.build_idf_matrix(my_idf_vector) # membuat matriks idf\n",
        "      doc_term_matrix_tfidf = [np.dot(tf_vector, my_idf_matrix) for tf_vector in bow.values] \n",
        "      frequency_TFIDF_normal = pd.DataFrame(doc_term_matrix_tfidf, index= req.ID, columns= vocabulary) #hasil freq_tfidf\n",
        "      print(\"\\nTFIDF\")\n",
        "      print(tabulate(frequency_TFIDF_normal, headers = 'keys', tablefmt = 'psql'))   \n",
        "\n",
        "      dt_cosine = myVSMMeasurement.cosine_measurement(frequency_TFIDF_normal.values)\n",
        "      print(\"\\ncosine + TFIDF with l2 normalizer\")\n",
        "      print(tabulate(dt_cosine, headers = 'keys', tablefmt = 'psql'))   \n",
        "\n",
        "      print(\"\\nthreshold normal\")\n",
        "      th_cosine = myVSMMeasurement.threshold_value(0.2, dt_cosine)   \n",
        "      print(tabulate(myVSMMeasurement.threshold_value(0.2, dt_cosine), headers = 'keys', tablefmt = 'psql'))   \n",
        "\n",
        "      # tfidf dengan l2 normalizer\n",
        "      doc_term_matrix_l2 = [myVSMMeasurement.l2_normalizer(vec) for vec in bow.values]\n",
        "      doc_term_matrix_tfidf_l2 = [myVSMMeasurement.l2_normalizer(tf_vector) for tf_vector in doc_term_matrix_tfidf]\n",
        "      frequency_TFIDF_l2 = pd.DataFrame(doc_term_matrix_tfidf_l2, index= req.ID, columns= vocabulary) #hasil freq_tfidf\n",
        "      print(\"\\nTFIDF with l2 normalizer\")\n",
        "      print(tabulate(frequency_TFIDF_l2, headers = 'keys', tablefmt = 'psql'))   \n",
        "\n",
        "      dt_cosine_l2 = myVSMMeasurement.cosine_measurement(frequency_TFIDF_l2.values)\n",
        "      print(\"\\ncosine + TFIDF with l2 normalizer\")\n",
        "      print(tabulate(dt_cosine_l2, headers = 'keys', tablefmt = 'psql'))   \n",
        "\n",
        "      print(\"\\nthreshold l2 normalizer\")\n",
        "      th_cosine_l2 = myVSMMeasurement.threshold_value(0.2, dt_cosine_l2)   \n",
        "      print(tabulate(th_cosine_l2, headers = 'keys', tablefmt = 'psql')) \n",
        "\n",
        "      myVSMMeasurement.__del__()\n",
        "\n",
        "      # myEvaluasi = pengukuranEvaluasi(dt_cosine_l2.values, th_cosine_l2.values) #ndengan l2 normalizer\n",
        "      myEvaluasi = pengukuranEvaluasi(dt_cosine.values, th_cosine.values) # normal\n",
        "      myEvaluasi.ukur_evaluasi()"
      ],
      "execution_count": 27,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Destructor called.\n",
            "\n",
            "Bag of Words\n",
            "+-------+-----------+------------+-------+-----------+---------------+----------------+---------+---------+----------+----------+-----------+-------------+----------+-----------+-----------+-------------+---------+----------+-----------+---------+------------+-----------+------------+------------+------------+-----------+-------------+---------------+------------+-----------+---------+----------+--------+----------+-----------+----------+--------+---------+--------+------------+---------+---------+\n",
            "| ID    |   account |   accounts |   add |   android |   application |   applications |   asked |   based |   canvas |   choose |   confirm |   connected |   create |   desired |   devices |   different |   drawn |   friend |   friends |   gives |   internet |   message |   messages |   occupied |   portable |   receive |   recipient |   reliability |   requests |   results |   right |   search |   send |   sender |   senders |   system |   text |   types |   user |   username |   users |   wants |\n",
            "|-------+-----------+------------+-------+-----------+---------------+----------------+---------+---------+----------+----------+-----------+-------------+----------+-----------+-----------+-------------+---------+----------+-----------+---------+------------+-----------+------------+------------+------------+-----------+-------------+---------------+------------+-----------+---------+----------+--------+----------+-----------+----------+--------+---------+--------+------------+---------+---------|\n",
            "| F01   |         0 |          1 |     0 |         0 |             0 |              0 |       0 |       0 |        0 |        0 |         0 |           0 |        0 |         0 |         0 |           0 |       0 |        0 |         0 |       0 |          0 |         0 |          0 |          0 |          0 |         0 |           0 |             0 |          0 |         0 |       0 |        1 |      0 |        0 |         0 |        0 |      0 |       0 |      1 |          0 |       1 |       0 |\n",
            "| F02   |         0 |          0 |     1 |         0 |             0 |              0 |       0 |       0 |        0 |        0 |         0 |           0 |        0 |         0 |         0 |           0 |       0 |        0 |         1 |       0 |          0 |         0 |          0 |          0 |          0 |         0 |           0 |             0 |          0 |         0 |       0 |        0 |      0 |        0 |         0 |        0 |      0 |       0 |      0 |          0 |       2 |       0 |\n",
            "| F03   |         0 |          0 |     0 |         0 |             0 |              0 |       0 |       0 |        0 |        0 |         1 |           0 |        0 |         0 |         0 |           0 |       0 |        1 |         0 |       0 |          0 |         0 |          0 |          0 |          0 |         0 |           0 |             0 |          1 |         0 |       0 |        0 |      0 |        0 |         0 |        0 |      0 |       0 |      0 |          0 |       2 |       0 |\n",
            "| F04   |         0 |          0 |     0 |         0 |             0 |              0 |       0 |       0 |        0 |        0 |         0 |           0 |        0 |         0 |         0 |           0 |       0 |        0 |         0 |       0 |          0 |         2 |          0 |          0 |          0 |         0 |           0 |             0 |          0 |         0 |       0 |        0 |      1 |        1 |         0 |        0 |      0 |       0 |      0 |          0 |       0 |       0 |\n",
            "| F05   |         0 |          0 |     0 |         0 |             0 |              0 |       0 |       0 |        0 |        0 |         0 |           0 |        0 |         0 |         0 |           0 |       0 |        0 |         0 |       0 |          0 |         2 |          0 |          0 |          0 |         1 |           1 |             0 |          0 |         0 |       0 |        0 |      0 |        0 |         0 |        0 |      0 |       0 |      0 |          0 |       0 |       0 |\n",
            "| F06   |         0 |          0 |     0 |         0 |             0 |              0 |       0 |       0 |        1 |        0 |         0 |           0 |        0 |         0 |         0 |           0 |       1 |        0 |         0 |       0 |          0 |         2 |          0 |          0 |          0 |         0 |           0 |             0 |          0 |         0 |       0 |        0 |      1 |        1 |         0 |        0 |      0 |       0 |      0 |          0 |       0 |       0 |\n",
            "| F07   |         0 |          0 |     0 |         0 |             0 |              0 |       0 |       1 |        0 |        0 |         0 |           0 |        0 |         0 |         0 |           0 |       0 |        0 |         0 |       0 |          0 |         1 |          1 |          0 |          0 |         0 |           0 |             0 |          0 |         0 |       0 |        0 |      1 |        0 |         1 |        0 |      1 |       0 |      0 |          0 |       0 |       0 |\n",
            "| NF01a |         1 |          0 |     0 |         0 |             0 |              0 |       0 |       0 |        0 |        0 |         0 |           0 |        1 |         0 |         0 |           0 |       0 |        0 |         0 |       0 |          0 |         0 |          0 |          0 |          0 |         0 |           0 |             0 |          0 |         0 |       0 |        0 |      0 |        0 |         0 |        0 |      0 |       0 |      1 |          0 |       0 |       1 |\n",
            "| NF01b |         0 |          0 |     0 |         0 |             0 |              0 |       0 |       0 |        0 |        0 |         0 |           0 |        0 |         1 |         0 |           0 |       0 |        0 |         0 |       0 |          0 |         0 |          0 |          1 |          0 |         0 |           0 |             0 |          0 |         0 |       0 |        0 |      0 |        0 |         0 |        0 |      0 |       0 |      0 |          1 |       0 |       0 |\n",
            "| NF01c |         0 |          0 |     0 |         0 |             0 |              0 |       1 |       0 |        0 |        1 |         0 |           0 |        0 |         0 |         0 |           1 |       0 |        0 |         0 |       0 |          0 |         0 |          0 |          0 |          0 |         0 |           0 |             0 |          0 |         0 |       0 |        0 |      0 |        0 |         0 |        0 |      0 |       0 |      1 |          1 |       0 |       0 |\n",
            "| NF02  |         0 |          0 |     0 |         0 |             0 |              0 |       0 |       0 |        0 |        0 |         0 |           0 |        0 |         0 |         0 |           0 |       0 |        0 |         0 |       1 |          0 |         0 |          0 |          0 |          0 |         0 |           0 |             1 |          0 |         1 |       1 |        1 |      0 |        0 |         0 |        1 |      0 |       0 |      0 |          0 |       0 |       0 |\n",
            "| NF03  |         0 |          0 |     0 |         0 |             1 |              0 |       0 |       0 |        0 |        0 |         0 |           1 |        0 |         0 |         0 |           0 |       0 |        0 |         0 |       0 |          1 |         0 |          0 |          0 |          0 |         0 |           0 |             0 |          0 |         0 |       0 |        0 |      0 |        0 |         0 |        0 |      0 |       0 |      0 |          0 |       0 |       0 |\n",
            "| NF04  |         0 |          0 |     0 |         1 |             0 |              1 |       0 |       0 |        0 |        0 |         0 |           0 |        0 |         0 |         1 |           0 |       0 |        0 |         0 |       0 |          0 |         0 |          0 |          0 |          1 |         0 |           0 |             0 |          0 |         0 |       0 |        0 |      0 |        0 |         0 |        0 |      0 |       1 |      0 |          0 |       0 |       0 |\n",
            "+-------+-----------+------------+-------+-----------+---------------+----------------+---------+---------+----------+----------+-----------+-------------+----------+-----------+-----------+-------------+---------+----------+-----------+---------+------------+-----------+------------+------------+------------+-----------+-------------+---------------+------------+-----------+---------+----------+--------+----------+-----------+----------+--------+---------+--------+------------+---------+---------+\n",
            "\n",
            "TFIDF\n",
            "+-------+------------+-----------+---------+----------+---------+----------+---------------+------------+---------+---------+----------+-----------+-----------+---------+-------------+----------+------------+-----------+---------+------------+-----------+------------+----------+----------+---------+---------+---------+-----------+----------+-----------+-------------+------------+-----------+---------+----------------+-----------+------------+-----------+---------------+---------+---------+-------------+\n",
            "| ID    |   portable |   results |   drawn |   canvas |   users |   friend |   reliability |   internet |    text |   gives |   search |   message |   confirm |   asked |   recipient |   choose |   requests |   receive |   right |   username |   account |   messages |   create |   system |    user |    send |   based |   android |   sender |   desired |   connected |   accounts |   friends |   types |   applications |   senders |   occupied |   devices |   application |   wants |     add |   different |\n",
            "|-------+------------+-----------+---------+----------+---------+----------+---------------+------------+---------+---------+----------+-----------+-----------+---------+-------------+----------+------------+-----------+---------+------------+-----------+------------+----------+----------+---------+---------+---------+-----------+----------+-----------+-------------+------------+-----------+---------+----------------+-----------+------------+-----------+---------------+---------+---------+-------------|\n",
            "| F01   |    0       |   2.63906 | 0       |  0       | 0       |  0       |       0       |    0       | 0       | 0       |  0       |   0       |   0       | 0       |     0       |  0       |    0       |   0       | 0       |    0       |   0       |    0       |  0       |  0       | 0       | 0       | 0       |   0       |  0       |   0       |     0       |    2.63906 |   0       | 0       |        0       |   0       |    0       |   0       |       2.63906 | 0       | 2.63906 |     0       |\n",
            "| F02   |    0       |   0       | 2.63906 |  0       | 0       |  0       |       0       |    0       | 0       | 0       |  0       |   0       |   0       | 0       |     0       |  0       |    0       |   0       | 2.63906 |    0       |   0       |    0       |  0       |  0       | 0       | 0       | 0       |   0       |  0       |   0       |     0       |    0       |   0       | 0       |        0       |   0       |    0       |   0       |       0       | 0       | 5.27811 |     0       |\n",
            "| F03   |    0       |   0       | 0       |  0       | 0       |  0       |       0       |    0       | 0       | 0       |  2.70805 |   0       |   0       | 0       |     0       |  0       |    0       |   2.63906 | 0       |    0       |   0       |    0       |  0       |  0       | 0       | 0       | 0       |   0       |  2.70805 |   0       |     0       |    0       |   0       | 0       |        0       |   0       |    0       |   0       |       0       | 0       | 5.27811 |     0       |\n",
            "| F04   |    0       |   0       | 0       |  0       | 0       |  0       |       0       |    0       | 0       | 0       |  0       |   0       |   0       | 0       |     0       |  0       |    0       |   0       | 0       |    0       |   0       |    5.27811 |  0       |  0       | 0       | 0       | 0       |   0       |  0       |   0       |     0       |    0       |   2.63906 | 2.63906 |        0       |   0       |    0       |   0       |       0       | 0       | 0       |     0       |\n",
            "| F05   |    0       |   0       | 0       |  0       | 0       |  0       |       0       |    0       | 0       | 0       |  0       |   0       |   0       | 0       |     0       |  0       |    0       |   0       | 0       |    0       |   0       |    5.27811 |  0       |  0       | 0       | 2.77259 | 2.63906 |   0       |  0       |   0       |     0       |    0       |   0       | 0       |        0       |   0       |    0       |   0       |       0       | 0       | 0       |     0       |\n",
            "| F06   |    0       |   0       | 0       |  0       | 0       |  0       |       0       |    0       | 2.63906 | 0       |  0       |   0       |   0       | 0       |     0       |  0       |    2.63906 |   0       | 0       |    0       |   0       |    5.27811 |  0       |  0       | 0       | 0       | 0       |   0       |  0       |   0       |     0       |    0       |   2.63906 | 2.63906 |        0       |   0       |    0       |   0       |       0       | 0       | 0       |     0       |\n",
            "| F07   |    0       |   0       | 0       |  0       | 0       |  0       |       0       |    2.63906 | 0       | 0       |  0       |   0       |   0       | 0       |     0       |  0       |    0       |   0       | 0       |    0       |   0       |    2.63906 |  2.63906 |  0       | 0       | 0       | 0       |   0       |  0       |   0       |     0       |    0       |   2.63906 | 0       |        2.63906 |   0       |    2.63906 |   0       |       0       | 0       | 0       |     0       |\n",
            "| NF01a |    2.63906 |   0       | 0       |  0       | 0       |  0       |       0       |    0       | 0       | 0       |  0       |   0       |   2.63906 | 0       |     0       |  0       |    0       |   0       | 0       |    0       |   0       |    0       |  0       |  0       | 0       | 0       | 0       |   0       |  0       |   0       |     0       |    0       |   0       | 0       |        0       |   0       |    0       |   0       |       2.63906 | 0       | 0       |     2.63906 |\n",
            "| NF01b |    0       |   0       | 0       |  0       | 0       |  0       |       0       |    0       | 0       | 0       |  0       |   0       |   0       | 2.63906 |     0       |  0       |    0       |   0       | 0       |    0       |   0       |    0       |  0       |  2.63906 | 0       | 0       | 0       |   0       |  0       |   0       |     0       |    0       |   0       | 0       |        0       |   0       |    0       |   0       |       0       | 2.63906 | 0       |     0       |\n",
            "| NF01c |    0       |   0       | 0       |  0       | 0       |  0       |       2.63906 |    0       | 0       | 2.63906 |  0       |   0       |   0       | 0       |     0       |  2.63906 |    0       |   0       | 0       |    0       |   0       |    0       |  0       |  0       | 0       | 0       | 0       |   0       |  0       |   0       |     0       |    0       |   0       | 0       |        0       |   0       |    0       |   0       |       2.63906 | 2.63906 | 0       |     0       |\n",
            "| NF02  |    0       |   0       | 0       |  0       | 0       |  0       |       0       |    0       | 0       | 0       |  0       |   0       |   0       | 0       |     0       |  0       |    0       |   0       | 0       |    2.70805 |   0       |    0       |  0       |  0       | 0       | 0       | 0       |   2.63906 |  0       |   2.63906 |     2.63906 |    2.63906 |   0       | 0       |        0       |   2.63906 |    0       |   0       |       0       | 0       | 0       |     0       |\n",
            "| NF03  |    0       |   0       | 0       |  0       | 2.77259 |  0       |       0       |    0       | 0       | 0       |  0       |   2.83321 |   0       | 0       |     0       |  0       |    0       |   0       | 0       |    0       |   2.63906 |    0       |  0       |  0       | 0       | 0       | 0       |   0       |  0       |   0       |     0       |    0       |   0       | 0       |        0       |   0       |    0       |   0       |       0       | 0       | 0       |     0       |\n",
            "| NF04  |    0       |   0       | 0       |  2.63906 | 0       |  2.63906 |       0       |    0       | 0       | 0       |  0       |   0       |   0       | 0       |     2.63906 |  0       |    0       |   0       | 0       |    0       |   0       |    0       |  0       |  0       | 2.77259 | 0       | 0       |   0       |  0       |   0       |     0       |    0       |   0       | 0       |        0       |   0       |    0       |   2.63906 |       0       | 0       | 0       |     0       |\n",
            "+-------+------------+-----------+---------+----------+---------+----------+---------------+------------+---------+---------+----------+-----------+-----------+---------+-------------+----------+------------+-----------+---------+------------+-----------+------------+----------+----------+---------+---------+---------+-----------+----------+-----------+-------------+------------+-----------+---------+----------------+-----------+------------+-----------+---------------+---------+---------+-------------+\n",
            "\n",
            "cosine + TFIDF with l2 normalizer\n",
            "+-------+----------+---------+---------+---------+---------+---------+---------+----------+----------+----------+----------+--------+---------+\n",
            "| ID    |      F01 |     F02 |     F03 |     F04 |     F05 |     F06 |     F07 |    NF01a |    NF01b |    NF01c |     NF02 |   NF03 |    NF04 |\n",
            "|-------+----------+---------+---------+---------+---------+---------+---------+----------+----------+----------+----------+--------+---------|\n",
            "| F01   | 27.8585  | 13.9292 | 13.9292 |  0      |  0      |  0      |  0      |  6.96462 |  0       |  6.96462 |  6.96462 |  0     |  0      |\n",
            "| F02   | 13.9292  | 41.7877 | 27.8585 |  0      |  0      |  0      |  0      |  0       |  0       |  0       |  0       |  0     |  0      |\n",
            "| F03   | 13.9292  | 27.8585 | 49.4902 |  0      |  0      |  0      |  0      |  0       |  0       |  0       |  0       |  0     |  0      |\n",
            "| F04   |  0       |  0      |  0      | 41.7877 | 27.8585 | 41.7877 | 20.8939 |  0       |  0       |  0       |  0       |  0     |  0      |\n",
            "| F05   |  0       |  0      |  0      | 27.8585 | 42.5104 | 27.8585 | 13.9292 |  0       |  0       |  0       |  0       |  0     |  0      |\n",
            "| F06   |  0       |  0      |  0      | 41.7877 | 27.8585 | 55.717  | 20.8939 |  0       |  0       |  0       |  0       |  0     |  0      |\n",
            "| F07   |  0       |  0      |  0      | 20.8939 | 13.9292 | 20.8939 | 41.7877 |  0       |  0       |  0       |  0       |  0     |  0      |\n",
            "| NF01a |  6.96462 |  0      |  0      |  0      |  0      |  0      |  0      | 27.8585  |  0       |  6.96462 |  0       |  0     |  0      |\n",
            "| NF01b |  0       |  0      |  0      |  0      |  0      |  0      |  0      |  0       | 20.8939  |  6.96462 |  0       |  0     |  0      |\n",
            "| NF01c |  6.96462 |  0      |  0      |  0      |  0      |  0      |  0      |  6.96462 |  6.96462 | 34.8231  |  0       |  0     |  0      |\n",
            "| NF02  |  6.96462 |  0      |  0      |  0      |  0      |  0      |  0      |  0       |  0       |  0       | 42.1567  |  0     |  0      |\n",
            "| NF03  |  0       |  0      |  0      |  0      |  0      |  0      |  0      |  0       |  0       |  0       |  0       | 22.679 |  0      |\n",
            "| NF04  |  0       |  0      |  0      |  0      |  0      |  0      |  0      |  0       |  0       |  0       |  0       |  0     | 35.5457 |\n",
            "+-------+----------+---------+---------+---------+---------+---------+---------+----------+----------+----------+----------+--------+---------+\n",
            "\n",
            "threshold normal\n",
            "+-------+-------+-------+-------+-------+-------+-------+-------+---------+---------+---------+--------+--------+--------+\n",
            "| ID    |   F01 |   F02 |   F03 |   F04 |   F05 |   F06 |   F07 |   NF01a |   NF01b |   NF01c |   NF02 |   NF03 |   NF04 |\n",
            "|-------+-------+-------+-------+-------+-------+-------+-------+---------+---------+---------+--------+--------+--------|\n",
            "| F01   |     1 |     1 |     1 |     0 |     0 |     0 |     0 |       1 |       0 |       1 |      1 |      0 |      0 |\n",
            "| F02   |     1 |     1 |     1 |     0 |     0 |     0 |     0 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| F03   |     1 |     1 |     1 |     0 |     0 |     0 |     0 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| F04   |     0 |     0 |     0 |     1 |     1 |     1 |     1 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| F05   |     0 |     0 |     0 |     1 |     1 |     1 |     1 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| F06   |     0 |     0 |     0 |     1 |     1 |     1 |     1 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| F07   |     0 |     0 |     0 |     1 |     1 |     1 |     1 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| NF01a |     1 |     0 |     0 |     0 |     0 |     0 |     0 |       1 |       0 |       1 |      0 |      0 |      0 |\n",
            "| NF01b |     0 |     0 |     0 |     0 |     0 |     0 |     0 |       0 |       1 |       1 |      0 |      0 |      0 |\n",
            "| NF01c |     1 |     0 |     0 |     0 |     0 |     0 |     0 |       1 |       1 |       1 |      0 |      0 |      0 |\n",
            "| NF02  |     1 |     0 |     0 |     0 |     0 |     0 |     0 |       0 |       0 |       0 |      1 |      0 |      0 |\n",
            "| NF03  |     0 |     0 |     0 |     0 |     0 |     0 |     0 |       0 |       0 |       0 |      0 |      1 |      0 |\n",
            "| NF04  |     0 |     0 |     0 |     0 |     0 |     0 |     0 |       0 |       0 |       0 |      0 |      0 |      1 |\n",
            "+-------+-------+-------+-------+-------+-------+-------+-------+---------+---------+---------+--------+--------+--------+\n",
            "\n",
            "TFIDF with l2 normalizer\n",
            "+-------+------------+-----------+----------+----------+----------+----------+---------------+------------+----------+----------+----------+-----------+-----------+---------+-------------+----------+------------+-----------+----------+------------+-----------+------------+----------+----------+----------+----------+----------+-----------+----------+-----------+-------------+------------+-----------+----------+----------------+-----------+------------+-----------+---------------+----------+----------+-------------+\n",
            "| ID    |   portable |   results |    drawn |   canvas |    users |   friend |   reliability |   internet |     text |    gives |   search |   message |   confirm |   asked |   recipient |   choose |   requests |   receive |    right |   username |   account |   messages |   create |   system |     user |     send |    based |   android |   sender |   desired |   connected |   accounts |   friends |    types |   applications |   senders |   occupied |   devices |   application |    wants |      add |   different |\n",
            "|-------+------------+-----------+----------+----------+----------+----------+---------------+------------+----------+----------+----------+-----------+-----------+---------+-------------+----------+------------+-----------+----------+------------+-----------+------------+----------+----------+----------+----------+----------+-----------+----------+-----------+-------------+------------+-----------+----------+----------------+-----------+------------+-----------+---------------+----------+----------+-------------|\n",
            "| F01   |        0   |       0.5 | 0        | 0        | 0        | 0        |      0        |   0        | 0        | 0        | 0        |  0        |       0   | 0       |    0        | 0        |   0        |  0        | 0        |   0        |  0        |   0        | 0        |  0       | 0        | 0        | 0        |  0        | 0        |  0        |    0        |   0.5      |  0        | 0        |       0        |  0        |   0        |  0        |      0.5      | 0        | 0.5      |         0   |\n",
            "| F02   |        0   |       0   | 0.408248 | 0        | 0        | 0        |      0        |   0        | 0        | 0        | 0        |  0        |       0   | 0       |    0        | 0        |   0        |  0        | 0.408248 |   0        |  0        |   0        | 0        |  0       | 0        | 0        | 0        |  0        | 0        |  0        |    0        |   0        |  0        | 0        |       0        |  0        |   0        |  0        |      0        | 0        | 0.816497 |         0   |\n",
            "| F03   |        0   |       0   | 0        | 0        | 0        | 0        |      0        |   0        | 0        | 0        | 0.384944 |  0        |       0   | 0       |    0        | 0        |   0        |  0.375136 | 0        |   0        |  0        |   0        | 0        |  0       | 0        | 0        | 0        |  0        | 0.384944 |  0        |    0        |   0        |  0        | 0        |       0        |  0        |   0        |  0        |      0        | 0        | 0.750273 |         0   |\n",
            "| F04   |        0   |       0   | 0        | 0        | 0        | 0        |      0        |   0        | 0        | 0        | 0        |  0        |       0   | 0       |    0        | 0        |   0        |  0        | 0        |   0        |  0        |   0.816497 | 0        |  0       | 0        | 0        | 0        |  0        | 0        |  0        |    0        |   0        |  0.408248 | 0.408248 |       0        |  0        |   0        |  0        |      0        | 0        | 0        |         0   |\n",
            "| F05   |        0   |       0   | 0        | 0        | 0        | 0        |      0        |   0        | 0        | 0        | 0        |  0        |       0   | 0       |    0        | 0        |   0        |  0        | 0        |   0        |  0        |   0.809527 | 0        |  0       | 0        | 0.425244 | 0.404764 |  0        | 0        |  0        |    0        |   0        |  0        | 0        |       0        |  0        |   0        |  0        |      0        | 0        | 0        |         0   |\n",
            "| F06   |        0   |       0   | 0        | 0        | 0        | 0        |      0        |   0        | 0.353553 | 0        | 0        |  0        |       0   | 0       |    0        | 0        |   0.353553 |  0        | 0        |   0        |  0        |   0.707107 | 0        |  0       | 0        | 0        | 0        |  0        | 0        |  0        |    0        |   0        |  0.353553 | 0.353553 |       0        |  0        |   0        |  0        |      0        | 0        | 0        |         0   |\n",
            "| F07   |        0   |       0   | 0        | 0        | 0        | 0        |      0        |   0.408248 | 0        | 0        | 0        |  0        |       0   | 0       |    0        | 0        |   0        |  0        | 0        |   0        |  0        |   0.408248 | 0.408248 |  0       | 0        | 0        | 0        |  0        | 0        |  0        |    0        |   0        |  0.408248 | 0        |       0.408248 |  0        |   0.408248 |  0        |      0        | 0        | 0        |         0   |\n",
            "| NF01a |        0.5 |       0   | 0        | 0        | 0        | 0        |      0        |   0        | 0        | 0        | 0        |  0        |       0.5 | 0       |    0        | 0        |   0        |  0        | 0        |   0        |  0        |   0        | 0        |  0       | 0        | 0        | 0        |  0        | 0        |  0        |    0        |   0        |  0        | 0        |       0        |  0        |   0        |  0        |      0.5      | 0        | 0        |         0.5 |\n",
            "| NF01b |        0   |       0   | 0        | 0        | 0        | 0        |      0        |   0        | 0        | 0        | 0        |  0        |       0   | 0.57735 |    0        | 0        |   0        |  0        | 0        |   0        |  0        |   0        | 0        |  0.57735 | 0        | 0        | 0        |  0        | 0        |  0        |    0        |   0        |  0        | 0        |       0        |  0        |   0        |  0        |      0        | 0.57735  | 0        |         0   |\n",
            "| NF01c |        0   |       0   | 0        | 0        | 0        | 0        |      0.447214 |   0        | 0        | 0.447214 | 0        |  0        |       0   | 0       |    0        | 0.447214 |   0        |  0        | 0        |   0        |  0        |   0        | 0        |  0       | 0        | 0        | 0        |  0        | 0        |  0        |    0        |   0        |  0        | 0        |       0        |  0        |   0        |  0        |      0.447214 | 0.447214 | 0        |         0   |\n",
            "| NF02  |        0   |       0   | 0        | 0        | 0        | 0        |      0        |   0        | 0        | 0        | 0        |  0        |       0   | 0       |    0        | 0        |   0        |  0        | 0        |   0.417084 |  0        |   0        | 0        |  0       | 0        | 0        | 0        |  0.406458 | 0        |  0.406458 |    0.406458 |   0.406458 |  0        | 0        |       0        |  0.406458 |   0        |  0        |      0        | 0        | 0        |         0   |\n",
            "| NF03  |        0   |       0   | 0        | 0        | 0.582202 | 0        |      0        |   0        | 0        | 0        | 0        |  0.594932 |       0   | 0       |    0        | 0        |   0        |  0        | 0        |   0        |  0.554163 |   0        | 0        |  0       | 0        | 0        | 0        |  0        | 0        |  0        |    0        |   0        |  0        | 0        |       0        |  0        |   0        |  0        |      0        | 0        | 0        |         0   |\n",
            "| NF04  |        0   |       0   | 0        | 0.442644 | 0        | 0.442644 |      0        |   0        | 0        | 0        | 0        |  0        |       0   | 0       |    0.442644 | 0        |   0        |  0        | 0        |   0        |  0        |   0        | 0        |  0       | 0.465041 | 0        | 0        |  0        | 0        |  0        |    0        |   0        |  0        | 0        |       0        |  0        |   0        |  0.442644 |      0        | 0        | 0        |         0   |\n",
            "+-------+------------+-----------+----------+----------+----------+----------+---------------+------------+----------+----------+----------+-----------+-----------+---------+-------------+----------+------------+-----------+----------+------------+-----------+------------+----------+----------+----------+----------+----------+-----------+----------+-----------+-------------+------------+-----------+----------+----------------+-----------+------------+-----------+---------------+----------+----------+-------------+\n",
            "\n",
            "cosine + TFIDF with l2 normalizer\n",
            "+-------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+--------+--------+\n",
            "| ID    |      F01 |      F02 |      F03 |      F04 |      F05 |      F06 |      F07 |    NF01a |    NF01b |    NF01c |     NF02 |   NF03 |   NF04 |\n",
            "|-------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+--------+--------|\n",
            "| F01   | 1        | 0.408248 | 0.375136 | 0        | 0        | 0        | 0        | 0.25     | 0        | 0.223607 | 0.203229 |      0 |      0 |\n",
            "| F02   | 0.408248 | 1        | 0.612595 | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        |      0 |      0 |\n",
            "| F03   | 0.375136 | 0.612595 | 1        | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        |      0 |      0 |\n",
            "| F04   | 0        | 0        | 0        | 1        | 0.660976 | 0.866025 | 0.5      | 0        | 0        | 0        | 0        |      0 |      0 |\n",
            "| F05   | 0        | 0        | 0        | 0.660976 | 1        | 0.572422 | 0.330488 | 0        | 0        | 0        | 0        |      0 |      0 |\n",
            "| F06   | 0        | 0        | 0        | 0.866025 | 0.572422 | 1        | 0.433013 | 0        | 0        | 0        | 0        |      0 |      0 |\n",
            "| F07   | 0        | 0        | 0        | 0.5      | 0.330488 | 0.433013 | 1        | 0        | 0        | 0        | 0        |      0 |      0 |\n",
            "| NF01a | 0.25     | 0        | 0        | 0        | 0        | 0        | 0        | 1        | 0        | 0.223607 | 0        |      0 |      0 |\n",
            "| NF01b | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 1        | 0.258199 | 0        |      0 |      0 |\n",
            "| NF01c | 0.223607 | 0        | 0        | 0        | 0        | 0        | 0        | 0.223607 | 0.258199 | 1        | 0        |      0 |      0 |\n",
            "| NF02  | 0.203229 | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 1        |      0 |      0 |\n",
            "| NF03  | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        |      1 |      0 |\n",
            "| NF04  | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        | 0        |      0 |      1 |\n",
            "+-------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+--------+--------+\n",
            "\n",
            "threshold l2 normalizer\n",
            "+-------+-------+-------+-------+-------+-------+-------+-------+---------+---------+---------+--------+--------+--------+\n",
            "| ID    |   F01 |   F02 |   F03 |   F04 |   F05 |   F06 |   F07 |   NF01a |   NF01b |   NF01c |   NF02 |   NF03 |   NF04 |\n",
            "|-------+-------+-------+-------+-------+-------+-------+-------+---------+---------+---------+--------+--------+--------|\n",
            "| F01   |     1 |     1 |     1 |     0 |     0 |     0 |     0 |       1 |       0 |       1 |      1 |      0 |      0 |\n",
            "| F02   |     1 |     1 |     1 |     0 |     0 |     0 |     0 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| F03   |     1 |     1 |     1 |     0 |     0 |     0 |     0 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| F04   |     0 |     0 |     0 |     1 |     1 |     1 |     1 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| F05   |     0 |     0 |     0 |     1 |     1 |     1 |     1 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| F06   |     0 |     0 |     0 |     1 |     1 |     1 |     1 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| F07   |     0 |     0 |     0 |     1 |     1 |     1 |     1 |       0 |       0 |       0 |      0 |      0 |      0 |\n",
            "| NF01a |     1 |     0 |     0 |     0 |     0 |     0 |     0 |       1 |       0 |       1 |      0 |      0 |      0 |\n",
            "| NF01b |     0 |     0 |     0 |     0 |     0 |     0 |     0 |       0 |       1 |       1 |      0 |      0 |      0 |\n",
            "| NF01c |     1 |     0 |     0 |     0 |     0 |     0 |     0 |       1 |       1 |       1 |      0 |      0 |      0 |\n",
            "| NF02  |     1 |     0 |     0 |     0 |     0 |     0 |     0 |       0 |       0 |       0 |      1 |      0 |      0 |\n",
            "| NF03  |     0 |     0 |     0 |     0 |     0 |     0 |     0 |       0 |       0 |       0 |      0 |      1 |      0 |\n",
            "| NF04  |     0 |     0 |     0 |     0 |     0 |     0 |     0 |       0 |       0 |       0 |      0 |      0 |      1 |\n",
            "+-------+-------+-------+-------+-------+-------+-------+-------+---------+---------+---------+--------+--------+--------+\n",
            "Destructor called.\n",
            "data_correction 0.5384615384615384\n",
            "____________________________________________________\n",
            ".....................Py-AutoML......................\n",
            "____________________________________________________\n",
            "SVC ______________________________ \n",
            "\n",
            "Accuracy Score for SVC is \n",
            "0.75\n",
            "\n",
            "\n",
            "Confusion Matrix for SVC is \n",
            "[[2 0 0 0]\n",
            " [0 1 0 0]\n",
            " [0 0 0 1]\n",
            " [0 0 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for SVC is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       1.00      1.00      1.00         2\n",
            "           1       1.00      1.00      1.00         1\n",
            "           2       0.00      0.00      0.00         1\n",
            "           3       0.00      0.00      0.00         0\n",
            "\n",
            "    accuracy                           0.75         4\n",
            "   macro avg       0.50      0.50      0.50         4\n",
            "weighted avg       0.75      0.75      0.75         4\n",
            "\n",
            "\n",
            "\n",
            "____________________________________________________\n",
            "RandomForestClassifier ______________________________ \n",
            "\n",
            "Accuracy Score for RandomForestClassifier is \n",
            "0.25\n",
            "\n",
            "\n",
            "Confusion Matrix for RandomForestClassifier is \n",
            "[[0 0 0 2]\n",
            " [0 1 0 0]\n",
            " [0 0 0 1]\n",
            " [0 0 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for RandomForestClassifier is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.00      0.00      0.00         2\n",
            "           1       1.00      1.00      1.00         1\n",
            "           2       0.00      0.00      0.00         1\n",
            "           3       0.00      0.00      0.00         0\n",
            "\n",
            "    accuracy                           0.25         4\n",
            "   macro avg       0.25      0.25      0.25         4\n",
            "weighted avg       0.25      0.25      0.25         4\n",
            "\n",
            "\n",
            "\n",
            "____________________________________________________\n",
            "DecisionTreeClassifier ______________________________ \n",
            "\n",
            "Accuracy Score for DecisionTreeClassifier is \n",
            "0.25\n",
            "\n",
            "\n",
            "Confusion Matrix for DecisionTreeClassifier is \n",
            "[[0 0 0 2]\n",
            " [0 1 0 0]\n",
            " [1 0 0 0]\n",
            " [0 0 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for DecisionTreeClassifier is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.00      0.00      0.00         2\n",
            "           1       1.00      1.00      1.00         1\n",
            "           2       0.00      0.00      0.00         1\n",
            "           3       0.00      0.00      0.00         0\n",
            "\n",
            "    accuracy                           0.25         4\n",
            "   macro avg       0.25      0.25      0.25         4\n",
            "weighted avg       0.25      0.25      0.25         4\n",
            "\n",
            "\n",
            "\n",
            "____________________________________________________\n",
            "KNeighborsClassifier ______________________________ \n",
            "\n",
            "Accuracy Score for KNeighborsClassifier is \n",
            "0.25\n",
            "\n",
            "\n",
            "Confusion Matrix for KNeighborsClassifier is \n",
            "[[0 2 0]\n",
            " [0 1 0]\n",
            " [0 1 0]]\n",
            "\n",
            "\n",
            "Classification Report for KNeighborsClassifier is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.00      0.00      0.00         2\n",
            "           1       0.25      1.00      0.40         1\n",
            "           2       0.00      0.00      0.00         1\n",
            "\n",
            "    accuracy                           0.25         4\n",
            "   macro avg       0.08      0.33      0.13         4\n",
            "weighted avg       0.06      0.25      0.10         4\n",
            "\n",
            "\n",
            "\n",
            "____________________________________________________\n",
            "LogisticRegression ______________________________ \n",
            "\n",
            "Accuracy Score for LogisticRegression is \n",
            "0.75\n",
            "\n",
            "\n",
            "Confusion Matrix for LogisticRegression is \n",
            "[[2 0 0 0]\n",
            " [0 1 0 0]\n",
            " [0 0 0 1]\n",
            " [0 0 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for LogisticRegression is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       1.00      1.00      1.00         2\n",
            "           1       1.00      1.00      1.00         1\n",
            "           2       0.00      0.00      0.00         1\n",
            "           3       0.00      0.00      0.00         0\n",
            "\n",
            "    accuracy                           0.75         4\n",
            "   macro avg       0.50      0.50      0.50         4\n",
            "weighted avg       0.75      0.75      0.75         4\n",
            "\n",
            "\n",
            "\n",
            "                    Model Accuracy\n",
            "0                     SVC     0.75\n",
            "1  RandomForestClassifier     0.25\n",
            "2  DecisionTreeClassifier     0.25\n",
            "3    KNeighborsClassifier     0.25\n",
            "4      LogisticRegression     0.75\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "P7DNbxIebJT3"
      },
      "source": [
        "# Latent Semantic Analysis (LSA)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "M4bwx745rVpv",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "ea449e4c-c2be-4c79-f819-bd4e3d58acd9"
      },
      "source": [
        "import pandas as pd\n",
        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
        "from sklearn.decomposition import TruncatedSVD\n",
        "from tabulate import tabulate\n",
        "\n",
        "\n",
        "class latentSemantic:\n",
        "  def __init__(self, data_raw= cleaned_text):\n",
        "      self.__data = data_raw\n",
        "\n",
        "  def ukurLSA(self):\n",
        "      vectorizer = TfidfVectorizer(stop_words='english', \n",
        "                                    max_features= 1000, # keep top 1000 terms \n",
        "                                    max_df = 0.5, \n",
        "                                    smooth_idf=True)\n",
        "      X = vectorizer.fit_transform(self.__data)\n",
        "      svd_model = TruncatedSVD(n_components=len(self.__data), algorithm='randomized', n_iter=100, random_state=122)\n",
        "      svd_model.fit(X)\n",
        "      terms = vectorizer.get_feature_names()\n",
        "      return pd.DataFrame(svd_model.components_, index= req.ID, columns= terms)\n",
        "\n",
        "  def urutLSA(self):\n",
        "      hasil_LSA = []\n",
        "      for i, comp in enumerate(data_svd.components_):\n",
        "          terms_comp = zip(data_terms, comp)\n",
        "          sorted_terms = sorted(terms_comp, key= lambda x:x[1], reverse=True)[:7]\n",
        "          for t in sorted_terms:\n",
        "              hasil_LSA.append(t)\n",
        "      return hasil_LSA\n",
        "\n",
        "  def threshold_value(self, threshold, data):\n",
        "        dt = data.values >= threshold\n",
        "        dt1 = pd.DataFrame(dt, index= data.index, columns= data.columns)\n",
        "        mask = dt1.isin([True])\n",
        "        dt2 = dt1.where(mask, other= 0)\n",
        "        mask2 = dt2.isin([False])\n",
        "        tbl_5 = dt2.where(mask2, other= 1)\n",
        "        return tbl_5\n",
        "\n",
        "  def __del__(self):\n",
        "      print ('Destructor called.')    \n",
        "\n",
        "if __name__ == \"__main__\":\n",
        "      myLSA = latentSemantic()\n",
        "      dt_lsa = myLSA.ukurLSA()\n",
        "      print(tabulate(dt_lsa, headers = 'keys', tablefmt = 'psql'))\n",
        "      th_lsa = myLSA.threshold_value(0.2, dt_lsa)\n",
        "      print(tabulate(dt_lsa, headers = 'keys', tablefmt = 'psql'))\n",
        "\n",
        "      myEvaluasi = pengukuranEvaluasi(dataPertama= dt_lsa.values, dataKedua= th_lsa.values)\n",
        "      myEvaluasi.ukur_evaluasi()"
      ],
      "execution_count": 26,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Destructor called.\n",
            "+-------+--------------+--------------+--------------+--------------+---------------+----------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+---------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+\n",
            "| ID    |      account |     accounts |          add |      android |   application |   applications |        asked |        based |       canvas |       choose |      confirm |    connected |       create |      desired |      devices |    different |        drawn |       friend |      friends |        gives |     internet |      message |     messages |     occupied |     portable |      receive |    recipient |   reliability |     requests |      results |        right |       search |         send |       sender |      senders |         text |        types |         user |     username |        users |        wants |\n",
            "|-------+--------------+--------------+--------------+--------------+---------------+----------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+---------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------|\n",
            "| F01   |  2.00885e-16 |  5.60828e-17 |  2.4261e-17  |  9.8226e-18  |  -1.84581e-16 |    9.8226e-18  | -5.15879e-17 |  0.102753    |  0.1581      | -5.15879e-17 |  2.05095e-16 | -1.84581e-16 |  6.56653e-17 | -4.24404e-17 |  9.8226e-18  | -5.15879e-17 |  0.1581      |  2.05095e-16 |  2.4261e-17  |  8.94908e-19 | -1.84581e-16 |  0.791744    |  0.102753    | -4.24404e-17 |  9.8226e-18  |  0.15029     |  0.15029     |   8.94908e-19 |  2.05095e-16 |  8.94908e-19 |  8.94908e-19 |  4.91355e-17 |  0.363756    |  0.321598    |  0.102753    |  0.102753    |  9.8226e-18  |  5.36521e-17 | -8.10866e-17 |  3.93668e-16 |  6.56653e-17 |\n",
            "| F02   |  0.0609018   |  0.239677    |  0.204097    |  3.27623e-18 |  -2.89635e-18 |    3.27623e-18 |  0.0554963   |  2.52379e-17 | -2.18417e-17 |  0.0554963   |  0.1795      | -2.89635e-18 |  0.0609018   |  0.0190255   |  3.27623e-18 |  0.0554963   | -2.18417e-17 |  0.1795      |  0.204097    |  0.0475243   | -2.89635e-18 | -1.14979e-16 |  2.52379e-17 |  0.0190255   |  3.27623e-18 | -4.70059e-17 | -4.70059e-17 |   0.0475243   |  0.1795      |  0.0475243   |  0.0475243   |  0.247672    | -1.52958e-17 | -3.73654e-17 |  2.54104e-17 |  2.54104e-17 |  3.27623e-18 |  0.272294    |  0.0642649   |  0.769963    |  0.0609018   |\n",
            "| F03   |  0.197236    |  0.0804866   | -0.106291    | -3.08414e-17 |   3.75325e-17 |   -3.08414e-17 |  0.275462    | -2.71594e-17 |  4.11233e-17 |  0.275462    | -0.0982899   |  3.75325e-17 |  0.197236    |  0.253617    | -3.08414e-17 |  0.275462    |  4.11233e-17 | -0.0982899   | -0.106291    |  0.0428607   |  3.75325e-17 |  7.1002e-17  | -2.71594e-17 |  0.253617    | -3.08414e-17 |  6.59632e-18 |  6.59632e-18 |   0.0428607   | -0.0982899   |  0.0428607   |  0.0428607   |  0.10637     |  4.33835e-17 |  5.7521e-17  | -2.67029e-17 | -2.67029e-17 | -3.08414e-17 |  0.423025    |  0.456258    | -0.251341    |  0.197236    |\n",
            "| F04   |  0.146742    |  0.158913    | -0.0971113   | -1.03401e-15 |   1.48381e-16 |   -9.45179e-16 | -0.0850844   | -8.94684e-17 |  1.25162e-17 | -0.0850844   | -0.0942416   |  1.4857e-16  |  0.146742    | -0.280994    | -9.45215e-16 | -0.0850844   |  1.25162e-17 | -0.0942416   | -0.0971113   |  0.303546    |  1.4857e-16  |  7.15787e-17 | -8.94684e-17 | -0.280994    | -9.45215e-16 |  1.06388e-16 |  1.06388e-16 |   0.303546    | -0.0942416   |  0.303546    |  0.303546    |  0.398808    | -9.86985e-17 | -2.49089e-17 | -8.56853e-17 | -8.56853e-17 | -9.45201e-16 |  0.168672    | -0.315693    | -0.171136    |  0.146742    |\n",
            "| F05   |  0.113707    |  3.52765e-16 | -3.63989e-16 |  0.41773     |  -0.0842636   |    0.41773     | -1.62033e-16 |  5.8817e-16  | -3.9763e-17  | -1.4626e-16  | -7.61341e-17 | -0.0842636   |  0.113707    | -0.0894126   |  0.41773     | -1.4626e-16  | -3.9763e-17  | -7.61341e-17 | -3.67473e-16 | -0.0894126   | -0.0842636   | -4.60728e-16 |  5.8817e-16  | -0.0894126   |  0.41773     | -3.95933e-16 | -3.95933e-16 |  -0.0894126   | -7.61341e-17 | -0.0894126   | -0.0894126   | -0.0771062   |  2.99674e-16 | -2.02228e-16 |  5.93852e-16 |  5.93852e-16 |  0.41773     |  0.0869528   | -0.0771062   | -4.29868e-16 |  0.113707    |\n",
            "| F06   |  0.130274    | -3.69283e-17 |  5.0342e-17  |  0.00655726  |   0.535529    |    0.00655726  | -5.03339e-17 |  3.27324e-16 |  6.34827e-18 |  5.85005e-17 | -5.63092e-17 |  0.535529    |  0.130274    | -0.10244     |  0.00655726  |  5.85005e-17 |  6.34827e-18 | -5.63092e-17 |  6.9317e-17  | -0.10244     |  0.535529    | -4.53837e-16 |  3.27324e-16 | -0.10244     |  0.00655726  | -3.7225e-16  | -3.7225e-16  |  -0.10244     | -5.63092e-17 | -0.10244     | -0.10244     | -0.0883403   |  1.78579e-16 | -1.12022e-16 |  3.29169e-16 |  3.29169e-16 |  0.00655726  |  0.0996215   | -0.0883403   |  2.84466e-17 |  0.130274    |\n",
            "| F07   |  0.303045    | -6.74308e-17 |  3.93027e-18 | -0.159558    |  -0.198598    |   -0.159558    |  3.58169e-16 | -5.96085e-15 | -1.11564e-15 |  2.98955e-16 | -3.21962e-18 | -0.198598    |  0.303045    | -0.238297    | -0.159558    |  2.98955e-16 | -1.11564e-15 | -3.21962e-18 | -4.84607e-17 | -0.238297    | -0.198598    |  4.11569e-15 | -5.96085e-15 | -0.238297    | -0.159558    |  7.49481e-15 |  7.49481e-15 |  -0.238297    | -3.21962e-18 | -0.238297    | -0.238297    | -0.205498    | -5.71018e-15 | -1.34031e-15 | -5.95848e-15 | -5.95848e-15 | -0.159558    |  0.231741    | -0.205498    |  2.89088e-17 |  0.303045    |\n",
            "| NF01a |  4.66199e-15 | -1.52501e-16 |  5.29251e-17 | -3.05492e-15 |  -3.36948e-15 |   -3.05731e-15 | -1.19609e-16 |  0.447281    | -0.0776422   | -1.09087e-16 | -4.78617e-17 | -3.36721e-15 |  4.61862e-15 | -3.39714e-15 | -3.05608e-15 | -1.09087e-16 | -0.0776422   | -4.78617e-17 |  5.85035e-17 | -3.33685e-15 | -3.36721e-15 | -0.17793     |  0.447281    | -3.39714e-15 | -3.05608e-15 | -0.208428    | -0.208428    |  -3.33685e-15 | -4.78617e-17 | -3.33685e-15 | -3.33685e-15 | -3.01052e-15 |  0.23166     | -0.124474    |  0.447281    |  0.447281    | -3.05614e-15 |  3.33011e-15 | -3.02398e-15 | -1.01859e-16 |  4.61827e-15 |\n",
            "| NF01b | -0.291632    |  0.0917473   | -0.026401    |  1.53894e-16 |   5.57643e-18 |   -1.11842e-17 |  0.365666    |  1.06531e-16 |  1.74568e-16 |  0.365666    | -0.0333431   |  1.35792e-17 | -0.291632    | -0.387374    |  6.33708e-17 |  0.365666    |  6.35457e-17 | -0.0333431   | -0.026401    | -0.0562156   |  1.35792e-17 | -2.52491e-16 |  1.13579e-16 | -0.387374    |  6.33708e-17 | -4.1977e-16  | -4.1977e-16  |  -0.0562156   | -0.0333431   | -0.0562156   | -0.0562156   |  0.0306413   |  2.40825e-16 |  2.31955e-16 |  1.07616e-16 |  1.07616e-16 |  1.18882e-16 |  0.126774    | -0.0187201   | -0.0212136   | -0.291632    |\n",
            "| NF01c | -2.56746e-15 |  3.79219e-16 | -4.6989e-16  |  1.40145e-15 |   1.68501e-15 |    1.38115e-15 |  1.45406e-16 |  0.115673    | -0.28878     |  1.87815e-16 |  2.55475e-16 |  1.69187e-15 | -2.72483e-15 |  1.76741e-15 |  1.39424e-15 |  1.46206e-16 | -0.28878     |  2.55426e-16 | -4.86731e-16 |  1.72209e-15 |  1.69187e-15 |  0.146038    |  0.115673    |  1.76741e-15 |  1.39424e-15 |  0.513009    |  0.513009    |   1.72209e-15 |  2.55426e-16 |  1.72209e-15 |  1.72209e-15 |  1.81719e-15 | -0.267027    | -0.400879    |  0.115673    |  0.115673    |  1.39951e-15 | -1.6758e-15  |  1.65251e-15 | -5.81738e-17 | -2.71948e-15 |\n",
            "| NF02  | -0.148338    |  0.563382    | -0.0452419   | -1.08338e-16 |   2.04911e-18 |    5.43939e-18 | -0.181098    |  6.53214e-17 |  7.41281e-17 | -0.181098    | -0.246409    |  9.73544e-18 | -0.148338    |  0.116761    |  4.84284e-18 | -0.181098    |  6.02774e-17 | -0.246409    | -0.0452419   | -0.21009     |  9.73547e-18 | -8.84741e-17 |  5.83283e-17 |  0.116761    |  4.84284e-18 | -1.71504e-16 | -1.71504e-16 |  -0.21009     | -0.246409    | -0.21009     | -0.21009     |  0.304666    |  1.1869e-16  |  6.89995e-17 |  5.49062e-17 |  5.49062e-17 | -1.59762e-17 |  0.178901    | -0.0554816   | -0.0152327   | -0.148338    |\n",
            "| NF03  |  0.0416162   | -0.168519    |  0.504008    | -6.79353e-17 |  -4.29123e-17 |    6.67572e-17 |  0.0432983   |  4.20169e-17 | -1.77913e-16 |  0.0432983   | -0.374653    | -2.9563e-17  |  0.0416162   | -0.0239098   |  8.58624e-17 |  0.0432983   | -1.60565e-16 | -0.374653    |  0.504008    |  0.0538235   | -1.80744e-18 |  1.52542e-16 |  3.85474e-17 | -0.0239098   |  8.58624e-17 |  5.19403e-16 |  5.19403e-16 |   0.0538235   | -0.374653    |  0.0538235   |  0.0538235   | -0.0989092   | -3.0594e-16  | -3.83006e-16 |  3.84155e-17 |  3.84155e-17 |  8.58625e-17 | -0.0639331   |  0.0167199   |  0.0689703   |  0.0416162   |\n",
            "| NF04  | -2.00123e-16 | -5.83785e-17 |  2.55266e-19 |  1.53135e-16 |  -5.775e-18   |    1.44633e-16 |  5.53927e-17 | -0.0835597   | -0.620952    | -2.26768e-16 | -1.09315e-17 | -7.11338e-18 |  9.92342e-17 | -8.84251e-17 |  1.4081e-16  |  5.0788e-17  | -0.620952    | -5.25648e-17 |  9.25198e-17 | -2.93988e-17 | -3.4869e-17  |  0.155917    | -0.0835597   | -8.84251e-17 |  1.4081e-16  | -0.174254    | -0.174254    |  -2.93988e-17 | -5.25648e-17 | -2.93988e-17 | -2.93988e-17 | -9.10363e-17 |  0.187833    |  0.283879    | -0.0835597   | -0.0835597   |  1.33893e-16 |  1.64622e-16 | -1.9765e-17  | -6.04224e-18 |  2.80325e-16 |\n",
            "+-------+--------------+--------------+--------------+--------------+---------------+----------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+---------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+\n",
            "+-------+--------------+--------------+--------------+--------------+---------------+----------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+---------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+\n",
            "| ID    |      account |     accounts |          add |      android |   application |   applications |        asked |        based |       canvas |       choose |      confirm |    connected |       create |      desired |      devices |    different |        drawn |       friend |      friends |        gives |     internet |      message |     messages |     occupied |     portable |      receive |    recipient |   reliability |     requests |      results |        right |       search |         send |       sender |      senders |         text |        types |         user |     username |        users |        wants |\n",
            "|-------+--------------+--------------+--------------+--------------+---------------+----------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+---------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------|\n",
            "| F01   |  2.00885e-16 |  5.60828e-17 |  2.4261e-17  |  9.8226e-18  |  -1.84581e-16 |    9.8226e-18  | -5.15879e-17 |  0.102753    |  0.1581      | -5.15879e-17 |  2.05095e-16 | -1.84581e-16 |  6.56653e-17 | -4.24404e-17 |  9.8226e-18  | -5.15879e-17 |  0.1581      |  2.05095e-16 |  2.4261e-17  |  8.94908e-19 | -1.84581e-16 |  0.791744    |  0.102753    | -4.24404e-17 |  9.8226e-18  |  0.15029     |  0.15029     |   8.94908e-19 |  2.05095e-16 |  8.94908e-19 |  8.94908e-19 |  4.91355e-17 |  0.363756    |  0.321598    |  0.102753    |  0.102753    |  9.8226e-18  |  5.36521e-17 | -8.10866e-17 |  3.93668e-16 |  6.56653e-17 |\n",
            "| F02   |  0.0609018   |  0.239677    |  0.204097    |  3.27623e-18 |  -2.89635e-18 |    3.27623e-18 |  0.0554963   |  2.52379e-17 | -2.18417e-17 |  0.0554963   |  0.1795      | -2.89635e-18 |  0.0609018   |  0.0190255   |  3.27623e-18 |  0.0554963   | -2.18417e-17 |  0.1795      |  0.204097    |  0.0475243   | -2.89635e-18 | -1.14979e-16 |  2.52379e-17 |  0.0190255   |  3.27623e-18 | -4.70059e-17 | -4.70059e-17 |   0.0475243   |  0.1795      |  0.0475243   |  0.0475243   |  0.247672    | -1.52958e-17 | -3.73654e-17 |  2.54104e-17 |  2.54104e-17 |  3.27623e-18 |  0.272294    |  0.0642649   |  0.769963    |  0.0609018   |\n",
            "| F03   |  0.197236    |  0.0804866   | -0.106291    | -3.08414e-17 |   3.75325e-17 |   -3.08414e-17 |  0.275462    | -2.71594e-17 |  4.11233e-17 |  0.275462    | -0.0982899   |  3.75325e-17 |  0.197236    |  0.253617    | -3.08414e-17 |  0.275462    |  4.11233e-17 | -0.0982899   | -0.106291    |  0.0428607   |  3.75325e-17 |  7.1002e-17  | -2.71594e-17 |  0.253617    | -3.08414e-17 |  6.59632e-18 |  6.59632e-18 |   0.0428607   | -0.0982899   |  0.0428607   |  0.0428607   |  0.10637     |  4.33835e-17 |  5.7521e-17  | -2.67029e-17 | -2.67029e-17 | -3.08414e-17 |  0.423025    |  0.456258    | -0.251341    |  0.197236    |\n",
            "| F04   |  0.146742    |  0.158913    | -0.0971113   | -1.03401e-15 |   1.48381e-16 |   -9.45179e-16 | -0.0850844   | -8.94684e-17 |  1.25162e-17 | -0.0850844   | -0.0942416   |  1.4857e-16  |  0.146742    | -0.280994    | -9.45215e-16 | -0.0850844   |  1.25162e-17 | -0.0942416   | -0.0971113   |  0.303546    |  1.4857e-16  |  7.15787e-17 | -8.94684e-17 | -0.280994    | -9.45215e-16 |  1.06388e-16 |  1.06388e-16 |   0.303546    | -0.0942416   |  0.303546    |  0.303546    |  0.398808    | -9.86985e-17 | -2.49089e-17 | -8.56853e-17 | -8.56853e-17 | -9.45201e-16 |  0.168672    | -0.315693    | -0.171136    |  0.146742    |\n",
            "| F05   |  0.113707    |  3.52765e-16 | -3.63989e-16 |  0.41773     |  -0.0842636   |    0.41773     | -1.62033e-16 |  5.8817e-16  | -3.9763e-17  | -1.4626e-16  | -7.61341e-17 | -0.0842636   |  0.113707    | -0.0894126   |  0.41773     | -1.4626e-16  | -3.9763e-17  | -7.61341e-17 | -3.67473e-16 | -0.0894126   | -0.0842636   | -4.60728e-16 |  5.8817e-16  | -0.0894126   |  0.41773     | -3.95933e-16 | -3.95933e-16 |  -0.0894126   | -7.61341e-17 | -0.0894126   | -0.0894126   | -0.0771062   |  2.99674e-16 | -2.02228e-16 |  5.93852e-16 |  5.93852e-16 |  0.41773     |  0.0869528   | -0.0771062   | -4.29868e-16 |  0.113707    |\n",
            "| F06   |  0.130274    | -3.69283e-17 |  5.0342e-17  |  0.00655726  |   0.535529    |    0.00655726  | -5.03339e-17 |  3.27324e-16 |  6.34827e-18 |  5.85005e-17 | -5.63092e-17 |  0.535529    |  0.130274    | -0.10244     |  0.00655726  |  5.85005e-17 |  6.34827e-18 | -5.63092e-17 |  6.9317e-17  | -0.10244     |  0.535529    | -4.53837e-16 |  3.27324e-16 | -0.10244     |  0.00655726  | -3.7225e-16  | -3.7225e-16  |  -0.10244     | -5.63092e-17 | -0.10244     | -0.10244     | -0.0883403   |  1.78579e-16 | -1.12022e-16 |  3.29169e-16 |  3.29169e-16 |  0.00655726  |  0.0996215   | -0.0883403   |  2.84466e-17 |  0.130274    |\n",
            "| F07   |  0.303045    | -6.74308e-17 |  3.93027e-18 | -0.159558    |  -0.198598    |   -0.159558    |  3.58169e-16 | -5.96085e-15 | -1.11564e-15 |  2.98955e-16 | -3.21962e-18 | -0.198598    |  0.303045    | -0.238297    | -0.159558    |  2.98955e-16 | -1.11564e-15 | -3.21962e-18 | -4.84607e-17 | -0.238297    | -0.198598    |  4.11569e-15 | -5.96085e-15 | -0.238297    | -0.159558    |  7.49481e-15 |  7.49481e-15 |  -0.238297    | -3.21962e-18 | -0.238297    | -0.238297    | -0.205498    | -5.71018e-15 | -1.34031e-15 | -5.95848e-15 | -5.95848e-15 | -0.159558    |  0.231741    | -0.205498    |  2.89088e-17 |  0.303045    |\n",
            "| NF01a |  4.66199e-15 | -1.52501e-16 |  5.29251e-17 | -3.05492e-15 |  -3.36948e-15 |   -3.05731e-15 | -1.19609e-16 |  0.447281    | -0.0776422   | -1.09087e-16 | -4.78617e-17 | -3.36721e-15 |  4.61862e-15 | -3.39714e-15 | -3.05608e-15 | -1.09087e-16 | -0.0776422   | -4.78617e-17 |  5.85035e-17 | -3.33685e-15 | -3.36721e-15 | -0.17793     |  0.447281    | -3.39714e-15 | -3.05608e-15 | -0.208428    | -0.208428    |  -3.33685e-15 | -4.78617e-17 | -3.33685e-15 | -3.33685e-15 | -3.01052e-15 |  0.23166     | -0.124474    |  0.447281    |  0.447281    | -3.05614e-15 |  3.33011e-15 | -3.02398e-15 | -1.01859e-16 |  4.61827e-15 |\n",
            "| NF01b | -0.291632    |  0.0917473   | -0.026401    |  1.53894e-16 |   5.57643e-18 |   -1.11842e-17 |  0.365666    |  1.06531e-16 |  1.74568e-16 |  0.365666    | -0.0333431   |  1.35792e-17 | -0.291632    | -0.387374    |  6.33708e-17 |  0.365666    |  6.35457e-17 | -0.0333431   | -0.026401    | -0.0562156   |  1.35792e-17 | -2.52491e-16 |  1.13579e-16 | -0.387374    |  6.33708e-17 | -4.1977e-16  | -4.1977e-16  |  -0.0562156   | -0.0333431   | -0.0562156   | -0.0562156   |  0.0306413   |  2.40825e-16 |  2.31955e-16 |  1.07616e-16 |  1.07616e-16 |  1.18882e-16 |  0.126774    | -0.0187201   | -0.0212136   | -0.291632    |\n",
            "| NF01c | -2.56746e-15 |  3.79219e-16 | -4.6989e-16  |  1.40145e-15 |   1.68501e-15 |    1.38115e-15 |  1.45406e-16 |  0.115673    | -0.28878     |  1.87815e-16 |  2.55475e-16 |  1.69187e-15 | -2.72483e-15 |  1.76741e-15 |  1.39424e-15 |  1.46206e-16 | -0.28878     |  2.55426e-16 | -4.86731e-16 |  1.72209e-15 |  1.69187e-15 |  0.146038    |  0.115673    |  1.76741e-15 |  1.39424e-15 |  0.513009    |  0.513009    |   1.72209e-15 |  2.55426e-16 |  1.72209e-15 |  1.72209e-15 |  1.81719e-15 | -0.267027    | -0.400879    |  0.115673    |  0.115673    |  1.39951e-15 | -1.6758e-15  |  1.65251e-15 | -5.81738e-17 | -2.71948e-15 |\n",
            "| NF02  | -0.148338    |  0.563382    | -0.0452419   | -1.08338e-16 |   2.04911e-18 |    5.43939e-18 | -0.181098    |  6.53214e-17 |  7.41281e-17 | -0.181098    | -0.246409    |  9.73544e-18 | -0.148338    |  0.116761    |  4.84284e-18 | -0.181098    |  6.02774e-17 | -0.246409    | -0.0452419   | -0.21009     |  9.73547e-18 | -8.84741e-17 |  5.83283e-17 |  0.116761    |  4.84284e-18 | -1.71504e-16 | -1.71504e-16 |  -0.21009     | -0.246409    | -0.21009     | -0.21009     |  0.304666    |  1.1869e-16  |  6.89995e-17 |  5.49062e-17 |  5.49062e-17 | -1.59762e-17 |  0.178901    | -0.0554816   | -0.0152327   | -0.148338    |\n",
            "| NF03  |  0.0416162   | -0.168519    |  0.504008    | -6.79353e-17 |  -4.29123e-17 |    6.67572e-17 |  0.0432983   |  4.20169e-17 | -1.77913e-16 |  0.0432983   | -0.374653    | -2.9563e-17  |  0.0416162   | -0.0239098   |  8.58624e-17 |  0.0432983   | -1.60565e-16 | -0.374653    |  0.504008    |  0.0538235   | -1.80744e-18 |  1.52542e-16 |  3.85474e-17 | -0.0239098   |  8.58624e-17 |  5.19403e-16 |  5.19403e-16 |   0.0538235   | -0.374653    |  0.0538235   |  0.0538235   | -0.0989092   | -3.0594e-16  | -3.83006e-16 |  3.84155e-17 |  3.84155e-17 |  8.58625e-17 | -0.0639331   |  0.0167199   |  0.0689703   |  0.0416162   |\n",
            "| NF04  | -2.00123e-16 | -5.83785e-17 |  2.55266e-19 |  1.53135e-16 |  -5.775e-18   |    1.44633e-16 |  5.53927e-17 | -0.0835597   | -0.620952    | -2.26768e-16 | -1.09315e-17 | -7.11338e-18 |  9.92342e-17 | -8.84251e-17 |  1.4081e-16  |  5.0788e-17  | -0.620952    | -5.25648e-17 |  9.25198e-17 | -2.93988e-17 | -3.4869e-17  |  0.155917    | -0.0835597   | -8.84251e-17 |  1.4081e-16  | -0.174254    | -0.174254    |  -2.93988e-17 | -5.25648e-17 | -2.93988e-17 | -2.93988e-17 | -9.10363e-17 |  0.187833    |  0.283879    | -0.0835597   | -0.0835597   |  1.33893e-16 |  1.64622e-16 | -1.9765e-17  | -6.04224e-18 |  2.80325e-16 |\n",
            "+-------+--------------+--------------+--------------+--------------+---------------+----------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+---------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+\n",
            "data_correction 0.8461538461538461\n",
            "____________________________________________________\n",
            ".....................Py-AutoML......................\n",
            "____________________________________________________\n",
            "SVC ______________________________ \n",
            "\n",
            "Accuracy Score for SVC is \n",
            "0.0\n",
            "\n",
            "\n",
            "Confusion Matrix for SVC is \n",
            "[[0 1 0 0]\n",
            " [0 0 0 0]\n",
            " [0 1 0 0]\n",
            " [0 2 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for SVC is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           1       0.00      0.00      0.00       1.0\n",
            "           2       0.00      0.00      0.00       0.0\n",
            "           4       0.00      0.00      0.00       1.0\n",
            "           5       0.00      0.00      0.00       2.0\n",
            "\n",
            "    accuracy                           0.00       4.0\n",
            "   macro avg       0.00      0.00      0.00       4.0\n",
            "weighted avg       0.00      0.00      0.00       4.0\n",
            "\n",
            "\n",
            "\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "____________________________________________________\n",
            "RandomForestClassifier ______________________________ \n",
            "\n",
            "Accuracy Score for RandomForestClassifier is \n",
            "0.0\n",
            "\n",
            "\n",
            "Confusion Matrix for RandomForestClassifier is \n",
            "[[0 0 0 0 0]\n",
            " [0 0 1 0 0]\n",
            " [0 0 0 0 0]\n",
            " [0 0 1 0 0]\n",
            " [1 0 1 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for RandomForestClassifier is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.00      0.00      0.00       0.0\n",
            "           1       0.00      0.00      0.00       1.0\n",
            "           2       0.00      0.00      0.00       0.0\n",
            "           4       0.00      0.00      0.00       1.0\n",
            "           5       0.00      0.00      0.00       2.0\n",
            "\n",
            "    accuracy                           0.00       4.0\n",
            "   macro avg       0.00      0.00      0.00       4.0\n",
            "weighted avg       0.00      0.00      0.00       4.0\n",
            "\n",
            "\n",
            "\n",
            "____________________________________________________\n",
            "DecisionTreeClassifier ______________________________ \n",
            "\n",
            "Accuracy Score for DecisionTreeClassifier is \n",
            "0.0\n",
            "\n",
            "\n",
            "Confusion Matrix for DecisionTreeClassifier is \n",
            "[[0 0 0 0 0]\n",
            " [1 0 0 0 0]\n",
            " [0 0 0 0 1]\n",
            " [1 0 0 0 1]\n",
            " [0 0 0 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for DecisionTreeClassifier is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.00      0.00      0.00       0.0\n",
            "           1       0.00      0.00      0.00       1.0\n",
            "           4       0.00      0.00      0.00       1.0\n",
            "           5       0.00      0.00      0.00       2.0\n",
            "           7       0.00      0.00      0.00       0.0\n",
            "\n",
            "    accuracy                           0.00       4.0\n",
            "   macro avg       0.00      0.00      0.00       4.0\n",
            "weighted avg       0.00      0.00      0.00       4.0\n",
            "\n",
            "\n",
            "\n",
            "____________________________________________________\n",
            "KNeighborsClassifier ______________________________ \n",
            "\n",
            "Accuracy Score for KNeighborsClassifier is \n",
            "0.0\n",
            "\n",
            "\n",
            "Confusion Matrix for KNeighborsClassifier is \n",
            "[[0 0 0 0]\n",
            " [1 0 0 0]\n",
            " [1 0 0 0]\n",
            " [2 0 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for KNeighborsClassifier is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.00      0.00      0.00       0.0\n",
            "           1       0.00      0.00      0.00       1.0\n",
            "           4       0.00      0.00      0.00       1.0\n",
            "           5       0.00      0.00      0.00       2.0\n",
            "\n",
            "    accuracy                           0.00       4.0\n",
            "   macro avg       0.00      0.00      0.00       4.0\n",
            "weighted avg       0.00      0.00      0.00       4.0\n",
            "\n",
            "\n",
            "\n",
            "____________________________________________________\n",
            "LogisticRegression ______________________________ \n",
            "\n",
            "Accuracy Score for LogisticRegression is \n",
            "0.0\n",
            "\n",
            "\n",
            "Confusion Matrix for LogisticRegression is \n",
            "[[0 0 0 0 0]\n",
            " [0 0 1 0 0]\n",
            " [0 0 0 0 0]\n",
            " [0 0 1 0 0]\n",
            " [1 0 1 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for LogisticRegression is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.00      0.00      0.00       0.0\n",
            "           1       0.00      0.00      0.00       1.0\n",
            "           2       0.00      0.00      0.00       0.0\n",
            "           4       0.00      0.00      0.00       1.0\n",
            "           5       0.00      0.00      0.00       2.0\n",
            "\n",
            "    accuracy                           0.00       4.0\n",
            "   macro avg       0.00      0.00      0.00       4.0\n",
            "weighted avg       0.00      0.00      0.00       4.0\n",
            "\n",
            "\n",
            "\n",
            "                    Model Accuracy\n",
            "0                     SVC      0.0\n",
            "1  RandomForestClassifier      0.0\n",
            "2  DecisionTreeClassifier      0.0\n",
            "3    KNeighborsClassifier      0.0\n",
            "4      LogisticRegression      0.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "le3PaCqcelnk"
      },
      "source": [
        "# Latent Dirichlet Allocation (LDA)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hXRRo6ZqewvI",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "199790f9-9711-4db5-c45f-970d0fd5edb7"
      },
      "source": [
        "from time import time\n",
        "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n",
        "from sklearn.decomposition import NMF, LatentDirichletAllocation\n",
        "from sklearn.datasets import fetch_20newsgroups\n",
        "\n",
        "class latentDirichlet:\n",
        "  def __init__(self, data_raw= cleaned_text):\n",
        "      self.__data = data_raw\n",
        "      self.__n_features = len(self.__data)\n",
        "\n",
        "  def ukur_tfidf_vectorizer(self):\n",
        "      # Use tf-idf features for NMF.\n",
        "      tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2,\n",
        "                                        max_features=self.__n_features,\n",
        "                                        stop_words='english')\n",
        "      tfidf = tfidf_vectorizer.fit_transform(self.__data)\n",
        "      return tfidf_vectorizer, tfidf\n",
        "\n",
        "  def ukur_tf(self):\n",
        "      tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2,\n",
        "                                      max_features=self.__n_features,\n",
        "                                      stop_words='english')\n",
        "      tf = tf_vectorizer.fit_transform(self.__data)\n",
        "      return tf_vectorizer, tf\n",
        "\n",
        "  def Frobenius_norm_feature(self):\n",
        "      nmf = NMF(n_components=len(self.__data), random_state=1, alpha=.1, l1_ratio=.5).fit(latentDirichlet.ukur_tfidf_vectorizer(self)[1])\n",
        "      nmf_tfidf = latentDirichlet.ukur_tfidf_vectorizer(self)[0].get_feature_names()\n",
        "      fitur_frb_tfidf = (nmf_tfidf)\n",
        "      data_frb_tfidf = (nmf.components_)\n",
        "      dt_df =  pd.DataFrame(data_frb_tfidf, index= req.ID, columns= fitur_frb_tfidf)\n",
        "      return dt_df\n",
        "\n",
        "  def Kullback_feature(self):\n",
        "      nmf = NMF(n_components=len(self.__data), random_state=1, beta_loss='kullback-leibler', solver='mu', max_iter=1000, alpha=.1, l1_ratio=.5).fit(latentDirichlet.ukur_tfidf_vectorizer(self)[1])\n",
        "      tfidf_feature_names = latentDirichlet.ukur_tfidf_vectorizer(self)[0].get_feature_names()\n",
        "      fitur_kll_tfidfi = (tfidf_feature_names)\n",
        "      data_kll_tfidf = (nmf.components_)\n",
        "      dt_df =  pd.DataFrame(data_kll_tfidf, index= req.ID, columns= fitur_kll_tfidfi)\n",
        "      return dt_df\n",
        "\n",
        "  def lda_feature(self):\n",
        "      lda = LatentDirichletAllocation(n_components=len(self.__data), max_iter=5, learning_method='online', learning_offset=50., random_state=0)\n",
        "      lda.fit(latentDirichlet.ukur_tf(self)[1])\n",
        "      tf_feature_names = latentDirichlet.ukur_tf(self)[0].get_feature_names()\n",
        "      fitur_lda = (tf_feature_names)\n",
        "      nmf = NMF(n_components=len(self.__data), random_state=1, beta_loss='kullback-leibler', solver='mu', max_iter=1000, alpha=.1, l1_ratio=.5).fit(latentDirichlet.ukur_tfidf_vectorizer(self)[1])\n",
        "      data_lda = (nmf.components_)\n",
        "      dt_df =  pd.DataFrame(data_lda, index= req.ID, columns= fitur_lda)\n",
        "      return dt_df\n",
        "\n",
        "  def threshold_value(self, threshold, data):\n",
        "      dt = data.values >= threshold\n",
        "      dt1 = pd.DataFrame(dt, index= data.index, columns= data.columns)\n",
        "      mask = dt1.isin([True])\n",
        "      dt3 = dt1.where(mask, other= 0)\n",
        "      mask2 = dt3.isin([False])\n",
        "      th_cosine1 = dt3.where(mask2, other= 1)\n",
        "      return th_cosine1\n",
        "\n",
        "  def __del__(self):\n",
        "      print ('Destructor called.')    \n",
        "\n",
        "\n",
        "if __name__ == \"__main__\":\n",
        "      myLDA = latentDirichlet()\n",
        "      dt_fr = myLDA.Frobenius_norm_feature()\n",
        "      print(tabulate(dt_fr, headers = 'keys', tablefmt = 'psql'))\n",
        "      th_fr = myLDA.threshold_value(0.2, dt_fr)\n",
        "      print(tabulate(th_fr, headers = 'keys', tablefmt = 'psql'))\n",
        "\n",
        "      dt_kl = myLDA.Kullback_feature()\n",
        "      print(tabulate(dt_kl, headers = 'keys', tablefmt = 'psql'))\n",
        "      th_kl = myLDA.threshold_value(0.2, dt_kl)\n",
        "      print(tabulate(th_kl, headers = 'keys', tablefmt = 'psql'))\n",
        "\n",
        "      dt_lda = myLDA.lda_feature()\n",
        "      print(tabulate(dt_lda, headers = 'keys', tablefmt = 'psql'))\n",
        "      th_lda = myLDA.threshold_value(0.2, dt_lda)\n",
        "      print(tabulate(th_lda, headers = 'keys', tablefmt = 'psql'))\n",
        "\n",
        "      myLDA.__del__()\n",
        "\n",
        "      # myEvaluasi = pengukuranEvaluasi(dataPertama= dt_lda.values, dataKedua= th_lda.values)\n",
        "      # myEvaluasi = pengukuranEvaluasi(dataPertama= dt_kl.values, dataKedua= th_kl.values)\n",
        "      myEvaluasi = pengukuranEvaluasi(dataPertama= dt_fr.values, dataKedua= th_fr.values)\n",
        "      myEvaluasi.ukur_evaluasi()      "
      ],
      "execution_count": 29,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Destructor called.\n",
            "+-------+-----------+----------+----------+----------+---------+------------+---------+\n",
            "| ID    |   message |   search |     send |   sender |    user |   username |   users |\n",
            "|-------+-----------+----------+----------+----------+---------+------------+---------|\n",
            "| F01   |  0        |    0     | 0        | 0        | 0       |    0       | 0       |\n",
            "| F02   |  0        |    0     | 0        | 0        | 1.20988 |    0       | 0       |\n",
            "| F03   |  0        |    0     | 0        | 0        | 0       |    0       | 0       |\n",
            "| F04   |  0.779288 |    0     | 0.819326 | 0.047748 | 0       |    0       | 0       |\n",
            "| F05   |  0        |    0     | 0        | 0        | 0       |    0       | 0       |\n",
            "| F06   |  0        |    1.096 | 0        | 0        | 0       |    0       | 0       |\n",
            "| F07   |  0        |    0     | 0        | 0        | 0       |    1.13479 | 0       |\n",
            "| NF01a |  0.186711 |    0     | 0        | 0.738124 | 0       |    0       | 0       |\n",
            "| NF01b |  0        |    0     | 0        | 0        | 0       |    0       | 0       |\n",
            "| NF01c |  0        |    0     | 0        | 0        | 0       |    0       | 0       |\n",
            "| NF02  |  1.04506  |    0     | 0        | 0        | 0       |    0       | 0       |\n",
            "| NF03  |  0        |    0     | 0        | 0        | 0       |    0       | 0       |\n",
            "| NF04  |  0        |    0     | 0        | 0        | 0       |    0       | 1.30169 |\n",
            "+-------+-----------+----------+----------+----------+---------+------------+---------+\n",
            "+-------+-----------+----------+--------+----------+--------+------------+---------+\n",
            "| ID    |   message |   search |   send |   sender |   user |   username |   users |\n",
            "|-------+-----------+----------+--------+----------+--------+------------+---------|\n",
            "| F01   |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F02   |         0 |        0 |      0 |        0 |      1 |          0 |       0 |\n",
            "| F03   |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F04   |         1 |        0 |      1 |        0 |      0 |          0 |       0 |\n",
            "| F05   |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F06   |         0 |        1 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F07   |         0 |        0 |      0 |        0 |      0 |          1 |       0 |\n",
            "| NF01a |         0 |        0 |      0 |        1 |      0 |          0 |       0 |\n",
            "| NF01b |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| NF01c |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| NF02  |         1 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| NF03  |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| NF04  |         0 |        0 |      0 |        0 |      0 |          0 |       1 |\n",
            "+-------+-----------+----------+--------+----------+--------+------------+---------+\n",
            "+-------+-----------+----------+----------+----------+---------+------------+---------+\n",
            "| ID    |   message |   search |     send |   sender |    user |   username |   users |\n",
            "|-------+-----------+----------+----------+----------+---------+------------+---------|\n",
            "| F01   |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| F02   |  0        |  0       | 0        |  0       | 1.22415 |    0       | 0       |\n",
            "| F03   |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| F04   |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| F05   |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| F06   |  0        |  1.10737 | 0        |  0       | 0       |    0       | 0       |\n",
            "| F07   |  0        |  0       | 0        |  0       | 0       |    1.14364 | 0       |\n",
            "| NF01a |  0.473269 |  0       | 0.266275 |  0.68396 | 0       |    0       | 0       |\n",
            "| NF01b |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| NF01c |  0        |  0       | 0        |  0       | 0       |    0       | 1.30916 |\n",
            "| NF02  |  1.06323  |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| NF03  |  0.63881  |  0       | 0.785624 |  0       | 0       |    0       | 0       |\n",
            "| NF04  |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "+-------+-----------+----------+----------+----------+---------+------------+---------+\n",
            "+-------+-----------+----------+--------+----------+--------+------------+---------+\n",
            "| ID    |   message |   search |   send |   sender |   user |   username |   users |\n",
            "|-------+-----------+----------+--------+----------+--------+------------+---------|\n",
            "| F01   |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F02   |         0 |        0 |      0 |        0 |      1 |          0 |       0 |\n",
            "| F03   |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F04   |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F05   |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F06   |         0 |        1 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F07   |         0 |        0 |      0 |        0 |      0 |          1 |       0 |\n",
            "| NF01a |         1 |        0 |      1 |        1 |      0 |          0 |       0 |\n",
            "| NF01b |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| NF01c |         0 |        0 |      0 |        0 |      0 |          0 |       1 |\n",
            "| NF02  |         1 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| NF03  |         1 |        0 |      1 |        0 |      0 |          0 |       0 |\n",
            "| NF04  |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "+-------+-----------+----------+--------+----------+--------+------------+---------+\n",
            "+-------+-----------+----------+----------+----------+---------+------------+---------+\n",
            "| ID    |   message |   search |     send |   sender |    user |   username |   users |\n",
            "|-------+-----------+----------+----------+----------+---------+------------+---------|\n",
            "| F01   |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| F02   |  0        |  0       | 0        |  0       | 1.22415 |    0       | 0       |\n",
            "| F03   |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| F04   |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| F05   |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| F06   |  0        |  1.10737 | 0        |  0       | 0       |    0       | 0       |\n",
            "| F07   |  0        |  0       | 0        |  0       | 0       |    1.14364 | 0       |\n",
            "| NF01a |  0.473269 |  0       | 0.266275 |  0.68396 | 0       |    0       | 0       |\n",
            "| NF01b |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| NF01c |  0        |  0       | 0        |  0       | 0       |    0       | 1.30916 |\n",
            "| NF02  |  1.06323  |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "| NF03  |  0.63881  |  0       | 0.785624 |  0       | 0       |    0       | 0       |\n",
            "| NF04  |  0        |  0       | 0        |  0       | 0       |    0       | 0       |\n",
            "+-------+-----------+----------+----------+----------+---------+------------+---------+\n",
            "+-------+-----------+----------+--------+----------+--------+------------+---------+\n",
            "| ID    |   message |   search |   send |   sender |   user |   username |   users |\n",
            "|-------+-----------+----------+--------+----------+--------+------------+---------|\n",
            "| F01   |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F02   |         0 |        0 |      0 |        0 |      1 |          0 |       0 |\n",
            "| F03   |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F04   |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F05   |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F06   |         0 |        1 |      0 |        0 |      0 |          0 |       0 |\n",
            "| F07   |         0 |        0 |      0 |        0 |      0 |          1 |       0 |\n",
            "| NF01a |         1 |        0 |      1 |        1 |      0 |          0 |       0 |\n",
            "| NF01b |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| NF01c |         0 |        0 |      0 |        0 |      0 |          0 |       1 |\n",
            "| NF02  |         1 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "| NF03  |         1 |        0 |      1 |        0 |      0 |          0 |       0 |\n",
            "| NF04  |         0 |        0 |      0 |        0 |      0 |          0 |       0 |\n",
            "+-------+-----------+----------+--------+----------+--------+------------+---------+\n",
            "Destructor called.\n",
            "data_correction 0.15384615384615385\n",
            "____________________________________________________\n",
            ".....................Py-AutoML......................\n",
            "____________________________________________________\n",
            "SVC ______________________________ \n",
            "\n",
            "Accuracy Score for SVC is \n",
            "0.5\n",
            "\n",
            "\n",
            "Confusion Matrix for SVC is \n",
            "[[2 0 0]\n",
            " [1 0 0]\n",
            " [1 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for SVC is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.50      1.00      0.67         2\n",
            "           1       0.00      0.00      0.00         1\n",
            "           2       0.00      0.00      0.00         1\n",
            "\n",
            "    accuracy                           0.50         4\n",
            "   macro avg       0.17      0.33      0.22         4\n",
            "weighted avg       0.25      0.50      0.33         4\n",
            "\n",
            "\n",
            "\n",
            "____________________________________________________\n",
            "RandomForestClassifier ______________________________ \n",
            "\n",
            "Accuracy Score for RandomForestClassifier is \n",
            "0.5\n",
            "\n",
            "\n",
            "Confusion Matrix for RandomForestClassifier is \n",
            "[[2 0 0]\n",
            " [1 0 0]\n",
            " [1 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for RandomForestClassifier is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.50      1.00      0.67         2\n",
            "           1       0.00      0.00      0.00         1\n",
            "           2       0.00      0.00      0.00         1\n",
            "\n",
            "    accuracy                           0.50         4\n",
            "   macro avg       0.17      0.33      0.22         4\n",
            "weighted avg       0.25      0.50      0.33         4\n",
            "\n",
            "\n",
            "\n",
            "____________________________________________________\n",
            "DecisionTreeClassifier ______________________________ \n",
            "\n",
            "Accuracy Score for DecisionTreeClassifier is \n",
            "0.5\n",
            "\n",
            "\n",
            "Confusion Matrix for DecisionTreeClassifier is \n",
            "[[2 0 0]\n",
            " [1 0 0]\n",
            " [1 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for DecisionTreeClassifier is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.50      1.00      0.67         2\n",
            "           1       0.00      0.00      0.00         1\n",
            "           2       0.00      0.00      0.00         1\n",
            "\n",
            "    accuracy                           0.50         4\n",
            "   macro avg       0.17      0.33      0.22         4\n",
            "weighted avg       0.25      0.50      0.33         4\n",
            "\n",
            "\n",
            "\n",
            "____________________________________________________\n",
            "KNeighborsClassifier ______________________________ \n",
            "\n",
            "Accuracy Score for KNeighborsClassifier is \n",
            "0.5\n",
            "\n",
            "\n",
            "Confusion Matrix for KNeighborsClassifier is \n",
            "[[2 0 0]\n",
            " [1 0 0]\n",
            " [1 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for KNeighborsClassifier is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.50      1.00      0.67         2\n",
            "           1       0.00      0.00      0.00         1\n",
            "           2       0.00      0.00      0.00         1\n",
            "\n",
            "    accuracy                           0.50         4\n",
            "   macro avg       0.17      0.33      0.22         4\n",
            "weighted avg       0.25      0.50      0.33         4\n",
            "\n",
            "\n",
            "\n",
            "____________________________________________________\n",
            "LogisticRegression ______________________________ \n",
            "\n",
            "Accuracy Score for LogisticRegression is \n",
            "0.5\n",
            "\n",
            "\n",
            "Confusion Matrix for LogisticRegression is \n",
            "[[2 0 0]\n",
            " [1 0 0]\n",
            " [1 0 0]]\n",
            "\n",
            "\n",
            "Classification Report for LogisticRegression is \n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.50      1.00      0.67         2\n",
            "           1       0.00      0.00      0.00         1\n",
            "           2       0.00      0.00      0.00         1\n",
            "\n",
            "    accuracy                           0.50         4\n",
            "   macro avg       0.17      0.33      0.22         4\n",
            "weighted avg       0.25      0.50      0.33         4\n",
            "\n",
            "\n",
            "\n",
            "                    Model Accuracy\n",
            "0                     SVC      0.5\n",
            "1  RandomForestClassifier      0.5\n",
            "2  DecisionTreeClassifier      0.5\n",
            "3    KNeighborsClassifier      0.5\n",
            "4      LogisticRegression      0.5\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n",
            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n"
          ]
        }
      ]
    }
  ]
}