drhenner/ror_ecommerce

View on GitHub
opshub.ipynb

Summary

Maintainability
Test Coverage
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/drhenner/ror_ecommerce/blob/master/opshub.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pNEylOfVn6vm",
        "outputId": "65d76d93-2208-452a-e91c-a10e22dda746"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Installing collected packages: mypy-extensions, multidict, marshmallow, frozenlist, async-timeout, yarl, typing-inspect, openapi-schema-pydantic, marshmallow-enum, langchainplus-sdk, aiosignal, dataclasses-json, aiohttp, langchain\n",
            "Successfully installed aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 dataclasses-json-0.5.8 frozenlist-1.3.3 langchain-0.0.198 langchainplus-sdk-0.0.9 marshmallow-3.19.0 marshmallow-enum-1.5.1 multidict-6.0.4 mypy-extensions-1.0.0 openapi-schema-pydantic-1.2.4 typing-inspect-0.9.0 yarl-1.9.2\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting openai\n",
            "  Downloading openai-0.27.8-py3-none-any.whl (73 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.6/73.6 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: requests>=2.20 in /usr/local/lib/python3.10/dist-packages (from openai) (2.27.1)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai) (4.65.0)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from openai) (3.8.4)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai) (1.26.15)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai) (2022.12.7)\n",
            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai) (2.0.12)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai) (3.4)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (23.1.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (6.0.4)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (4.0.2)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (1.9.2)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (1.3.3)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (1.3.1)\n",
            "Installing collected packages: openai\n",
            "Successfully installed openai-0.27.8\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting pyPDF2\n",
            "  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m232.6/232.6 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: pyPDF2\n",
            "Successfully installed pyPDF2-3.0.1\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting faiss-cpu\n",
            "  Downloading faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.6/17.6 MB\u001b[0m \u001b[31m46.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: faiss-cpu\n",
            "Successfully installed faiss-cpu-1.7.4\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting tiktoken\n",
            "  Downloading tiktoken-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken) (2022.10.31)\n",
            "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.10/dist-packages (from tiktoken) (2.27.1)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (1.26.15)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (2022.12.7)\n",
            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (2.0.12)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (3.4)\n",
            "Installing collected packages: tiktoken\n",
            "Successfully installed tiktoken-0.4.0\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting gradio\n",
            "  Downloading gradio-3.34.0-py3-none-any.whl (20.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m43.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting aiofiles (from gradio)\n",
            "  Downloading aiofiles-23.1.0-py3-none-any.whl (14 kB)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from gradio) (3.8.4)\n",
            "Requirement already satisfied: altair>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.2.2)\n",
            "Collecting fastapi (from gradio)\n",
            "  Downloading fastapi-0.97.0-py3-none-any.whl (56 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.0/57.0 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting ffmpy (from gradio)\n",
            "  Downloading ffmpy-0.3.0.tar.gz (4.8 kB)\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting gradio-client>=0.2.6 (from gradio)\n",
            "  Downloading gradio_client-0.2.6-py3-none-any.whl (288 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m288.3/288.3 kB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting httpx (from gradio)\n",
            "  Downloading httpx-0.24.1-py3-none-any.whl (75 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting huggingface-hub>=0.14.0 (from gradio)\n",
            "  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.2)\n",
            "Requirement already satisfied: markdown-it-py[linkify]>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.2.0)\n",
            "Requirement already satisfied: markupsafe in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.2)\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n",
            "Collecting mdit-py-plugins<=0.3.3 (from gradio)\n",
            "  Downloading mdit_py_plugins-0.3.3-py3-none-any.whl (50 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.5/50.5 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from gradio) (1.22.4)\n",
            "Collecting orjson (from gradio)\n",
            "  Downloading orjson-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (136 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.0/137.0 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from gradio) (1.5.3)\n",
            "Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from gradio) (8.4.0)\n",
            "Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from gradio) (1.10.7)\n",
            "Collecting pydub (from gradio)\n",
            "  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
            "Requirement already satisfied: pygments>=2.12.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.14.0)\n",
            "Collecting python-multipart (from gradio)\n",
            "  Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from gradio) (2.27.1)\n",
            "Collecting semantic-version (from gradio)\n",
            "  Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from gradio) (4.5.0)\n",
            "Collecting uvicorn>=0.14.0 (from gradio)\n",
            "  Downloading uvicorn-0.22.0-py3-none-any.whl (58 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting websockets>=10.0 (from gradio)\n",
            "  Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->gradio) (0.4)\n",
            "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->gradio) (4.3.3)\n",
            "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->gradio) (0.12.0)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client>=0.2.6->gradio) (2023.4.0)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio-client>=0.2.6->gradio) (23.1)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (3.12.0)\n",
            "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (4.65.0)\n",
            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py[linkify]>=2.0.0->gradio) (0.1.2)\n",
            "Collecting linkify-it-py<3,>=1 (from markdown-it-py[linkify]>=2.0.0->gradio)\n",
            "  Downloading linkify_it_py-2.0.2-py3-none-any.whl (19 kB)\n",
            "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->gradio) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->gradio) (2022.7.1)\n",
            "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn>=0.14.0->gradio) (8.1.3)\n",
            "Collecting h11>=0.8 (from uvicorn>=0.14.0->gradio)\n",
            "  Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (23.1.0)\n",
            "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (2.0.12)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (6.0.4)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (4.0.2)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (1.9.2)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (1.3.3)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (1.3.1)\n",
            "Collecting starlette<0.28.0,>=0.27.0 (from fastapi->gradio)\n",
            "  Downloading starlette-0.27.0-py3-none-any.whl (66 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (2022.12.7)\n",
            "Collecting httpcore<0.18.0,>=0.15.0 (from httpx->gradio)\n",
            "  Downloading httpcore-0.17.2-py3-none-any.whl (72 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.5/72.5 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (3.4)\n",
            "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (1.3.0)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->gradio) (1.0.7)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->gradio) (0.11.0)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->gradio) (4.39.3)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->gradio) (1.4.4)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->gradio) (3.0.9)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->gradio) (1.26.15)\n",
            "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.10/dist-packages (from httpcore<0.18.0,>=0.15.0->httpx->gradio) (3.6.2)\n",
            "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->gradio) (0.19.3)\n",
            "Collecting uc-micro-py (from linkify-it-py<3,>=1->markdown-it-py[linkify]>=2.0.0->gradio)\n",
            "  Downloading uc_micro_py-1.0.2-py3-none-any.whl (6.2 kB)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->gradio) (1.16.0)\n",
            "Building wheels for collected packages: ffmpy\n",
            "  Building wheel for ffmpy (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for ffmpy: filename=ffmpy-0.3.0-py3-none-any.whl size=4694 sha256=ef91c50f2d6392cd786667abb2856a61cafc5fe1b4c91d05d2a91d443df6f045\n",
            "  Stored in directory: /root/.cache/pip/wheels/0c/c2/0e/3b9c6845c6a4e35beb90910cc70d9ac9ab5d47402bd62af0df\n",
            "Successfully built ffmpy\n",
            "Installing collected packages: pydub, ffmpy, websockets, uc-micro-py, semantic-version, python-multipart, orjson, h11, aiofiles, uvicorn, starlette, mdit-py-plugins, linkify-it-py, huggingface-hub, httpcore, httpx, fastapi, gradio-client, gradio\n",
            "Successfully installed aiofiles-23.1.0 fastapi-0.97.0 ffmpy-0.3.0 gradio-3.34.0 gradio-client-0.2.6 h11-0.14.0 httpcore-0.17.2 httpx-0.24.1 huggingface-hub-0.15.1 linkify-it-py-2.0.2 mdit-py-plugins-0.3.3 orjson-3.9.1 pydub-0.25.1 python-multipart-0.0.6 semantic-version-2.10.0 starlette-0.27.0 uc-micro-py-1.0.2 uvicorn-0.22.0 websockets-11.0.3\n"
          ]
        }
      ],
      "source": [
        "\n",
        "!#export\n",
        "!pip install langchain\n",
        "!pip install openai\n",
        "!pip install pyPDF2\n",
        "!pip install faiss-cpu\n",
        "!pip install tiktoken\n",
        "!pip install gradio\n",
        "\n",
        "from langchain.embeddings.openai import OpenAIEmbeddings\n",
        "from langchain.text_splitter import CharacterTextSplitter\n",
        "from langchain.vectorstores import ElasticVectorSearch, pinecone, Weaviate, FAISS\n",
        "from PyPDF2 import PdfReader\n",
        "from langchain.chains.question_answering import load_qa_chain\n",
        "from langchain.llms import OpenAI\n",
        "import gradio as gr"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "rKB5vtc0ouCd"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "os.environ [\"OPENAI_API_KEY\"]=\"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\"\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "id": "7Swzv6FAuLbe"
      },
      "outputs": [],
      "source": [
        "global g_docSearch #global"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "euNCf7ztpdjA",
        "outputId": "bfb874b8-509d-4ff9-e124-bdd5b5f9253f"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Mounted at /content/gdrive\n"
          ]
        }
      ],
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/gdrive',force_remount=True)\n",
        "root_drive = \"/content/gdrive/MyDrive\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "lzUOTEo4q3g5"
      },
      "outputs": [],
      "source": [
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "plKPN7Egr12l"
      },
      "outputs": [],
      "source": [
        "# read raw text from pdf\n",
        "def convertpdftotext(reader):\n",
        "    rawtext = ''\n",
        "    for i, page in enumerate(reader.pages):\n",
        "        text=page.extract_text()\n",
        "    if text:\n",
        "        rawtext+=text\n",
        "\n",
        "    return(rawtext)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "id": "_MYDoP6L8nKs"
      },
      "outputs": [],
      "source": [
        "\n",
        "#textsplitter = RecursiveCharacterTextSplitter(\n",
        " #   chunk_size=1000,\n",
        "  #  chunk_overlap=100,\n",
        "#)\n",
        "\n",
        "def splittext(rawtext):\n",
        "    text_splitter = CharacterTextSplitter(\n",
        "    separator='\\n',\n",
        "    chunk_size=1000,\n",
        "    chunk_overlap=100,\n",
        "    length_function=len,)\n",
        "\n",
        "\n",
        "    return (text_splitter.split_text(rawtext))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "id": "iTrz03VcHhtk"
      },
      "outputs": [],
      "source": [
        "def getsplittextpdf(reader):\n",
        "   text = convertpdftotext(reader)\n",
        "   return(splittext(text))\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "id": "qgNuJ3sZWCh0"
      },
      "outputs": [],
      "source": [
        "def retrieveinfo(query, embeddings):\n",
        "    chain = load_qa_chain(OpenAI(), chain_type='stuff')\n",
        "    global g_docSearch\n",
        "    docs = g_docSearch.similarity_search(query)\n",
        "    return (chain.run(input_documents=docs, question=query))  \n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "id": "JzCXBbyxNcM0"
      },
      "outputs": [],
      "source": [
        "def convertPDFsTotext(pdfFiles):\n",
        "\n",
        "\n",
        "    type (pdfFiles)\n",
        "\n",
        "\n",
        "    text = \"\"\n",
        "    for pdf in pdfFiles:\n",
        "        print(pdf.name)\n",
        "        pdf_reader = PdfReader(pdf.name)\n",
        "        for page in pdf_reader.pages:\n",
        "            text += page.extract_text()\n",
        "\n",
        "    return(text)\n",
        "\n",
        "  \n",
        "#  for idx, file in enumerate(pdfFiles):\n",
        "#      print(file.name)\n",
        "#      reader = reader+PdfReader(file.name)\n",
        "      \n",
        "      \n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "id": "uf-ykpQsvH6v"
      },
      "outputs": [],
      "source": [
        "def processPDFs(files):\n",
        "\n",
        "    rawtext = convertPDFsTotext(files)\n",
        "\n",
        "    print (rawtext)\n",
        "\n",
        "    splittedText = splittext(rawtext)\n",
        "\n",
        "    embeddings = OpenAIEmbeddings()\n",
        "    \n",
        "    global g_docSearch\n",
        "\n",
        "    g_docSearch = FAISS.from_texts(texts=splittedText,  embedding=embeddings)\n",
        "\n",
        "    return(\"finished ... pdf created embeddings ready to querry\")\n",
        "\n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {
        "id": "4wQTwer8PNgJ"
      },
      "outputs": [],
      "source": [
        "def retrieveInfo(query):\n",
        "    chain = load_qa_chain(OpenAI(), chain_type='stuff')\n",
        "    global g_docSearch\n",
        "    docs = g_docSearch.similarity_search(query)\n",
        "    return (chain.run(input_documents=docs, question=query))  \n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 17,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_37CKumBh4Py",
        "outputId": "2bef4a87-9fa7-4851-bdf5-90445bc9752e"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            " The document was written by OpsHub, Inc.\n"
          ]
        }
      ],
      "source": [
        "\n",
        "filetoread = root_drive+'/opshub/opshubwp3.pdf'\n",
        "reader = PdfReader(filetoread)\n",
        "splitTexts = getsplittextpdf(reader)\n",
        "\n",
        "\n",
        "embeddingS = OpenAIEmbeddings()\n",
        "\n",
        "g_docSearch = FAISS.from_texts(texts=splitTexts, embedding=embeddingS)\n",
        "\n",
        "print(retrieveInfo(\"who wrote this document\"))\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "id": "O9vWiCZJaXEn"
      },
      "outputs": [],
      "source": [
        "def chat(chat_history, user_input):\n",
        "  \n",
        "  bot_response = retrieveInfo(user_input)\n",
        "  print(bot_response)\n",
        "  response = \"\"\n",
        "  for letter in ''.join(bot_response): #[bot_response[i:i+1] for i in range(0, len(bot_response), 1)]:\n",
        "      response += letter + \"\" \n",
        "      yield chat_history + [(user_input, response)]\n",
        "  "
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 14,
      "metadata": {
        "id": "XiHDrgDoS3Tw"
      },
      "outputs": [],
      "source": [
        "def showtext(text_input):\n",
        "  textsplits=splittext(text_input)\n",
        "  embeddings = OpenAIEmbeddings()\n",
        "  docsearch = FAISS.from_texts(textsplits,  embeddings)\n",
        "  return(\"Finished\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "20SHoVlHWUzI",
        "outputId": "0a56dc02-94f0-4d91-fe32-a10fb1d6e23f"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
            "\n",
            "Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().\n",
            "Running on public URL: https://b62e98291f135be088.gradio.live\n",
            "\n",
            "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "<div><iframe src=\"https://b62e98291f135be088.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            " This document is about how organizations can become more customer-centric by integrating their solution, services, and sales teams for better collaboration and productivity.\n",
            " I don't know\n",
            "/tmp/gradio/a726c2fb0a9aa052f18606c902f6c794ff98a3f9/resume5.pdf\n",
            "/tmp/gradio/34116016ab760d612135672e73f77bcdc09bdcf9/Application from Mercy Renteria for Sales Associate _ ZipRecruiter.pdf\n",
            "/tmp/gradio/628c3db9e7fb65878c50a4de5c64fd6f19abc4c1/resume2.pdf\n",
            "Laura E. Brooks\n",
            "415.250.3454\n",
            "Laura.brooks89@yahoo.com\n",
            "Qualifications\n",
            "Clerical & office equipment experience\n",
            "Knowledgeable in MS Office\n",
            "Great customer service skills\n",
            "Skilled with using POS devicesCash handling and management \n",
            "Well organized, responsible & reliable \n",
            "Excellent multi-tasker under pressure\n",
            "Self-motivated\n",
            "Work Experience\n",
            "Customer Service Representative – Seasonal GAP, Inc.Oct. 2020 to Dec. 2020\n",
            "Solved customer’s issues through critical reasoning \n",
            "Answered multi-phone lines, assisted customers in their shopping experience \n",
            "Provided customer service from order assistance to customer refund inquiries\n",
            "Team Member Round Table Pizza Jun. 2017 to Apr. 2018\n",
            "Provided customer service support by answering phones and accurately communicating orders\n",
            "Assisted team member in both front and back of house, including \n",
            "Performed closing duties such as stocking shelves and cleaning \n",
            "Receptionist Electrical Certification Consultants Dec. 2015 to Jun. 2017\n",
            "Provided phone support such as directing calls, taking accurate messages, and offering excellence customer services\n",
            "Performed routine office duties such as maintain and organizing company files\n",
            "Data entry to digitalize files\n",
            "Cashier Popeye’s Chicken Feb. 2015 to Nov. 2015\n",
            "Assisted customers with their food orders with excellent listening skills both in person and over drive thru\n",
            "Operated under strict time management expectations while maintaining a positive demeanor \n",
            "Stocked shelves, cleaned store and ensured store was closed properl y\n",
            "Sales Associate and Cashier Shell Gas Station Mar. 2012 to Aug. 2013\n",
            "Used POS system including cash management, drawer balancing procedures, and sales\n",
            "Responsible for restocking and cleanliness of the entire gas station\n",
            "Inventory of regulated items \n",
            "Uber Driver During employment gaps\n",
            "Provided safe transportation for passengers and their baggage\n",
            "Utilized excellent customer service and a polite attitude\n",
            "Upkeep of car maintenance and cleanliness \n",
            "Education\n",
            "High School Diploma Novato High School Novato, CA5/15/23, 12:58 PM Application from Mercy Renteria for Sales Associate | ZipRecruiter\n",
            "https://www .ziprecruiter .com/contact/response/dc1 15697/483fd28c?q=&label_name=&status=&great=1&invite=0&applied_date=&my_candidates=0& … 1/2show more\n",
            "show moreMercy Renteria\n",
            "Petaluma, CA, US\n",
            "Email Address\n",
            "mercyrenteria47@gmail.com\n",
            "Phone Number\n",
            "+1 7077219789\n",
            "Years of Experience\n",
            "Mid Level (3-6 years)\n",
            "Highest Degree Earned\n",
            "High School Diploma/GEDSkills\n",
            "Bilingual Customer Service Oriented Labelling Multi Tasking\n",
            "Receptionist Scanning Shipping and Receiving Stocking\n",
            "Experience\n",
            "Grocery Clerk\n",
            "Lucky’s Jan 2019 - Current\n",
            "My responsibilities at Lucky’s is to maintain the front end of the store\n",
            "clean as well as focusing on customer service. I interact with customers\n",
            "and help them find specific items they’re looking for. When I am on cart\n",
            "hours, I sweep the whole entire store and clean the bathrooms...\n",
            "Food Runner\n",
            "McDonald’s Jan 2018 - Dec 2018\n",
            "At McDonald’s, I was in charge of taking orders in the front of the store as\n",
            "well as the drive thru side. I would also bag the food and hand it out.\n",
            "Sometimes I would move to the back of the grill and help out. When the\n",
            "store was low on supplies I would restock. Last but not leas...\n",
            "Front Desk Receptionist\n",
            "LDA 4 You Jun 2016 - Aug 2017\n",
            "One of my main responsibilities at LDA 4 You was to keep the office\n",
            "clean and organized. I was in charge of filing papers and documents. I\n",
            "would also retype certain documents and print them out.\n",
            "Education\n",
            "San Antonio High School\n",
            "High School Diploma 20195/15/23, 12:58 PM Application from Mercy Renteria for Sales Associate | ZipRecruiter\n",
            "https://www .ziprecruiter .com/contact/response/dc1 15697/483fd28c?q=&label_name=&status=&great=1&invite=0&applied_date=&my_candidates=0& … 2/2Sourced via ZipRecruiterJEANNINE M. STUART  \n",
            "Phone: (301) 675 - 0092                                                                             Sonoma, CA. 95476                                                                                                       \n",
            "Email: Jstuart5063@gmail.com    \n",
            "______________________________________________________________________________  \n",
            "Detailed -oriented Manager with the ability to prioritize tasks,  work under pressure, and \n",
            "collaborate with a team.  Successful in Wedding Management, a proactive problem solver, \n",
            "and a passion for outstanding customer service.  \n",
            "• Effective in Identifying Cus tomers’ needs and finding solutions.  \n",
            "• Excellent verbal and written communications skills  \n",
            "• Flexible and adaptive  \n",
            "• Able to master ne w skills quickly  \n",
            " \n",
            "EXPERIENCE  \n",
            "Senior Planner at Extraordinary Weddings                                                     March 2021 -June 2022    \n",
            "Napa, California          \n",
            "• Effectively and efficiently plan and detail all aspects of weddings for boutique planning company  \n",
            "• Collaborate with couples on venue searches , vendor selectio n, design and rental coordination  \n",
            "• Continually manage  rental orders and vendor sourcing within client budget, to ensure all \n",
            "couples ’ and vendors ’ needs  are addressed  \n",
            "• Build a full timeline for a wedding that included all vendors and minute to the minute detailed \n",
            "informatio n \n",
            "• Finalize  all schematics, rental orders, and timelines and distribute  to vendors two weeks in \n",
            "advance of the wedding  \n",
            "• Conduct final walk throughs of a venue with the couple, venue management, and other vendors  \n",
            "• Execute and oversee  full installation, event management  and break down of weddings  \n",
            " \n",
            " \n",
            " \n",
            "Guest Experience Manager at Viansa Winery                                              October 2018 -March 2021              \n",
            "Sonoma, California          \n",
            "• Created  and sold all guest experiences fo r destination winery  \n",
            "• Planned and executed all aspects of special events at the winery such as corporate events, \n",
            "winery “pick up parties,” engagements, and weddings  \n",
            "• Managed entire sales process from site tours with prospective clients, planning and executio n, \n",
            "payment collection and follow up to ensure complete guest satisfaction  \n",
            "• Exceeded monthly budgets by bringing in new business to the winery during “down season”  \n",
            "• Created SOP’s and t rained tasting room staff on prope r hospitality etiquette  \n",
            "• Managed guest experience and fielded any customer complaints  \n",
            "• Actively networked with new  clients , local business and industry partners to promote the winery  \n",
            "• Developed  a preferred vendor list of over 50 local vendors , maintaining strong and professional \n",
            "relationships  \n",
            " \n",
            "Staffing Manager at Ramekins Culinary School and Special Events           March 2016 -December \n",
            "2018                                                                                                           \n",
            "Sonoma, California          \n",
            "• Staffed all front of the house positions for every  onsite and offsite  event  for Ramekins \n",
            "Culinary School and Catering portfolio  \n",
            "• Led weekly Staff Meeting to determine staffing lev els for all onsite and offsite events  \n",
            "• Interviewed and hired all new front of house employees and processed all new hire \n",
            "paperwork  \n",
            "• Implemented training program for new employees and offered continuing training \n",
            "opportunities for employees who wanted to advan ce their career  \n",
            "• Managed staffing database of over 100 active employees  \n",
            "• Assisted in time keeping , payroll management  \n",
            "Assistant Onsite Production Manager   \n",
            "• Planned and executed all aspects of special events at Ramekins Culinary School and \n",
            "General’s Daughter  \n",
            "• Created B anquet Event Orders, schematics and coordinated rentals  for all onsite events  \n",
            "• Conducted final walk throughs of a venue with the client and event planner  \n",
            "• Managed setup and execution of all onsite events to ensure client satisfaction  \n",
            "• Created  binders for every onsite event with all information necessary for an Event Manager \n",
            "and their staff to successfully run their event  \n",
            " \n",
            "SOFTWARE SKILLS  \n",
            "Proficient in Microsoft Office Word, Excel, PowerPoint, Outlook , Triple Seat, All  Seated , Aisle  Planner . \n",
            " \n",
            " This document is about the job application from Mercy Renteria for a Sales Associate position at San Antonio High School.\n",
            " This document references six people.\n"
          ]
        }
      ],
      "source": [
        "# Build the three-tab Opshub UI: text ingestion, PDF ingestion and Q&A chat.\n",
        "# NOTE(review): gr, showtext, processPDFs and chat are not defined in this cell;\n",
        "# presumably earlier cells import/define them - run those first.\n",
        "with gr.Blocks() as demo:\n",
        "    gr.Markdown('# Opshub Bot ')\n",
        "\n",
        "    # Tab 1: clicking the button calls showtext with the input textbox and\n",
        "    # writes the result to the output textbox.\n",
        "    with gr.Tab(\"Input Text Document\"):\n",
        "        text_input = gr.Textbox()\n",
        "        text_output = gr.Textbox()\n",
        "        text_button = gr.Button(\"Add To Opshub Knowledge Base!!!\")\n",
        "        text_button.click(showtext, text_input, text_output)\n",
        "\n",
        "    # Tab 2: clicking the button calls processPDFs with the uploaded files.\n",
        "    # These widgets get their own names so they no longer rebind the\n",
        "    # text_output / text_button variables created for Tab 1.\n",
        "    with gr.Tab(\"Load PDF Documents\"):\n",
        "        inputpdfs = gr.File(file_count=\"multiple\", file_types=[\".pdf\"], label=\"Load PDF files\")\n",
        "        pdf_output = gr.Textbox()\n",
        "        pdf_button = gr.Button(\"Add To Opshub Knowledge Base !!!\")\n",
        "        pdf_button.click(processPDFs, inputpdfs, pdf_output)\n",
        "\n",
        "    # Tab 3: submitting the textbox calls chat with [chatbot, message] and\n",
        "    # renders the returned value back into the chatbot component.\n",
        "    with gr.Tab(\"Opshub Knowledge Bot\"):\n",
        "        chatbot = gr.Chatbot()\n",
        "        # The positional string pre-fills the textbox with a sample question.\n",
        "        message = gr.Textbox(\"What is this document about?\")\n",
        "        message.submit(chat, [chatbot, message], chatbot)\n",
        "\n",
        "# debug=True keeps this cell running while the app is served, so errors are\n",
        "# printed in the cell output; interrupt the cell to stop the app.\n",
        "demo.queue().launch(debug=True)"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyNJvptfPtK7UBc30wO7dqf2",
      "include_colab_link": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}