Lambda-School-Labs/Labs26-StorySquad-DS-TeamB

View on GitHub
notebooks/transcribed_stories.ipynb

Summary

Maintainability
Test Coverage
{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python_defaultSpec_1600716520497",
   "display_name": "Python 3.7.6 64-bit ('base': conda)"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "source": [
    "# Transcribed Stories from Google Cloud Vision API\n",
    "### `transcribed_stories.csv` has columns 'Submission ID' and 'Transcribed Text'\n",
    "### The `transcribe` function is the basis for `transcription.py`, which is used in the app"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports\n",
    "import os.path\n",
    "from google.cloud import vision\n",
    "import io\n",
    "from google.oauth2 import service_account\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def transcribe(image_path):\n",
    "    '''\n",
    "    Reads in a single image, connects to Google Cloud Vision API's and returns \n",
    "    the transcribed text as a string\n",
    "    '''\n",
    "\n",
    "    # If image_path is local\n",
    "    with io.open(image_path, 'rb') as image_file:\n",
    "        content = image_file.read()\n",
    "    image = vision.types.Image(content=content)\n",
    "\n",
    "    # # If image_path is a uri\n",
    "    # image = vision.types.Image()\n",
    "    # image.source.image_uri = uri\n",
    "\n",
    "    # Connect to Google API client\n",
    "    creds = service_account.Credentials.from_service_account_file('/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Story Squad-6122da7459cf.json')\n",
    "    client = vision.ImageAnnotatorClient(credentials=creds)\n",
    "    response = client.document_text_detection(image=image)\n",
    "\n",
    "    # Save transcribed text\n",
    "    if response.text_annotations:\n",
    "        transcribed_text = response.text_annotations[0].description.replace('\\n', ' ')\n",
    "    else:\n",
    "        return 'No Text'\n",
    "\n",
    "    return transcribed_text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def full_story(directory):\n",
    "    '''\n",
    "    Runs transcribe method on each page in a given file.\n",
    "    '''\n",
    "    # Create list of paths\n",
    "    paths = []\n",
    "\n",
    "    # For each image in the file, add the path to paths\n",
    "    for item in os.listdir(directory):\n",
    "        if item[-3:] == 'jpg':\n",
    "            path = os.path.join(directory, item)\n",
    "            paths.append(path)\n",
    "    \n",
    "    # Sort so pages are always in the correct order\n",
    "    paths = sorted(paths)\n",
    "    # Empty string to combine multiple pages of text together on\n",
    "    full_text = ''\n",
    "\n",
    "    # Feed each individual image into google api\n",
    "    for image_path in paths:\n",
    "        full_text += transcribe(image_path) \n",
    "\n",
    "    return full_text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "-3201 If you think there is moce embaressing then me your wrong because there's no such thing as luck in my world. And somehow I become the coolest kid in school and the teachers pet. normal day Swiettie Shume hece gizli ile said I paused Jessie peaples at me like I per crazy then hissed Im not spying someone just came out of the haunted house it gets worse every time you that house was abandon facenfocaratian look, cl opened the curtain I saw Swietle then I saw a man walk lout, She looping an people 3201 Next thing you know the lights twen off , I hear a scream I relo iliaed it was Tessie. After that it blank Swiettie saved us and I was the coolest, kid ever. I still don't know what happened but I'm cool for that. ar \n"
    }
   ],
   "source": [
    "# Test a single file\n",
    "directory = os.path.join('/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud', 'Stories Dataset', \"Transcribed Stories\", '32--', '3201')\n",
    "\n",
    "print(full_story(directory))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "['/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--']\n"
    }
   ],
   "source": [
    "# Pull out each parent folder (31--, 32--, 51--, 52--) in the given Stories Dataset\n",
    "parent_path = '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories'\n",
    "\n",
    "folders = []\n",
    "\n",
    "for route in os.listdir(parent_path):\n",
    "    if route != '.DS_Store':\n",
    "        folders.append(os.path.join(parent_path, route))\n",
    "print(folders)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "tags": [
     "outputPrepend"
    ]
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "set/Transcribed Stories/31--/3105', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3129', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3116', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3111', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3118', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3127', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3120', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3121', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3119', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3126', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3110', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3128', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3131', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3109', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3107', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3106', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories 
Dataset/Transcribed Stories/31--/3101', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3108', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3130', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3112', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3115', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3123', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3124', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3125', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3122', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3114', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3113', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3245', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3216', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3229', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3211', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3227', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories 
Dataset/Transcribed Stories/32--/3218', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3220', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3243', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3244', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3221', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3226', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3219', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3210', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3217', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3228', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3235', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3232', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3204', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3203', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3202', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3205', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories 
Dataset/Transcribed Stories/32--/3234', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3212', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3215', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3223', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3224', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3248', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3241', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3246', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3225', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3222', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3214', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3213', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3247', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3240', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3231', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3209', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories 
Dataset/Transcribed Stories/32--/3236', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3238', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3207', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3239', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3206', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3201', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3208', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3237', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--/3230', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5254', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5253', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5262', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5236', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5209', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5207', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5238', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories 
Dataset/Transcribed Stories/52--/5263', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5264', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5252', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5255', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5206', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5239', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5230', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5237', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5208', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5215', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5224', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5223', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5248', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5246', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5241', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5222', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories 
Dataset/Transcribed Stories/52--/5225', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5213', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5214', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5240', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5247', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5249', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5232', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5235', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5203', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5204', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5250', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5257', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5261', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5259', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5205', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5202', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories 
Dataset/Transcribed Stories/52--/5234', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5233', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5258', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5260', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5256', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5251', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5242', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5245', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5229', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5216', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5220', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5218', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5227', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5244', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5243', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5219', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories 
Dataset/Transcribed Stories/52--/5221', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5228', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5217', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--/5210', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5115', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5112', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5124', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5123', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5122', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5125', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5113', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5114', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5109', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5131', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5107', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5101', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories 
Dataset/Transcribed Stories/51--/5106', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5130', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5108', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5111', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5116', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5129', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5120', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5118', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5126', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5119', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5121', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5117', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5110', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5132', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5103', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5104', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories 
Dataset/Transcribed Stories/51--/5105', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--/5102']\n"
    }
   ],
   "source": [
    "# Pull out each submission path inside the parent folder\n",
    "paths = []\n",
    "\n",
    "for file in folders:\n",
    "    for path in os.listdir(file):\n",
    "        if path != '.DS_Store':\n",
    "            paths.append(os.path.join(file, path))\n",
    "print(paths)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\"3106 D she was very, a berenang The pony that didn't know . Once there was a cong to. Her name is show light Star Chrismas morning she escaped to know why she was treated like thin ain said to hersh where should to The next day she found Os pony Snow light sound She ran to find the head- ก in the tallest tower, she asked can help me find why no one headmiste said thats easy, because I wanted you here. Snowlight said why worden getminant car The head mistress said come on your most of Saipan tha pog. By the way, my The next day she had already made tons of friends. Their names are Elisa Tessa and Lina. One day, because her ' they came right to her and told her the she stopped her hoors and every thing.They were planning a sypin hame 3104 2 party for her and she ruined it being a suprise! She selt so sorry. She learned to wait for her friends to tell her. She also learned she was magical. The End \""
     },
     "metadata": {},
     "execution_count": 72
    }
   ],
   "source": [
    "# Test one returned path\n",
    "test_path = paths[1]\n",
    "\n",
    "full_story(test_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "3132\n3104\n3103\n3117\n3102\n3105\n3129\n3116\n3111\n3118\n3127\n3120\n3121\n3119\n3126\n3110\n3128\n3131\n3109\n3107\n3106\n3101\n3108\n3130\n3112\n3115\n3123\n3124\n3125\n3122\n3114\n3113\n3245\n3216\n3229\n3211\n3227\n3218\n3220\n3243\n3244\n3221\n3226\n3219\n3210\n3217\n3228\n3235\n3232\n3204\n3203\n3202\n3205\n3234\n3212\n3215\n3223\n3224\n3248\n3241\n3246\n3225\n3222\n3214\n3213\n3247\n3240\n3231\n3209\n3236\n3238\n3207\n3239\n3206\n3201\n3208\n3237\n3230\n5254\n5253\n5262\n5236\n5209\n5207\n5238\n5263\n5264\n5252\n5255\n5206\n5239\n5230\n5237\n5208\n5215\n5224\n5223\n5248\n5246\n5241\n5222\n5225\n5213\n5214\n5240\n5247\n5249\n5232\n5235\n5203\n5204\n5250\n5257\n5261\n5259\n5205\n5202\n5234\n5233\n5258\n5260\n5256\n5251\n5242\n5245\n5229\n5216\n5220\n5218\n5227\n5244\n5243\n5219\n5221\n5228\n5217\n5210\n5115\n5112\n5124\n5123\n5122\n5125\n5113\n5114\n5109\n5131\n5107\n5101\n5106\n5130\n5108\n5111\n5116\n5129\n5120\n5118\n5126\n5119\n5121\n5117\n5110\n5132\n5103\n5104\n5105\n5102\n"
    }
   ],
   "source": [
    "# Creates list of entries to convert into a df\n",
    "\n",
    "data_list = []\n",
    "\n",
    "for path in paths:\n",
    "    # Gives the story ID\n",
    "    print(path.strip()[-4:])\n",
    "    transcribed_text = full_story(path)\n",
    "    story_id = path.strip()[-4:]\n",
    "    entry = {\"Submission ID\": story_id, \"Transcribed Text\": transcribed_text}\n",
    "    data_list.append(entry)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "167"
     },
     "metadata": {},
     "execution_count": 98
    }
   ],
   "source": [
    "# Check that all of the stories were transcribed\n",
    "len(data_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "  Submission ID                                   Transcribed Text\n0          3132  Page. I 3132 Once there was a little cheatah a...\n1          3104  3106 D she was very, a berenang The pony that ...\n2          3103  3103 Rainbow the Unica unicom named some een P...\n3          3117  3117 O gum drop land gumdrop. land is prace We...\n4          3102  3102 The secret fifth grade E am Anella I am s...",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Submission ID</th>\n      <th>Transcribed Text</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>3132</td>\n      <td>Page. I 3132 Once there was a little cheatah a...</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>3104</td>\n      <td>3106 D she was very, a berenang The pony that ...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3103</td>\n      <td>3103 Rainbow the Unica unicom named some een P...</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>3117</td>\n      <td>3117 O gum drop land gumdrop. land is prace We...</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>3102</td>\n      <td>3102 The secret fifth grade E am Anella I am s...</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 99
    }
   ],
   "source": [
    "# Build df\n",
    "\n",
    "df = pd.DataFrame(data_list)\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save df\n",
    "df.to_csv('transcribed_stories.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}