coursera-dl/edx-dl

View on GitHub
edx_dl/common.py

Summary

Maintainability
A
3 hrs
Test Coverage
# -*- coding: utf-8 -*-

"""
Common type definitions and constants for edx-dl

The classes in this module represent the structure of courses in edX.  The
structure is:

* A Course contains Sections
* Each Section contains Subsections
* Each Subsection contains Units

Notice that we don't represent the full tree structure for both performance
and UX reasons:

Course ->  [Section] -> [SubSection] -> [Unit] -> [Video]

In the script the data structures used are:

1. The data structures to represent the course information:
   Course, Section->[SubSection]

2. The data structures to represent the chosen courses and sections:
   selections = {Course, [Section]}

3. The data structure of all the downloadable resources, which represents each
   subsection via its URL and the list of resources that can be extracted from
   the Units it contains:
   all_units = {Subsection.url: [Unit]}

4. The units can contain multiple videos:
   Unit -> [Video]
"""


class Course(object):
    """
    Holds the identifying information of a single edX course.
    """
    def __init__(self, id, name, url, state):
        """
        @param id: Course identifier. In edX it is composed by the path
            {organization}/{course_number}/{course_run}
        @type id: str or None

        @param name: Course name, as taken from the h3 header of the course
            page.
        @type name: str

        @param url: Address of the course.
        @type url: str or None

        @param state: Course state. One of the following values:
            * 'Not yet'
            * 'Started'
        @type state: str
        """
        self.id, self.name = id, name
        self.url, self.state = url, state

    def __repr__(self):
        # Any falsy url (None or an empty string) is rendered as the
        # literal text "None" so the concatenation below never sees None.
        shown_url = self.url or "None"
        prefix = self.name + ": "
        return prefix + shown_url


class Section(object):
    """
    A section of a course, together with the subsections it holds.
    """
    def __init__(self, position, name, url, subsections):
        """
        @param position: Index of this section within the list of sections,
            counted from 1.
        @type position: int

        @param name: Section name.
        @type name: str

        @param url: Address of the section, or None when the section has no
            subsections.
        @type url: str or None

        @param subsections: The subsections belonging to this section.
        @type subsections: [SubSection]
        """
        self.position, self.name = position, name
        self.url = url
        # The list is stored as given (no copy is made).
        self.subsections = subsections


class SubSection(object):
    """
    A subsection that lives inside a section of the course.
    """
    def __init__(self, position, name, url):
        """
        @param position: Index of this subsection within the subsection list,
            counted from 1.
        @type position: int

        @param name: Subsection name.
        @type name: str

        @param url: Address of the subsection.
        @type url: str
        """
        self.position, self.name, self.url = position, name, url

    def __repr__(self):
        # Rendered as "<name>: <url>"; both attributes are expected to be
        # strings, since they are concatenated directly.
        prefix = self.name + ": "
        return prefix + self.url

class Unit(object):
    """
    One unit of the course: its videos plus any extra resources.
    """
    def __init__(self, videos, resources_urls):
        """
        @param videos: The videos found in the unit.
        @type videos: [Video]

        @param resources_urls: Additional resources that come along with
            the unit. Resources include files with certain extensions and
            youtube links.
        @type resources_urls: [str]
        """
        # Both lists are stored as given (no copies are made).
        self.videos, self.resources_urls = videos, resources_urls


class Video(object):
    """
    One video, described by the URLs it can be fetched from.
    """
    def __init__(self, video_youtube_url, available_subs_url,
                 sub_template_url, mp4_urls):
        """
        @param video_youtube_url: Youtube link of the video, if there is one.
        @type video_youtube_url: str or None

        @param available_subs_url: Address of the available subtitles.
        @type available_subs_url: str

        @param sub_template_url: Undocumented in the original code;
            presumably a URL template for building subtitle URLs
            (TODO: confirm against the callers).
        @type sub_template_url: str

        @param mp4_urls: Addresses of the mp4 video files.
        @type mp4_urls: [str]
        """
        self.video_youtube_url = video_youtube_url
        self.mp4_urls = mp4_urls
        self.available_subs_url, self.sub_template_url = (
            available_subs_url, sub_template_url)


class ExitCode(object):
    """
    Namespace gathering every exit code the program can finish with.

    The numeric values are spelled out explicitly so that inserting a new
    code can never silently renumber the existing ones.
    """
    # Success.
    OK = 0

    # Credential-related failures.
    MISSING_CREDENTIALS = 1
    WRONG_EMAIL_OR_PASSWORD = 2

    # Course URL / platform failures.
    MISSING_COURSE_URL = 3
    INVALID_COURSE_URL = 4
    UNKNOWN_PLATFORM = 5

    # Nothing downloadable was found.
    NO_DOWNLOADABLE_VIDEO = 6


# Base argument list for the youtube-dl command line.
YOUTUBE_DL_CMD = [
    'youtube-dl',
    '--ignore-config',
]

# Default name of the cache file.
DEFAULT_CACHE_FILENAME = 'edx-dl.cache'

# Default file extensions treated as downloadable resources.
# NOTE(review): 'e?ps' looks like a regex fragment (matching both "eps" and
# "ps"), so the entries are presumably used as patterns -- confirm at the
# point of use.
DEFAULT_FILE_FORMATS = [
    'e?ps', 'pdf', 'txt',
    'doc', 'xls', 'ppt',
    'docx', 'xlsx', 'pptx',
    'odt', 'ods', 'odp', 'odg',
    'zip', 'rar', 'gz',
    'mp3',
    'R', 'Rmd', 'ipynb', 'py',
]