ahbeng/NUSMods

View on GitHub
scrapers/nus-v2/src/tasks/GetSemesterModules.ts

Summary

Maintainability
A
1 hr
Test Coverage
import { strict as assert } from 'assert';
import { flatten, partition } from 'lodash';

import { AcademicGrp, AcademicOrg, ModuleInfo } from '../types/api';
import { Semester } from '../types/modules';
import { Task } from '../types/tasks';
import { Cache } from '../types/persist';

import BaseTask from './BaseTask';
import config from '../config';
import { getTermCode, retry, containsNbsps } from '../utils/api';
import { TaskError, UnknownApiError } from '../utils/errors';
import { validateSemester } from '../services/validation';

/**
 * Input accepted by GetSemesterModules#run
 */
interface Input {
  // Departments to query - run() issues one module request per entry, using
  // its AcademicOrganisation code for the request and its Description for logs
  departments: AcademicOrg[];
  // Faculties - not read by run() in the code shown in this file
  faculties: AcademicGrp[];
}

// Result of the task: the flat list of module info records that run() returns
// and writes to the modules cache
type Output = ModuleInfo[];

/**
 * Task that fetches module info for every given department in one semester.
 *
 * Only modules flagged for printing in the catalog (`PrintCatalog === 'Y'`)
 * are kept. The combined list is written to the task's cache and returned.
 */
export default class GetSemesterModules extends BaseTask implements Task<Input, Output> {
  semester: Semester;
  academicYear: string;

  modulesCache: Cache<Output>;

  /** Human readable label for this task, including the semester it targets */
  get name() {
    return `Get modules info for semester ${this.semester}`;
  }

  /**
   * @param semester semester to fetch; an assertion error is thrown when
   *   validateSemester() does not accept it
   * @param academicYear academic year to fetch, defaulting to the configured one
   */
  constructor(semester: Semester, academicYear: string = config.academicYear) {
    // Reject bad semesters before any base class set up happens
    assert(validateSemester(semester), `${semester} is not a valid semester`);

    super(academicYear);

    this.semester = semester;
    this.academicYear = academicYear;

    const cacheKey = `semester-${semester}-modules`;
    this.modulesCache = this.getCache<Output>(cacheKey);

    // Child logger so every entry carries the semester, task and year
    const loggerFields = {
      semester,
      task: GetSemesterModules.name,
      year: academicYear,
    };
    this.logger = this.rootLogger.child(loggerFields);
  }

  /**
   * Downloads the printable modules of each department in `input.departments`,
   * stores the merged list in the cache and returns it.
   *
   * @throws TaskError when any department request fails
   */
  async run(input: Input) {
    const { semester, academicYear } = this;
    this.logger.info(`Getting modules for ${academicYear} semester ${semester}`);

    const termCode = getTermCode(semester, academicYear);

    // One request per department - asking the API for every department in a
    // single call makes it time out
    const pending = input.departments.map(async (dept) => {
      try {
        // Retried up to 3 times, but only for UnknownApiError failures
        const fetched = await retry(
          () => this.api.getDepartmentModules(termCode, dept.AcademicOrganisation),
          3,
          (error) => error instanceof UnknownApiError,
        );

        // Split into modules shown in the catalog and those hidden from it
        const [visible, suppressed] = partition(
          fetched,
          (info: ModuleInfo) => info.PrintCatalog === 'Y',
        );

        this.logger.debug('Downloaded %i modules from %s', visible.length, dept.Description);
        if (suppressed.length !== 0) {
          this.logger.debug('Filtered out %i non-print modules', suppressed.length);
        }

        // Flag descriptions containing non-breaking spaces; the module is still kept
        for (const info of visible) {
          if (containsNbsps(info.Description)) {
            const moduleCode = `${info.Subject}${info.CatalogNumber}`;
            this.logger.error(
              { moduleCode },
              `${moduleCode}: Module description contains non-breaking spaces`,
            );
          }
        }

        return visible;
      } catch (e) {
        const message = `Cannot get modules from ${dept.Description}`;
        this.logger.error(e, message);
        throw new TaskError(message, this, e);
      }
    });

    // A single failed department fails the whole task
    let moduleLists: ModuleInfo[][];
    try {
      moduleLists = await Promise.all(pending);
    } catch (e) {
      throw new TaskError('Cannot get module list', this, e);
    }
    const modules = flatten<ModuleInfo>(moduleLists);

    this.logger.info(`Downloaded ${modules.length} modules in all`);

    // Persist the combined list in the cache before handing it back
    await this.modulesCache.write(modules);

    return modules;
  }
}