WikiEducationFoundation/WikiEduDashboard

app/assets/javascripts/utils/mediawiki_revisions_utils.js

/* eslint-disable no-restricted-syntax */
/* eslint-disable no-await-in-loop */

import { flatten, chunk } from 'lodash-es';
import { stringify } from 'query-string';
import request from './request';
import { toWikiDomain } from './wiki_utils';
import { toDate } from './date_utils';
import { subDays, formatISO, subYears, isBefore } from 'date-fns';

// the MediaWiki API sends back revisions in pages
// every page except the last includes a continue token,
// which must be included in the next request to fetch the following page
// this helper fetches all those pages and merges them into one array
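// for reference, a continuation response from the API looks roughly like this
// (shape per the MediaWiki API docs; the token values are illustrative):
// {
//   "continue": { "uccontinue": "20230601000000|1234567890", "continue": "-||" },
//   "query": { "usercontribs": [/* one page of revisions */] }
// }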
const fetchAll = async (API_URL, params, continue_str) => {
  const allData = [];
  let continueToken;
  let hasMore = true;
  while (hasMore) {
    let response;
    if (continueToken) {
      params[continue_str] = continueToken[continue_str];
      params.continue = continueToken.continue;
    }
    try {
      response = await request(`${API_URL}?${stringify(params)}&origin=*`);
      if (!response.ok) {
        throw response;
      }
    } catch (e) {
      // if a request fails, return whatever we managed to fetch so far
      return allData;
    }
    const json = await response.json();
    allData.push(...json.query.usercontribs);
    if (allData.length >= 1000) {
      // we have enough revisions - don't need to burden the API
      return allData;
    }
    if (json.continue) {
      continueToken = json.continue;
    } else {
      hasMore = false;
    }
  }
  return allData;
};
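
// for illustration, fetchAll could be called directly like this
// (hypothetical endpoint and username; the params mirror those built in fetchAllRevisions below):
// const contribs = await fetchAll('https://en.wikipedia.org/w/api.php', {
//   action: 'query',
//   format: 'json',
//   list: 'usercontribs',
//   ucuser: 'ExampleUser',
//   uclimit: 50,
// }, 'uccontinue');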

// filter is a function used to filter the fetched revisions
// currently it is used to keep only revisions to tracked articles
export const fetchRevisionsFromUsers = async (course, users, days, last_date, filter) => {
  const usernames = users.map(user => user.username);

  let revisions = [];

  // request until we find 50 revisions or the date falls outside the course duration
  // we never fetch revisions older than 5 years ago
  let keepRunning = true;
  while (revisions.length < 50 && keepRunning) {
    // collect a fresh batch of promises on each pass, so already-resolved
    // values from previous passes aren't processed again
    const wikiPromises = [];
    for (const wiki of course.wikis) {
      wikiPromises.push(fetchRevisionsFromWiki(days, wiki, usernames, course.start, last_date));
    }
    const resolvedValues = await Promise.all(wikiPromises);
    for (const value of resolvedValues) {
      const { revisions: items, exitNext } = value;
      // stop the outer loop once a wiki has hit the course start or the 5-year bound
      if (exitNext) {
        keepRunning = false;
      }
      if (filter) {
        revisions.push(...(items.filter(filter)));
      } else {
        // if filter is not passed, simply add all the revisions
        revisions.push(...items);
      }
    }
    last_date = formatISO(subDays(toDate(last_date), days));
    if (revisions.length < 50) {
      // widen the search window for the next pass, capped at 2 years
      days = Math.min(365 * 2, 3 * days);
    }
  }
  // remove duplicates
  // they occur because dates overlap and sometimes the same revision is included twice
  revisions = [...new Map(revisions.map(v => [v.id, v])).values()];

  return { revisions, last_date };
};
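
// for illustration, a call that keeps only revisions to tracked articles
// (course, users, and trackedArticles are hypothetical):
// const trackedTitles = new Set(trackedArticles.map(article => article.title));
// const { revisions, last_date: newLastDate } = await fetchRevisionsFromUsers(
//   course, users, 7, formatISO(new Date()), rev => trackedTitles.has(rev.title)
// );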

// fetches revisions from a particular wiki
// days is the time period to fetch for: if it is 7 days and last_date is
// today's date, it will look for revisions from the past week
const fetchAllRevisions = async (API_URL, days, usernames, wiki, course_start, last_date) => {
  let ucend;
  let exitNext = false;
  if (isBefore(subDays(toDate(last_date), days), toDate(course_start))) {
    // the window would extend past the course start - clamp to it and signal the caller to stop
    ucend = formatISO(toDate(course_start));
    exitNext = true;
  } else if (isBefore(subDays(toDate(last_date), days), subYears(new Date(), 5))) {
    // never look back more than 5 years - clamp and signal the caller to stop
    ucend = formatISO(subYears(new Date(), 5));
    exitNext = true;
  } else {
    ucend = formatISO(subDays(toDate(last_date), days));
  }

  // the API allows a maximum of 50 users per query, so split the usernames into chunks
  const usernamesChunks = chunk(usernames, 50);
  const usernamePromises = [];

  for (const usernameChunk of usernamesChunks) {
    const params = {
      action: 'query',
      format: 'json',
      list: 'usercontribs',
      ucuser: usernameChunk.join('|'),
      ucprop: 'ids|title|sizediff|timestamp',
      uclimit: 50,
      ucend,
      ucstart: formatISO(toDate(last_date)),
      ucdir: 'older',
    };
    usernamePromises.push(fetchAll(API_URL, params, 'uccontinue'));
  }
  const values = await Promise.all(usernamePromises);
  const revisions = flatten(values);
  return { revisions, exitNext };
};
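
// for reference, a single request built from the params above looks roughly like this
// (illustrative usernames and dates; origin=* is appended by fetchAll):
// https://en.wikipedia.org/w/api.php?action=query&format=json&list=usercontribs
//   &ucuser=UserA%7CUserB&ucprop=ids%7Ctitle%7Csizediff%7Ctimestamp&uclimit=50
//   &ucstart=2023-06-08T00%3A00%3A00Z&ucend=2023-06-01T00%3A00%3A00Z&ucdir=older&origin=*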

// wrapper around fetchAllRevisions. It takes the revisions returned by that function
// and adds some properties we require on the front end, like url, article_url, etc.
const fetchRevisionsFromWiki = async (days, wiki, usernames, course_start, last_date) => {
  const prefix = `https://${toWikiDomain(wiki)}`;
  const API_URL = `${prefix}/w/api.php`;
  const { revisions, exitNext } = await fetchAllRevisions(API_URL, days, usernames, wiki, course_start, last_date);
  for (const revision of revisions) {
    revision.wiki = wiki;
    const diff_params = {
      title: revision.title,
      diff: 'prev',
      // with diff=prev, MediaWiki compares oldid against its predecessor,
      // so use the revision's own id (not parentid) to show this revision's change
      oldid: revision.revid
    };

    // url for the diff
    revision.url = `${prefix}/w/index.php?${stringify(diff_params)}`;

    // main article url - encodeURIComponent ensures the title is escaped properly
    revision.article_url = `${prefix}/wiki/${encodeURIComponent(revision.title)}`;

    // to maintain the old structure of the revision object
    revision.characters = revision.sizediff;
    revision.mw_rev_id = revision.revid;
    revision.id = revision.revid;
    revision.revisor = revision.user;
    revision.date = revision.timestamp;
    revision.mw_page_id = revision.pageid;
  }
  return { revisions, wiki, exitNext };
};