src/modules/core/airtable/fetchAirtableDataset.ts from UnlyEd/next-right-now

src/modules/core/airtable/fetchAirtableDataset.ts
Summary

Maintainability

0 mins
Test Coverage

Issues
import { createLogger } from '@/modules/core/logging/logger';
import map from 'lodash.map';
import size from 'lodash.size';
import hybridCache from '../vercelCache/hybridCache';
import fetchAirtableTable from './fetchAirtableTable';
import { AirtableDBTable } from './types/AirtableDBTable';
import { AirtableSchema } from './types/AirtableSchema';
import { FieldSchema } from './types/FieldSchema';
import { GenericAirtableRecordsListApiResponse } from './types/GenericAirtableRecordsListApiResponse';
import { RawAirtableRecordsSet } from './types/RawAirtableRecordsSet';
import { TableSchema } from './types/TableSchema';

// Label identifying this module, handed to the logger factory below
const fileLabel = 'modules/core/airtable/fetchAirtableDataset.ts';

// Logger instance created for this file
const logger = createLogger({ fileLabel });

/**
 * Delay (in milliseconds) inserted between two consecutive Airtable API requests when not running in development.
 * Each table's request is delayed by its position in the schema multiplied by this value.
 *
 * Without this delay, we might reach the Airtable API rate limit (5 requests per second) and get blocked.
 *
 * XXX Note that you should adapt VERCEL_DISK_CACHE_TTL based on this value.
 *  If you need a high latency between requests to stay under the limit (e.g: 1+ sec between 2 requests),
 *  then, depending on how many tables you fetch, you should make sure your TTL is high enough,
 *  otherwise the same table will be fetched several times because the TTL has already expired.
 *
 * XXX Despite the latency, the requests are executed multiple times anyway, because static pages are generated by batches of 3 pages at a time.
 *  This generates 3 Airtable API requests at the same time, and none of them benefit from the cache because they have the same latency (parallel execution).
 *  This is because Next.js generates pages by batch, so the first batch doesn't benefit from any caching mechanism at all.
 *  All other batches benefit from the cache though. It's still better than no caching,
 *  but it could be reduced even more by pre-fetching the Airtable API before generating any page, and doing only one API request per table.
 */
const FORCED_LATENCY_BETWEEN_AIRTABLE_API_REQUESTS = 2000; // In ms

/**
 * TTL (in seconds) given to the HybridCache for every Airtable table request.
 *
 * By default, the HybridCache would use a 30sec TTL.
 * We override it to make sure our Airtable API requests are executed as rarely as possible.
 *
 * This value (converted to milliseconds) is also compared against each request's delay,
 * and a warning is printed when the delay exceeds it.
 */
const VERCEL_DISK_CACHE_TTL = 180; // In seconds

/**
 * Computes the list of fields that will be requested from the Airtable API for one table.
 *
 * - Virtual fields are skipped (they may not exist on Airtable).
 * - I18n fields are expanded into one field per locale/language, suffixed with the upper-cased locale (e.g: "label" => "labelEN" + "labelFR").
 * - Any other field is requested under its own name.
 *
 * The order of the returned fields follows the order of the schema fields, then the order of the locales.
 *
 * @param tableSchema Schema of the table, whose "fields" are inspected.
 * @param localesOfLanguagesToFetch Locales/languages used to expand i18n fields.
 * @returns The field names to request.
 */
const resolveAllowedFields = (tableSchema: TableSchema, localesOfLanguagesToFetch: string[]): string[] => {
  const fieldNamesPerSchemaField: string[][] = map(tableSchema.fields, (fieldSchema: FieldSchema, fieldName: string): string[] => {
    if (fieldSchema.isVirtual) {
      return [];
    }

    if (fieldSchema.isI18n) {
      // Fetch the translations for all requested locales, no matter how many there are,
      // because they are all needed to resolve the best available translation later on
      return map(localesOfLanguagesToFetch, (supportedLang: string) => `${fieldName}${supportedLang.toUpperCase()}`);
    }

    return [`${fieldName}`];
  });

  // Flatten the per-field lists into a single list, preserving order
  return ([] as string[]).concat(...fieldNamesPerSchemaField);
};

/**
 * Fetch all tables described in the schema.
 *
 * Promises are executed in parallel to fetch the whole dataset faster, useful when several tables are being fetched.
 * Although, a "preDelay" is applied (except in development) to make sure not to run all queries at the same time, because of the 5 API requests per second Airtable limit.
 *
 * XXX Running with a delay is still faster than running in series, but obviously slower than running all requests in parallel without delaying them.
 *  But there isn't a better way of doing things, as we must not hit the API rate limit or the app will completely crash during the initial build, or when preview mode is enabled.
 *
 * @param airtableSchema Schema describing the tables to fetch; one request is made per top-level key.
 * @param localesOfLanguagesToFetch Locales/languages (e.g: 'en', 'en_gb') that should be fetched to resolve i18n fields.
 *  There is no point fetching more locales than those that will be used to resolve the best available translation during the Sanitization step.
 *  Whether you use locales or languages is up to you, as it depends how you name your Airtable fields.
 *  Tip: "Underscore" is recommended if using localized locales. (i.e: 'en_gb', not 'en-gb')
 * @returns One records set per table, in the same order as the schema keys, each tagged with its table name as "__typename".
 *  The returned promise rejects if any of the table requests rejects.
 */
export const fetchAirtableDataset = async (airtableSchema: AirtableSchema, localesOfLanguagesToFetch: string[]): Promise<RawAirtableRecordsSet[]> => {
  const tableSchemaKeys: AirtableDBTable[] = Object.keys(airtableSchema) as AirtableDBTable[];
  const tablesCount: number = size(tableSchemaKeys);
  const isDevelopment: boolean = process.env.NODE_ENV === 'development';
  const cacheTtlInMs: number = VERCEL_DISK_CACHE_TTL * 1000;
  const promises: Promise<unknown>[] = [];

  for (let i = 0; i < tablesCount; i++) {
    const tableName: AirtableDBTable = tableSchemaKeys[i];
    const tableSchema: TableSchema = airtableSchema[tableName];
    const tableNamePlural: string = tableSchema.plural ? tableSchema.plural : `${tableName}s`;
    const tableCacheKey = `${tableNamePlural}Table`;
    const filterByFormula: string = tableSchema.filterByFormula;
    const allowedFields: string[] = resolveAllowedFields(tableSchema, localesOfLanguagesToFetch);

    // Each table waits a bit longer than the previous one, so that requests are spread over time (no delay in development)
    const preDelay: number = isDevelopment ? 0 : i * FORCED_LATENCY_BETWEEN_AIRTABLE_API_REQUESTS;

    // eslint-disable-next-line no-console
    // console.debug(`(Promise) The table ${tableName} will be fetched in ${preDelay}ms.`);

    if (preDelay > cacheTtlInMs) {
      // eslint-disable-next-line no-console
      console.warn(`[WARNING] Your Vercel cache TTL is lower than the Airtable API request delay for ${tableName} (delay: ${preDelay} > ${cacheTtlInMs}). This will cause your API requests to be sent multiple times and is probably not what you want. You should increase your TTL value.`);
    }

    // Start the request without awaiting it (all requests are awaited together below, to run them in parallel)
    promises.push(
      hybridCache(
        tableCacheKey,
        async () => await fetchAirtableTable(tableName, {
          fields: allowedFields,
          filterByFormula,
        }) as GenericAirtableRecordsListApiResponse,
        {
          enabled: !!process.env.IS_SERVER_INITIAL_BUILD && !isDevelopment,
          storage: {
            type: 'disk',
            options: {
              filename: tableCacheKey,
            },
          },
          ttl: VERCEL_DISK_CACHE_TTL,

          // Force the parallel requests to run "in series" (but not in development, where the delay is 0)
          preDelay,
        },
      ),
    );
  }

  // Wait for all requests, results are in the same order as "tableSchemaKeys"
  const results = (await Promise.all(promises)) as GenericAirtableRecordsListApiResponse[];

  return tableSchemaKeys.map((tableName: AirtableDBTable, index: number): RawAirtableRecordsSet => {
    const { records } = results[index];

    return {
      records,
      __typename: tableName,
    };
  });
};

export default fetchAirtableDataset;