huridocs/uwazi

View on GitHub
app/api/suggestions/suggestions.ts

Summary

Maintainability
A
0 mins
Test Coverage
A
96%
/* eslint-disable max-lines */
import { ObjectId } from 'mongodb';

import { files } from 'api/files/files';
import { EnforcedWithId } from 'api/odm';
import settings from 'api/settings/settings';
import { IXSuggestionsModel } from 'api/suggestions/IXSuggestionsModel';
import templates from 'api/templates';
import { syncedPromiseLoop } from 'shared/data_utils/promiseUtils';
import {
  ExtractedMetadataSchema,
  LanguagesListSchema,
  ObjectIdSchema,
  PropertySchema,
} from 'shared/types/commonTypes';
import { FileType } from 'shared/types/fileType';
import {
  IXSuggestionAggregation,
  IXSuggestionsFilter,
  IXSuggestionsQuery,
  IXSuggestionType,
  SuggestionCustomFilter,
} from 'shared/types/suggestionType';
import { objectIndex } from 'shared/data_utils/objectIndex';
import {
  getSegmentedFilesIds,
  propertyTypeIsWithoutExtractedMetadata,
} from 'api/services/informationextraction/getFiles';
import { Extractors } from 'api/services/informationextraction/ixextractors';
import { registerEventListeners } from './eventListeners';
import {
  getCurrentValueStage,
  getEntityStage,
  getFileStage,
  getLabeledValueStage,
  getMatchStage,
} from './pipelineStages';
import { postProcessCurrentValues, updateStates } from './updateState';
import {
  AcceptedSuggestion,
  SuggestionAcceptanceError,
  updateEntitiesWithSuggestion,
} from './updateEntities';

// Page size used when a listing request does not provide `page.size`; it is
// applied both as the `$limit` of the list pipeline and as the divisor when
// computing `totalPages`.
const DEFAULT_LIMIT = 30;

/**
 * Writes the accepted suggestions back into the `extractedMetadata` of the
 * files they reference.
 *
 * For every fetched file, the entry whose `name` matches the suggestion's
 * `propertyName` is refreshed (timestamp and selection); when no such entry
 * exists a new one is appended. Each file is saved after being updated.
 * Nothing happens for property types reported by
 * `propertyTypeIsWithoutExtractedMetadata`.
 *
 * @param suggestions - Suggestions whose `fileId` selects the files to update.
 * @param property - Property the suggestions belong to; only its `type` is read.
 */
const updateExtractedMetadata = async (
  suggestions: IXSuggestionType[],
  property: PropertySchema
) => {
  const skipsExtractedMetadata = propertyTypeIsWithoutExtractedMetadata(property.type);
  if (skipsExtractedMetadata) return;

  const fileIds = suggestions.map(({ fileId }) => fileId);
  const storedFiles = await files.get({ _id: { $in: fileIds } });

  // Lookup keyed by the stringified file id ('' when a suggestion has none).
  // NOTE(review): presumably one suggestion per file id — confirm how
  // `objectIndex` treats several suggestions sharing the same file.
  const byFileId = objectIndex(
    suggestions,
    item => item.fileId?.toString() || '',
    item => item
  );

  await syncedPromiseLoop(storedFiles, async (file: EnforcedWithId<FileType>) => {
    const accepted = byFileId[file._id.toString()];
    file.extractedMetadata = file.extractedMetadata || [];

    const existing = file.extractedMetadata.find(
      (entry: any) => entry.name === accepted.propertyName
    ) as ExtractedMetadataSchema;

    // `Date()` called without `new` produces the current time as a string.
    const timestamp = Date();
    const selection = {
      text: accepted.suggestedText || accepted.suggestedValue?.toString(),
      selectionRectangles: accepted.selectionRectangles,
    };

    if (existing) {
      existing.timestamp = timestamp;
      existing.selection = selection;
    } else {
      file.extractedMetadata.push({ name: accepted.propertyName, timestamp, selection });
    }

    await files.save(file);
  });
};

/**
 * Assembles the aggregation pipeline used to list the suggestions of an
 * extractor.
 *
 * Stage order: match, entity, current value, `entityTitle` field, sort, skip,
 * limit, file, labeled value and the final projection. Sorting and pagination
 * run before the file and labeled-value stages.
 *
 * @param extractorId - Extractor whose suggestions are listed.
 * @param customFilter - Optional extra filter forwarded to the match stage.
 * @param setLanguages - Configured languages forwarded to the entity stage.
 * @param options - Optional `page` (`number` is 1-based in the offset
 *   computation, `size` falls back to `DEFAULT_LIMIT`) and `sort`
 *   (`property` plus `order`; anything other than `'desc'` sorts ascending).
 * @returns The list of pipeline stages.
 */
const buildListQuery = (
  extractorId: ObjectId,
  customFilter: SuggestionCustomFilter | undefined,
  setLanguages: LanguagesListSchema | undefined,
  options: { page?: IXSuggestionsQuery['page']; sort?: IXSuggestionsQuery['sort'] }
) => {
  const { page, sort } = options;

  const skipCount = page ? page.size * (page.number - 1) : 0;
  const pageSize = page?.size || DEFAULT_LIMIT;

  const direction = sort?.order === 'desc' ? -1 : 1;
  // Without an explicit sort property, fall back to date ascending and then
  // state descending.
  const sortSpec = sort?.property ? { [sort.property]: direction } : { date: 1, state: -1 };

  const projection = {
    entityId: '$entity._id',
    entityTemplateId: '$entity.template',
    sharedId: '$entity.sharedId',
    entityTitle: 1,
    fileId: 1,
    language: 1,
    propertyName: 1,
    extractorId: 1,
    suggestedValue: 1,
    segment: 1,
    currentValue: 1,
    state: 1,
    page: 1,
    date: 1,
    error: 1,
    labeledValue: 1,
    selectionRectangles: 1,
  };

  return [
    ...getMatchStage(extractorId, customFilter),
    ...getEntityStage(setLanguages!),
    ...getCurrentValueStage(),
    { $addFields: { entityTitle: '$entity.title' } },
    { $sort: sortSpec },
    { $skip: skipCount },
    { $limit: pageSize },
    ...getFileStage(),
    ...getLabeledValueStage(),
    { $project: projection },
  ];
};

/**
 * Extracts the pieces of a suggestions filter that the queries need.
 *
 * @param filter - Incoming filter; `extractorId` is wrapped in an `ObjectId`.
 * @returns The untouched `customFilter` and the `extractorId` as an `ObjectId`.
 */
const readFilter = (filter: IXSuggestionsFilter) => ({
  customFilter: filter.customFilter,
  extractorId: new ObjectId(filter.extractorId),
});

/**
 * Runs `postProcessCurrentValues` over a list of suggestions using the type
 * of the property the extractor targets.
 *
 * An input without elements is returned as is, without looking up the
 * extractor or the property.
 *
 * @param _suggestions - Suggestions produced by the list aggregation.
 * @param extractorId - Extractor whose `property` names the template property.
 * @returns The post-processed suggestions, or the input when it has no elements.
 */
const postProcessSuggestions = async (_suggestions: any, extractorId: ObjectId) => {
  const hasSuggestions = _suggestions.length > 0;
  if (!hasSuggestions) return _suggestions;

  const extractor = await Extractors.getById(extractorId);
  const targetPropertyName = extractor?.property;
  const { type: propertyType } = await templates.getPropertyByName(targetPropertyName!);

  return postProcessCurrentValues(_suggestions, propertyType);
};

// Property types for which `needsAllLanguages` answers true.
const propertyTypesWithAllLanguages = new Set(['numeric', 'date', 'select', 'multiselect']);

/**
 * Tells whether a property type is one of the "all languages" types
 * (numeric, date, select, multiselect).
 *
 * @param propertyType - The `type` of a template property.
 * @returns `true` when the type is in `propertyTypesWithAllLanguages`.
 */
const needsAllLanguages = (propertyType: PropertySchema['type']) => {
  const isListed = propertyTypesWithAllLanguages.has(propertyType);
  return isListed;
};

// Property types that may be accepted partially (added/removed values).
const validTypesForPartialAcceptance = new Set(['multiselect', 'relationship']);

/**
 * Tells whether suggestions for the given property type may be accepted
 * partially.
 *
 * @param propertyType - The `type` of a template property.
 * @returns `true` only for `'multiselect'` and `'relationship'`.
 */
const typeIsValidForPartialAcceptance = (propertyType: string) => {
  const isAllowed = validTypesForPartialAcceptance.has(propertyType);
  return isAllowed;
};

/**
 * Rejects partial acceptance for property types that do not support it.
 *
 * An acceptance counts as partial when at least one accepted suggestion
 * carries a truthy `addedValues` or `removedValues`.
 *
 * @param acceptedSuggestions - The acceptance requests to inspect.
 * @param property - Property being updated; only its `type` is read.
 * @throws SuggestionAcceptanceError when the acceptance is partial and the
 *   property type is neither multiselect nor relationship.
 */
const validatePartialAcceptanceTypeConstraint = (
  acceptedSuggestions: AcceptedSuggestion[],
  property: PropertySchema
) => {
  const isPartial = acceptedSuggestions.some(
    accepted => Boolean(accepted.addedValues) || Boolean(accepted.removedValues)
  );
  const typeAllowsPartial = typeIsValidForPartialAcceptance(property.type);

  if (typeAllowsPartial || !isPartial) return;

  throw new SuggestionAcceptanceError(
    'Partial acceptance is only allowed for multiselects or relationships.'
  );
};

/**
 * Operations over information-extraction suggestions: lookups, paginated
 * listing, per-extractor aggregation, state updates, saving, acceptance and
 * deletion. Persistence goes through `IXSuggestionsModel`.
 */
const Suggestions = {
  /** Fetches a single suggestion by its id. */
  getById: async (id: ObjectIdSchema) => IXSuggestionsModel.getById(id),

  /** Fetches the suggestions whose `entityId` equals the given shared id. */
  getByEntityId: async (sharedId: string) => IXSuggestionsModel.get({ entityId: sharedId }),

  /** Fetches the suggestions stored for the given extractor. */
  getByExtractor: async (extractorId: ObjectIdSchema) => IXSuggestionsModel.get({ extractorId }),

  /**
   * Lists one page of suggestions for an extractor.
   *
   * @param filter - Carries the `extractorId` and an optional `customFilter`.
   * @param options - Optional pagination (`page`) and ordering (`sort`).
   * @returns The page of post-processed suggestions and the total number of
   *   pages, computed with `page.size` or `DEFAULT_LIMIT` as the page size.
   */
  get: async (
    filter: IXSuggestionsFilter,
    options: {
      page?: IXSuggestionsQuery['page'];
      sort?: IXSuggestionsQuery['sort'];
    }
  ) => {
    const { languages: setLanguages } = await settings.get();
    const { customFilter, extractorId } = readFilter(filter);

    // The third argument asks the match stage for a count; an empty result
    // is treated as zero matches.
    const countResult = await IXSuggestionsModel.db.aggregate(
      getMatchStage(extractorId, customFilter, true)
    );
    const count = countResult?.length ? countResult[0].count : 0;

    let suggestions = await IXSuggestionsModel.db.aggregate(
      buildListQuery(extractorId, customFilter, setLanguages, options)
    );
    suggestions = await postProcessSuggestions(suggestions, extractorId);

    return {
      suggestions,
      totalPages: Math.ceil(count / (options.page?.size || DEFAULT_LIMIT)),
    };
  },

  /**
   * Counts the suggestions of an extractor by state flag.
   *
   * `labeled`, `match`, `obsolete` and `error` count suggestions whose
   * corresponding `state` flag is truthy. `nonLabeled` and `mismatch` count
   * suggestions whose flag is falsy while also passing the explicit `$ne`
   * comparisons against undefined/null.
   *
   * @param _extractorId - Extractor to aggregate; converted to an `ObjectId`.
   * @returns The counters, all zero when the aggregation returns no group.
   */
  aggregate: async (_extractorId: ObjectIdSchema): Promise<IXSuggestionAggregation> => {
    const extractorId = new ObjectId(_extractorId);

    const aggregations: (IXSuggestionAggregation & { _id: ObjectId })[] =
      await IXSuggestionsModel.db.aggregate([
        {
          $match: { extractorId },
        },
        {
          $group: {
            // A single group (`_id: null`) covering every matched suggestion.
            _id: null,
            total: { $sum: 1 },
            labeled: { $sum: { $cond: ['$state.labeled', 1, 0] } },
            nonLabeled: {
              $sum: {
                $cond: [
                  {
                    $and: [
                      { $ne: ['$state.labeled', undefined] },
                      { $ne: ['$state.labeled', null] },
                      { $not: '$state.labeled' },
                    ],
                  },
                  1,
                  0,
                ],
              },
            },
            match: { $sum: { $cond: ['$state.match', 1, 0] } },
            mismatch: {
              $sum: {
                $cond: [
                  {
                    $and: [
                      { $ne: ['$state.match', undefined] },
                      { $ne: ['$state.match', null] },
                      { $not: '$state.match' },
                    ],
                  },
                  1,
                  0,
                ],
              },
            },
            obsolete: { $sum: { $cond: ['$state.obsolete', 1, 0] } },
            error: { $sum: { $cond: ['$state.error', 1, 0] } },
          },
        },
      ]);

    // Drop the grouping `_id`; fall back to zeroed counters when there is no
    // group in the result.
    const { _id, ...results } = aggregations[0] || {
      _id: null,
      total: 0,
      labeled: 0,
      nonLabeled: 0,
      match: 0,
      mismatch: 0,
      obsolete: 0,
      error: 0,
    };

    return results;
  },

  updateStates,

  /** Sets `state.obsolete` to true on every suggestion matching `query`. */
  setObsolete: async (query: any) =>
    IXSuggestionsModel.updateMany(query, { $set: { 'state.obsolete': true } }),

  /**
   * Sets `state.error` to true on the suggestions matching `query` whose
   * `fileId` is not among the ids returned by `getSegmentedFilesIds`.
   */
  markSuggestionsWithoutSegmentation: async (query: any) => {
    const segmentedFilesIds = await getSegmentedFilesIds();
    await IXSuggestionsModel.updateMany(
      {
        ...query,
        fileId: { $nin: segmentedFilesIds },
      },
      { $set: { 'state.error': true } }
    );
  },

  /** Saves a single suggestion by delegating to `saveMultiple`. */
  save: async (suggestion: IXSuggestionType) => Suggestions.saveMultiple([suggestion]),

  /**
   * Saves several suggestions and then refreshes the states of the
   * suggestions returned by the model's `saveMultiple`, if any.
   */
  saveMultiple: async (_suggestions: IXSuggestionType[]) => {
    const toUpdate = await IXSuggestionsModel.saveMultiple(_suggestions);
    if (toUpdate.length > 0) await updateStates({ _id: { $in: toUpdate.map(s => s._id) } });
  },

  /**
   * Accepts suggestions: updates the entities, writes the extracted metadata
   * of the related files and refreshes the suggestion states.
   *
   * An empty `acceptedSuggestions` list is a no-op.
   *
   * @param acceptedSuggestions - Acceptance requests; `_id` selects the stored
   *   suggestion. Duplicate ids are collapsed before the lookup.
   * @throws Error when the suggestions belong to more than one extractor,
   *   when any requested id is not found, or when any stored suggestion has
   *   an `error` different from the empty string.
   * @throws SuggestionAcceptanceError when partial acceptance is requested
   *   for a property type that does not allow it.
   */
  accept: async (acceptedSuggestions: AcceptedSuggestion[]) => {
    // Nothing to accept. Without this guard every validation below passes
    // vacuously and reading `suggestions[0]` fails with a TypeError.
    if (acceptedSuggestions.length === 0) return;

    const acceptedIds = Array.from(new Set(acceptedSuggestions.map(s => s._id.toString())));
    const suggestions = await IXSuggestionsModel.get({ _id: { $in: acceptedIds } });
    const extractors = new Set(suggestions.map(s => s.extractorId.toString()));
    if (extractors.size > 1) {
      throw new Error('All suggestions must come from the same extractor');
    }
    const foundIds = new Set(suggestions.map(s => s._id.toString()));
    if (!acceptedIds.every(id => foundIds.has(id))) {
      throw new Error('Suggestion(s) not found.');
    }
    if (suggestions.some(s => s.error !== '')) {
      throw new Error('Some Suggestions have an error.');
    }

    // The suggestions share one extractor (checked above), so the property
    // name is taken from the first one.
    const { propertyName } = suggestions[0];
    const property = await templates.getPropertyByName(propertyName);
    validatePartialAcceptanceTypeConstraint(acceptedSuggestions, property);
    const allLanguage = needsAllLanguages(property.type);

    await updateEntitiesWithSuggestion(allLanguage, acceptedSuggestions, suggestions, property);
    await updateExtractedMetadata(suggestions, property);
    await Suggestions.updateStates({ _id: { $in: acceptedIds.map(id => new ObjectId(id)) } });
  },

  /** Deletes the suggestions whose `entityId` equals the given shared id. */
  deleteByEntityId: async (sharedId: string) => {
    await IXSuggestionsModel.delete({ entityId: sharedId });
  },

  delete: IXSuggestionsModel.delete.bind(IXSuggestionsModel),
  registerEventListeners,
};

export { Suggestions };