GetDKAN/dkan

View on GitHub
modules/common/src/DatasetInfo.php

Summary

Maintainability
A
0 mins
Test Coverage
A
100%
<?php

declare(strict_types = 1);

namespace Drupal\common;

use Drupal\Core\DependencyInjection\ContainerInjectionInterface;
use Drupal\datastore\DatastoreService;
use Drupal\datastore\Service\Info\ImportInfo;
use Drupal\datastore\Service\ResourceLocalizer;
use Drupal\metastore\ResourceMapper;
use Drupal\metastore\Storage\DataFactory;
use Drupal\node\Entity\Node;
use Symfony\Component\DependencyInjection\ContainerInterface;

/**
 * Extract helpful information from a dataset identifier.
 *
 * Collaborating services are supplied through the setter methods rather than
 * the constructor, so any of them may be unset on a given instance.
 *
 * @package Drupal\common
 */
class DatasetInfo implements ContainerInjectionInterface {

  /**
   * Metastore storage; unset until setStorage() is called.
   *
   * @var \Drupal\metastore\Storage\Data|null
   */
  protected $storage;

  /**
   * Datastore; unset until setDatastore() is called.
   *
   * @var \Drupal\datastore\DatastoreService|null
   */
  protected $datastore;

  /**
   * Resource mapper; unset until setResourceMapper() is called.
   *
   * @var \Drupal\metastore\ResourceMapper|null
   */
  protected $resourceMapper;

  /**
   * Import info service; unset until setImportInfo() is called.
   *
   * @var \Drupal\datastore\Service\Info\ImportInfo|null
   */
  protected $importInfo;

  /**
   * Set storage.
   *
   * @param \Drupal\metastore\Storage\DataFactory $dataFactory
   *   Metastore's data factory, used to obtain the 'dataset' storage.
   */
  public function setStorage(DataFactory $dataFactory) {
    $this->storage = $dataFactory->getInstance('dataset');
  }

  /**
   * Set datastore.
   *
   * @param \Drupal\datastore\DatastoreService $datastore
   *   Datastore service.
   */
  public function setDatastore(DatastoreService $datastore) {
    $this->datastore = $datastore;
  }

  /**
   * Set the resource mapper.
   *
   * @param \Drupal\metastore\ResourceMapper $resourceMapper
   *   Resource mapper service.
   */
  public function setResourceMapper(ResourceMapper $resourceMapper) {
    $this->resourceMapper = $resourceMapper;
  }

  /**
   * Set the import info service.
   *
   * @param \Drupal\datastore\Service\Info\ImportInfo $importInfo
   *   Import info service.
   */
  public function setImportInfo(ImportInfo $importInfo) {
    $this->importInfo = $importInfo;
  }

  /**
   * Instantiates a new instance of this class.
   *
   * Nothing is pulled from the container here: the services this class uses
   * are provided through the setter methods, each of which is called
   * conditionally. The method must still exist to satisfy the interface.
   *
   * @param \Symfony\Component\DependencyInjection\ContainerInterface $container
   *   The service container this instance should use.
   *
   * @return static
   */
  public static function create(ContainerInterface $container) {
    return new static();
  }

  /**
   * Gather info about a dataset from its identifier.
   *
   * @param string $uuid
   *   Dataset identifier.
   *
   * @return array
   *   Dataset information array. Contains a 'notice' key when the storage is
   *   unavailable or the dataset cannot be found; otherwise a
   *   'latest_revision' key and, when the latest revision is a draft and a
   *   published revision exists, a 'published_revision' key.
   */
  public function gather(string $uuid) : array {
    $info = [];

    if (!$this->storage) {
      $info['notice'] = 'The DKAN Metastore module is not enabled.';
      return $info;
    }

    $latest = $this->storage->getEntityLatestRevision($uuid);
    if (!$latest) {
      $info['notice'] = 'Not found';
      return $info;
    }
    $info['latest_revision'] = $this->getRevisionInfo($latest);

    // The published revision is only reported separately when it differs
    // from the latest one, i.e. when the latest revision is still a draft.
    $latestRevisionIsDraft = 'draft' === $latest->get('moderation_state')->getString();
    $published = $this->storage->getEntityPublishedRevision($uuid);
    if ($latestRevisionIsDraft && isset($published)) {
      $info['published_revision'] = $this->getRevisionInfo($published);
    }

    return $info;
  }

  /**
   * Get the distribution UUID for a dataset.
   *
   * Uses the published revision reported by gather() when there is one, and
   * falls back to the latest revision otherwise.
   *
   * @param string $dataset_uuid
   *   The uuid of a dataset.
   * @param string $index
   *   The index of the resource in the dataset array. Defaults to first.
   *
   * @return string
   *   The distribution UUID, or an empty string when the dataset, the
   *   distribution at $index, or its UUID cannot be found.
   */
  public function getDistributionUuid(string $dataset_uuid, string $index = '0'): string {
    $dataset_info = $this->gather($dataset_uuid);

    if (!isset($dataset_info['latest_revision'])) {
      return '';
    }

    // Prefer the published dataset revision, defaulting to the latest one.
    $datasetRevision = $dataset_info['published_revision'] ?? $dataset_info['latest_revision'];

    return $datasetRevision['distributions'][$index]['distribution_uuid'] ?? '';
  }

  /**
   * Get various information from a dataset node's specific revision.
   *
   * @param \Drupal\node\Entity\Node $node
   *   Dataset node.
   *
   * @return array
   *   Dataset node revision info. Metadata-derived values fall back to
   *   'Not found' when absent from the node's JSON metadata.
   */
  protected function getRevisionInfo(Node $node) : array {
    $metadata = json_decode($node->get('field_json_metadata')->getString());

    // json_decode() returns NULL for empty or malformed JSON and an array or
    // scalar for non-object documents. Normalise to an empty object so the
    // 'Not found' fallbacks apply instead of a TypeError being raised by
    // getDistributionsInfo()'s \stdClass type declaration.
    if (!$metadata instanceof \stdClass) {
      $metadata = new \stdClass();
    }

    return [
      'uuid' => $node->uuid(),
      'node_id' => $node->id(),
      'revision_id' => $node->getRevisionId(),
      'moderation_state' => $node->get('moderation_state')->getString(),
      'title' => $metadata->title ?? 'Not found',
      'modified_date_metadata' => $metadata->modified ?? 'Not found',
      'modified_date_dkan' => $metadata->{'%modified'} ?? 'Not found',
      'distributions' => $this->getDistributionsInfo($metadata),
    ];
  }

  /**
   * Get distributions info.
   *
   * @param \stdClass $metadata
   *   Dataset metadata object.
   *
   * @return array
   *   One resources-info array per referenced distribution, or
   *   ['Not found'] when the metadata has no '%Ref:distribution' property.
   */
  protected function getDistributionsInfo(\stdClass $metadata) : array {
    if (!isset($metadata->{'%Ref:distribution'})) {
      return ['Not found'];
    }

    $distributions = [];
    foreach ($metadata->{'%Ref:distribution'} as $distribution) {
      $distributions[] = $this->getResourcesInfo($distribution);
    }

    return $distributions;
  }

  /**
   * Get the storage object for a resource.
   *
   * @param string $identifier
   *   Resource identifier.
   * @param string $version
   *   Resource version timestamp.
   *
   * @return null|\Drupal\datastore\Storage\DatabaseTable
   *   The Database table object, or NULL when no datastore has been set or
   *   the datastore throws an exception while looking the storage up.
   */
  protected function getStorage(string $identifier, string $version) {
    // The datastore is injected through an optional setter. Calling a method
    // on an unset property raises \Error, which the catch below would not
    // intercept, so bail out explicitly.
    if (!$this->datastore) {
      return NULL;
    }

    try {
      return $this->datastore->getStorage($identifier, $version);
    }
    catch (\Exception $e) {
      return NULL;
    }
  }

  /**
   * Get resources information.
   *
   * @param \stdClass $distribution
   *   A distribution object extracted from dataset metadata.
   *
   * @return array
   *   Resources information: identifiers, file fetcher and importer status,
   *   file paths and datastore table name. ['No resource found'] is returned
   *   when the distribution carries no usable '%Ref:downloadURL' reference.
   */
  protected function getResourcesInfo(\stdClass $distribution) : array {
    if (!isset($distribution->data->{'%Ref:downloadURL'})) {
      return ['No resource found'];
    }

    // A distribution's first resource, regardless of perspective or index,
    // should provide the information needed. Read it without mutating the
    // metadata; reset() returns FALSE for an empty list.
    $references = $distribution->data->{'%Ref:downloadURL'};
    $resource = is_array($references) ? reset($references) : NULL;

    // An empty reference list, or a reference lacking identifier/version,
    // cannot be resolved any further.
    if (!isset($resource->data->identifier, $resource->data->version)) {
      return ['No resource found'];
    }

    $identifier = $resource->data->identifier;
    $version = $resource->data->version;

    $info = $this->importInfo->getItem($identifier, $version);
    $fileMapper = $this->resourceMapper->get($identifier, ResourceLocalizer::LOCAL_FILE_PERSPECTIVE, $version);
    $source = $this->resourceMapper->get($identifier, DataResource::DEFAULT_SOURCE_PERSPECTIVE, $version);
    $storage = $this->getStorage($identifier, $version);

    return [
      'distribution_uuid' => $distribution->identifier,
      'resource_id' => $identifier,
      'resource_version' => $version,
      'fetcher_status' => $info->fileFetcherStatus,
      'fetcher_percent_done' => $info->fileFetcherPercentDone ?? 0,
      'file_path' => isset($fileMapper) ? $fileMapper->getFilePath() : 'not found',
      'source_path' => isset($source) ? $source->getFilePath() : '',
      'importer_percent_done' => $info->importerPercentDone ?? 0,
      'importer_status' => $info->importerStatus,
      'importer_error' => $info->importerError,
      'table_name' => $storage ? $storage->getTableName() : 'not found',
    ];
  }

}