owncloud/core

View on GitHub
lib/private/Repair/RepairMismatchFileCachePath.php

Summary

Maintainability
D
1 day
Test Coverage
<?php
/**
 * @author Vincent Petry <pvince81@owncloud.com>
 *
 * @copyright Copyright (c) 2018, ownCloud GmbH
 * @license AGPL-3.0
 *
 * This code is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License, version 3,
 * along with this program.  If not, see <http://www.gnu.org/licenses/>
 *
 */

namespace OC\Repair;

use OCP\IConfig;
use OCP\ILogger;
use OCP\Migration\IOutput;
use OCP\Migration\IRepairStep;
use Doctrine\DBAL\Platforms\MySqlPlatform;
use Doctrine\DBAL\Platforms\OraclePlatform;
use OCP\Files\IMimeTypeLoader;
use OCP\IDBConnection;
use Doctrine\DBAL\Exception\UniqueConstraintViolationException;

/**
 * Repairs file cache entries whose path does not match the parent-child relationship
 */
class RepairMismatchFileCachePath implements IRepairStep {
    public const CHUNK_SIZE = 10000;

    /** @var IDBConnection */
    protected $connection;

    /** @var IMimeTypeLoader */
    protected $mimeLoader;

    /** @var int */
    protected $dirMimeTypeId;

    /** @var int */
    protected $dirMimePartId;

    /** @var int|null */
    protected $storageNumericId = null;

    /** @var bool */
    protected $countOnly = true;

    /** @var ILogger  */
    protected $logger;

    /** @var IConfig */
    protected $config;

    /**
     * Stores the collaborators this repair step works with.
     *
     * @param IDBConnection $connection database connection used for the file cache queries
     * @param IMimeTypeLoader $mimeLoader loader used to resolve mime type names to ids
     * @param ILogger $logger logger instance stored on the step
     * @param IConfig $config system configuration, read by run() for the version check
     */
    public function __construct(
        IDBConnection $connection,
        IMimeTypeLoader $mimeLoader,
        ILogger $logger,
        IConfig $config
    ) {
        $this->config = $config;
        $this->logger = $logger;
        $this->mimeLoader = $mimeLoader;
        $this->connection = $connection;
    }

    /**
     * Returns the display name of this step. The wording depends on whether
     * the step is in count-only (detect) mode or in repair mode.
     *
     * @return string
     */
    public function getName() {
        if (!$this->countOnly) {
            return 'Repair file cache entries with path that does not match parent-child relationships';
        }

        return 'Detect file cache entries with path that does not match parent-child relationships';
    }

    /**
     * Restricts processing to a single storage, or lifts the restriction
     * again when null is given (all storages are processed then).
     *
     * @param int|null $storageNumericId numeric id of the storage, or null for all storages
     */
    public function setStorageNumericId($storageNumericId) {
        $this->storageNumericId = $storageNumericId;
    }

    /**
     * Switches between report mode and repair mode.
     *
     * @param bool $countOnly true to only report affected storages,
     * false to actually modify the file cache
     */
    public function setCountOnly($countOnly) {
        $this->countOnly = $countOnly;
    }

    /**
     * Fixes the broken entry's path.
     *
     * Any entry that already occupies the target storage/path is deleted
     * first, then the broken entry is moved there by rewriting its path,
     * path hash and storage. One progress step is reported on $out.
     *
     * @param IOutput $out repair output
     * @param int $fileId file id of the entry to fix
     * @param string $wrongPath wrong path of the entry to fix, only used for the progress message
     * @param int $correctStorageNumericId numeric id of the correct storage
     * @param string $correctPath value to which to set the path of the entry
     */
    private function fixEntryPath(IOutput $out, $fileId, $wrongPath, $correctStorageNumericId, $correctPath) {
        // delete target if exists
        $qb = $this->connection->getQueryBuilder();
        $qb->delete('filecache')
            ->where($qb->expr()->eq('storage', $qb->createNamedParameter($correctStorageNumericId)));

        if ($correctPath === '' && $this->connection->getDatabasePlatform() instanceof OraclePlatform) {
            // on Oracle the empty path is matched with IS NULL instead of its hash
            $qb->andWhere($qb->expr()->isNull('path'));
        } else {
            $qb->andWhere($qb->expr()->eq('path_hash', $qb->createNamedParameter(\md5($correctPath))));
        }
        // the number of affected rows tells whether something was overwritten
        $entryExisted = $qb->execute() > 0;

        $qb = $this->connection->getQueryBuilder();
        $qb->update('filecache')
            ->set('path', $qb->createNamedParameter($correctPath))
            ->set('path_hash', $qb->createNamedParameter(\md5($correctPath)))
            ->set('storage', $qb->createNamedParameter($correctStorageNumericId))
            ->where($qb->expr()->eq('fileid', $qb->createNamedParameter($fileId)));
        $qb->execute();

        $text = "Fixed file cache entry with fileid $fileId, set wrong path \"$wrongPath\" to \"$correctPath\"";
        if ($entryExisted) {
            // append the hint; a plain assignment would drop the fileid/path details
            $text .= " (replaced an existing entry)";
        }
        $out->advance(1, $text);
    }

    /**
     * Adds the FROM and WHERE parts that select file cache entries ("fc")
     * whose existing parent entry ("fcp") does not agree with them: either
     * the parent's path plus the entry's name differs from the entry's path,
     * or both live on different storages.
     *
     * Parents with an empty path are excluded, as are entries that are
     * their own parent. When a storage id was configured on this step, only
     * entries of that storage are matched.
     *
     * @param \OCP\DB\QueryBuilder\IQueryBuilder $qb query builder to extend,
     * the caller is responsible for the select list
     */
    private function addQueryConditionsParentIdWrongPath($qb) {
        $platform = $this->connection->getDatabasePlatform();
        $expr = $qb->expr();

        // thanks, VicDeo!
        // expected path of the child: parent path + '/' + child name
        $concatFunction = ($platform instanceof MySqlPlatform)
            ? $qb->createFunction("CONCAT(fcp.path, '/', fc.name)")
            : $qb->createFunction("(fcp.`path` || '/' || fc.`name`)");

        // Oracle is checked with IS NOT NULL, every other platform against ''
        $emptyPathExpr = ($platform instanceof OraclePlatform)
            ? $expr->isNotNull('fcp.path')
            : $expr->neq('fcp.path', $expr->literal(''));

        $pathMismatch = $expr->neq($qb->createFunction($concatFunction), 'fc.path');
        $storageMismatch = $expr->neq('fc.storage', 'fcp.storage');

        $qb->from('filecache', 'fc');
        $qb->from('filecache', 'fcp');
        $qb->where($expr->eq('fc.parent', 'fcp.fileid'));
        $qb->andWhere($expr->orX($pathMismatch, $storageMismatch));
        $qb->andWhere($emptyPathExpr);
        // yes, this was observed in the wild...
        $qb->andWhere($expr->neq('fc.fileid', 'fcp.fileid'));

        if ($this->storageNumericId === null) {
            return;
        }

        // use the target storage of the failed move when filtering
        $qb->andWhere(
            $expr->eq('fc.storage', $qb->createNamedParameter($this->storageNumericId))
        );
    }

    /**
     * Turns $qb into a query for file cache entries ("fc") whose parent id
     * is not -1 and either equals their own file id or does not point to
     * any existing entry.
     *
     * Note that this method also sets the select list to storage, fileid,
     * path and parent through select().
     *
     * @param \OCP\DB\QueryBuilder\IQueryBuilder $qb query builder to configure
     * @param int|null $storageNumericId only match entries of this storage, null for all
     */
    private function addQueryConditionsNonExistingParentIdEntry($qb, $storageNumericId = null) {
        // Subquery for parent existence:
        // select 1 from oc_filecache fce where fce.fileid = fc.parent
        $parentLookup = $this->connection->getQueryBuilder();
        $parentLookup->select($parentLookup->expr()->literal('1'));
        $parentLookup->from('filecache', 'fce');
        $parentLookup->where($parentLookup->expr()->eq('fce.fileid', 'fc.parent'));

        $expr = $qb->expr();

        // Find entries to repair
        $qb->select('storage', 'fileid', 'path', 'parent');
        // from oc_filecache fc
        $qb->from('filecache', 'fc');
        // where fc.parent <> -1
        $qb->where($expr->neq('fc.parent', $qb->createNamedParameter(-1)));
        // and (fc.fileid = fc.parent or not exists (<parent lookup>))
        $qb->andWhere(
            $expr->orX(
                $expr->eq('fc.fileid', 'fc.parent'),
                $qb->createFunction('NOT EXISTS (' . $parentLookup->getSQL() . ')')
            )
        );

        if ($storageNumericId === null) {
            return;
        }

        // filter on destination storage of a failed move
        $qb->andWhere($expr->eq('fc.storage', $qb->createNamedParameter($storageNumericId)));
    }

    /**
     * Counts the entries whose path does not match their existing parent,
     * using the same conditions as the matching repair query.
     *
     * @return mixed first column of the COUNT(*) result as delivered by the database layer
     */
    private function countResultsToProcessParentIdWrongPath() {
        $countQuery = $this->connection->getQueryBuilder();
        $countQuery->select($countQuery->createFunction('COUNT(*)'));
        $this->addQueryConditionsParentIdWrongPath($countQuery);

        $statement = $countQuery->execute();
        $total = $statement->fetchColumn(0);
        $statement->closeCursor();

        return $total;
    }

    /**
     * Counts the entries whose parent id equals their own id or does not
     * point to any existing entry.
     *
     * @param int|null $storageNumericId only count entries of this storage, null for all
     * @return int number of matching entries
     */
    private function countResultsToProcessNonExistingParentIdEntry($storageNumericId = null) {
        $qb = $this->connection->getQueryBuilder();
        // The helper calls select() with its own column list, which replaces
        // any projection set before it. COUNT(*) therefore has to be applied
        // afterwards, otherwise the first row's storage id would be returned
        // instead of a count.
        $this->addQueryConditionsNonExistingParentIdEntry($qb, $storageNumericId);
        $qb->select($qb->createFunction('COUNT(*)'));
        $results = $qb->execute();
        $count = $results->fetchColumn(0);
        $results->closeCursor();
        // normalize the driver specific value (string, or false on no row) to an int
        return (int)$count;
    }

    /**
     * Outputs a report about storages with wrong path that need repairing in the file cache.
     *
     * Nothing is written when no storage is affected; otherwise two
     * warnings are emitted: the list of affected storage numeric ids and a
     * hint on how to run the repair.
     *
     * @param IOutput $out repair output receiving the warnings
     */
    private function reportAffectedStoragesParentIdWrongPath(IOutput $out) {
        $qb = $this->connection->getQueryBuilder();
        $qb->selectDistinct('fc.storage');
        $this->addQueryConditionsParentIdWrongPath($qb);

        // TODO: max results + paginate ?
        // TODO: join with oc_storages / oc_mounts to deliver user id ?

        $results = $qb->execute();
        $rows = $results->fetchAll();
        $results->closeCursor();

        // collect the ids as array keys so every storage is listed only once
        $storageIds = [];
        foreach ($rows as $row) {
            $storageIds[$row['storage']] = true;
        }

        $storageIds = \array_keys($storageIds);
        if (!empty($storageIds)) {
            $out->warning('The file cache contains entries with invalid path values for the following storage numeric ids: ' . \implode(' ', $storageIds));
            // keep a space at every concatenation seam and close the second
            // command's backtick so the hint reads correctly
            $out->warning('Please run `occ files:scan --all --repair` to repair '
            . 'all affected storages or run `occ files:scan userid --repair` for '
            . 'each user with affected storages');
        }
    }

    /**
     * Warns about every storage that contains file cache entries whose
     * parent id does not resolve to an existing entry. Stays silent when no
     * storage is affected.
     *
     * @param IOutput $out repair output receiving the warnings
     */
    private function reportAffectedStoragesNonExistingParentIdEntry(IOutput $out) {
        $query = $this->connection->getQueryBuilder();
        $query->selectDistinct('fc.storage');
        // NOTE(review): the helper calls select() itself, which presumably
        // replaces the DISTINCT projection above - confirm. The ids are
        // de-duplicated in PHP below either way.
        $this->addQueryConditionsNonExistingParentIdEntry($query);

        // TODO: max results + paginate ?
        // TODO: join with oc_storages / oc_mounts to deliver user id ?

        $statement = $query->execute();
        $rows = $statement->fetchAll();
        $statement->closeCursor();

        // using the ids as array keys removes duplicates
        $storageIds = \array_keys(
            \array_fill_keys(\array_column($rows, 'storage'), true)
        );

        if (empty($storageIds)) {
            return;
        }

        $out->warning('The file cache contains entries where the parent id does not point to any existing entry for the following storage numeric ids: ' . \implode(' ', $storageIds));
        $out->warning('Please run `occ files:scan --all --repair` to repair all affected storages');
    }

    /**
     * Repair all entries for which the parent entry exists but the path
     * value doesn't match the parent's path.
     *
     * The matching query is executed repeatedly in chunks of CHUNK_SIZE
     * rows until it returns no more rows; every chunk is processed inside
     * one transaction.
     *
     * @param IOutput $out repair output, advanced once per processed entry
     * @param int|null $storageNumericId storage to fix or null for all
     * @return int[] storage numeric ids that were targets to a move and need further fixing;
     * contains $storageNumericId itself when it is not null
     */
    private function fixEntriesWithCorrectParentIdButWrongPath(IOutput $out, $storageNumericId = null) {
        $totalResultsCount = 0;

        // only seed the list with a real storage id: a null array key would be
        // cast to '' and returned as a bogus storage id
        $affectedStorages = [];
        if ($storageNumericId !== null) {
            $affectedStorages[$storageNumericId] = true;
        }

        // find all entries where the path entry doesn't match the path value that would
        // be expected when following the parent-child relationship, basically
        // concatenating the parent's "path" value with the name of the child
        $qb = $this->connection->getQueryBuilder();
        $qb->select('fc.storage', 'fc.fileid', 'fc.name')
            ->selectAlias('fc.path', 'path')
            ->selectAlias('fc.parent', 'wrongparentid')
            ->selectAlias('fcp.storage', 'parentstorage')
            ->selectAlias('fcp.path', 'parentpath');
        $this->addQueryConditionsParentIdWrongPath($qb);
        $qb->setMaxResults(self::CHUNK_SIZE);

        do {
            $results = $qb->execute();
            // since we're going to operate on fetched entry, better cache them
            // to avoid DB lock ups
            $rows = $results->fetchAll();
            $results->closeCursor();

            $this->connection->beginTransaction();
            $lastResultsCount = 0;
            foreach ($rows as $row) {
                $wrongPath = $row['path'];
                $correctPath = $row['parentpath'] . '/' . $row['name'];
                // make sure the target is on a different subtree
                if (\substr($correctPath, 0, \strlen($wrongPath)) === $wrongPath) {
                    // the path based parent entry is referencing one of its own children,
                    // fix the entry's parent id instead
                    // note: fixEntryParent cannot fail to find the parent entry by path
                    // here because the reason we reached this code is because we already
                    // found it
                    $this->fixEntryParent(
                        $out,
                        $row['storage'],
                        $row['fileid'],
                        $row['path'],
                        $row['wrongparentid'],
                        true
                    );
                } else {
                    $this->fixEntryPath(
                        $out,
                        $row['fileid'],
                        $wrongPath,
                        $row['parentstorage'],
                        $correctPath
                    );
                    // we also need to fix the target storage
                    $affectedStorages[$row['parentstorage']] = true;
                }
                $lastResultsCount++;
            }
            $this->connection->commit();

            $totalResultsCount += $lastResultsCount;

            // note: this is not pagination but repeating the query over and over again
            // until all possible entries were fixed
        } while ($lastResultsCount > 0);

        if ($totalResultsCount > 0) {
            $out->info("Fixed $totalResultsCount file cache entries with wrong path");
        }

        return \array_keys($affectedStorages);
    }

    /**
     * Gets the file id of the entry. If none exists, create it
     * up to the root if needed.
     *
     * Missing entries are inserted as directory placeholders (size -1,
     * etag "zombie", directory mime type). Missing ancestors are created
     * recursively; a missing root entry gets parent id -1.
     *
     * @param int $storageId storage id
     * @param string $path path of the entry to look up or create, "." is treated as the root ("")
     * @param int|null $reuseFileId file id to assign to a newly created entry, or null
     * to let the database generate one; not applied to recursively created ancestors
     * @return int file id of the existing or newly created entry
     */
    private function getOrCreateEntry($storageId, $path, $reuseFileId = null) {
        // dirname() of a top-level path yields ".", which stands for the root
        if ($path === '.') {
            $path = '';
        }
        // find the correct parent
        $qb = $this->connection->getQueryBuilder();
        // select fileid as "correctparentid"
        $qb->select('fileid')
            // from oc_filecache
            ->from('filecache')
            // where storage=$storage and path='$parentPath'
            ->where($qb->expr()->eq('storage', $qb->createNamedParameter($storageId)));

        if ($path === '' && $this->connection->getDatabasePlatform() instanceof OraclePlatform) {
            // on Oracle the empty path is matched with IS NULL instead of its hash
            $qb->andWhere($qb->expr()->isNull('path'));
        } else {
            $qb->andWhere($qb->expr()->eq('path_hash', $qb->createNamedParameter(\md5($path))));
        }
        $results = $qb->execute();
        $rows = $results->fetchAll();
        $results->closeCursor();

        if (!empty($rows)) {
            return $rows[0]['fileid'];
        }

        if ($path !== '') {
            // make sure the whole chain of ancestors exists first
            $parentId = $this->getOrCreateEntry($storageId, \dirname($path));
        } else {
            // root entry missing, create it
            $parentId = -1;
        }

        $qb = $this->connection->getQueryBuilder();
        $values = [
            'storage' => $qb->createNamedParameter($storageId),
            'path' => $qb->createNamedParameter($path),
            'path_hash' => $qb->createNamedParameter(\md5($path)),
            'name' => $qb->createNamedParameter(\basename($path)),
            'parent' => $qb->createNamedParameter($parentId),
            'size' => $qb->createNamedParameter(-1),
            'etag' => $qb->createNamedParameter('zombie'),
            'mimetype' => $qb->createNamedParameter($this->dirMimeTypeId),
            'mimepart' => $qb->createNamedParameter($this->dirMimePartId),
        ];

        if ($reuseFileId !== null) {
            // purpose of reusing the fileid of the parent is to salvage potential
            // metadata that might have previously been linked to this file id
            $values['fileid'] = $qb->createNamedParameter($reuseFileId);
        }
        $qb->insert('filecache')->values($values);
        try {
            $qb->execute();
        } catch (UniqueConstraintViolationException $e) {
            // This situation should not happen - need debugging information if it does.
            // $values only holds query placeholders, so report the real id that
            // was requested, plus storage and path, through the injected logger.
            $requestedFileId = ($reuseFileId !== null) ? $reuseFileId : '(auto-generated)';
            $this->logger->logException($e);
            $this->logger->error("Filecache repair step tried to insert row that already existed with fileid: $requestedFileId (storage: $storageId, path: \"$path\")");
            // Skip if the entry already exists
        }

        // If we reused the fileid then this is the id to return
        if ($reuseFileId !== null) {
            // with Oracle, the trigger gets in the way and does not let us specify
            // a fileid value on insert
            if ($this->connection->getDatabasePlatform() instanceof OraclePlatform) {
                $lastFileId = $this->connection->lastInsertId('*PREFIX*filecache');
                if ($reuseFileId !== $lastFileId) {
                    // use update to set it directly
                    $qb = $this->connection->getQueryBuilder();
                    $qb->update('filecache')
                        ->set('fileid', $qb->createNamedParameter($reuseFileId))
                        ->where($qb->expr()->eq('fileid', $qb->createNamedParameter($lastFileId)));
                    $qb->execute();
                }
            }

            return $reuseFileId;
        } else {
            // Else we inserted a new row with auto generated id, use that
            return $this->connection->lastInsertId('*PREFIX*filecache');
        }
    }

    /**
     * Fixes the broken entry's parent id.
     *
     * The correct parent is the entry found (or created) for the dirname of
     * $path on the same storage; the entry's "parent" column is then set to
     * that id and one progress step is reported.
     *
     * @param IOutput $out repair output
     * @param int $storageId storage id of the entry to fix
     * @param int $fileId file id of the entry to fix
     * @param string $path path from the entry to fix
     * @param int $wrongParentId wrong parent id
     * @param bool $parentIdExists true if the entry from the $wrongParentId exists (but is the wrong one),
     * false if it doesn't
     * @return bool always true in the current implementation
     */
    private function fixEntryParent(IOutput $out, $storageId, $fileId, $path, $wrongParentId, $parentIdExists = false) {
        // A missing parent's id may be reused in case there is metadata to
        // salvage. An existing (but wrong) parent keeps its id, so a newly
        // created parent needs a fresh one.
        $reusableId = $parentIdExists ? null : $wrongParentId;
        $correctParentId = $this->getOrCreateEntry($storageId, \dirname($path), $reusableId);

        $this->connection->beginTransaction();

        $update = $this->connection->getQueryBuilder();
        $update->update('filecache');
        $update->set('parent', $update->createNamedParameter($correctParentId));
        $update->where($update->expr()->eq('fileid', $update->createNamedParameter($fileId)));
        $update->execute();

        $out->advance(
            1,
            "Fixed file cache entry with fileid $fileId, set wrong parent \"$wrongParentId\" to \"$correctParentId\""
        );

        $this->connection->commit();

        return true;
    }

    /**
     * Repair entries where the parent id doesn't point to any existing entry
     * by finding the actual parent entry matching the entry's path dirname.
     *
     * The matching query is executed repeatedly in chunks of CHUNK_SIZE
     * rows until it returns no more rows.
     *
     * @param IOutput $out output
     * @param int|null $storageNumericId storage to fix or null for all
     * @return int number of results that were fixed
     */
    private function fixEntriesWithNonExistingParentIdEntry(IOutput $out, $storageNumericId = null) {
        $qb = $this->connection->getQueryBuilder();
        $this->addQueryConditionsNonExistingParentIdEntry($qb, $storageNumericId);
        $qb->setMaxResults(self::CHUNK_SIZE);

        $totalResultsCount = 0;
        do {
            $results = $qb->execute();
            // since we're going to operate on fetched entry, better cache them
            // to avoid DB lock ups
            $rows = $results->fetchAll();
            $results->closeCursor();

            $lastResultsCount = 0;
            foreach ($rows as $row) {
                $this->fixEntryParent(
                    $out,
                    $row['storage'],
                    $row['fileid'],
                    $row['path'],
                    $row['parent'],
                    // in general the parent doesn't exist except
                    // for the one condition where parent=fileid
                    $row['parent'] === $row['fileid']
                );
                $lastResultsCount++;
            }

            $totalResultsCount += $lastResultsCount;

            // note: this is not pagination but repeating the query over and over again
            // until all possible entries were fixed
            // NOTE(review): this only terminates if every pass really repairs
            // the rows it selected, so that they stop matching the query
        } while ($lastResultsCount > 0);

        if ($totalResultsCount > 0) {
            // this step repairs parent ids, not paths - say so in the summary
            $out->info("Fixed $totalResultsCount file cache entries with wrong parent");
        }

        return $totalResultsCount;
    }

    /**
     * Performs the detection or repair without looking at the installed
     * version, so that callers such as files:scan are never blocked by the
     * version check done in run().
     *
     * In count-only mode only the two reports are printed. Otherwise the
     * wrong-path repair runs first, followed by the missing-parent repair.
     *
     * @param IOutput $out repair output
     */
    public function doRepair(IOutput $out) {
        // ids used for the directory placeholder entries created during repair
        $this->dirMimeTypeId = $this->mimeLoader->getId('httpd/unix-directory');
        $this->dirMimePartId = $this->mimeLoader->getId('httpd');

        if ($this->countOnly) {
            $this->reportAffectedStoragesParentIdWrongPath($out);
            $this->reportAffectedStoragesNonExistingParentIdEntry($out);
            return;
        }

        $brokenPathEntries = $this->countResultsToProcessParentIdWrongPath();
        $brokenParentIdEntries = $this->countResultsToProcessNonExistingParentIdEntry($this->storageNumericId);
        $out->startProgress($brokenPathEntries + $brokenParentIdEntries);

        /*
         * The wrong-path repair might overwrite existing target parent entries and
         * thereby create orphans: entries whose path matches but whose parent id
         * points to an entry that no longer exists. Those are picked up by
         * fixEntriesWithNonExistingParentIdEntry(), so the order of the two
         * repairs must not be changed.
         */
        $affectedStorages = $this->fixEntriesWithCorrectParentIdButWrongPath($out, $this->storageNumericId);

        if ($this->storageNumericId === null) {
            // just fix all
            $this->fixEntriesWithNonExistingParentIdEntry($out);
        } else {
            foreach ($affectedStorages as $storageNumericId) {
                $this->fixEntriesWithNonExistingParentIdEntry($out, $storageNumericId);
            }
        }

        $out->finishProgress();
        $out->info('');
    }

    /**
     * Run the repair step, but only when the "version" system value is
     * lower than 10.0.4 (a missing value counts as 0.0.0).
     *
     * Callers that need the repair regardless of the version, such as the
     * file scan command, use doRepair() directly.
     *
     * @param IOutput $out output
     */
    public function run(IOutput $out) {
        $installedVersion = $this->config->getSystemValue('version', '0.0.0');

        // nothing to do once the instance is at 10.0.4 or later
        if (!\version_compare($installedVersion, '10.0.4', '<')) {
            return;
        }

        $this->doRepair($out);
    }
}