app/Module/FixSearchAndReplace.php
<?php
/**
* webtrees: online genealogy
* Copyright (C) 2023 webtrees development team
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace Fisharebest\Webtrees\Module;
use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\DataFixService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Query\Builder;
use Illuminate\Support\Collection;
use Throwable;
use function addcslashes;
use function asort;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function view;
/**
 * Class FixSearchAndReplace
 *
 * A "data fix" module that searches the raw GEDCOM text of records and
 * replaces matches, using one of four search methods: exact text, whole
 * words, simple wildcards or a regular expression.
 */
class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    // A regular expression that never matches.
    private const INVALID_REGEX = '/(?!)/';

    /**
     * @param DataFixService $data_fix_service
     */
    public function __construct(private DataFixService $data_fix_service)
    {
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }

    /**
     * Options form.
     *
     * Renders the options view with the available search methods and the
     * record types (sorted by their translated label).
     *
     * @param Tree $tree
     *
     * @return string
     */
    public function fixOptions(Tree $tree): string
    {
        $methods = [
            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
            /* I18N: https://en.wikipedia.org/wiki/Regular_expression */
            'regex'     => I18N::translate('Regular expression'),
        ];

        $types = [
            Family::RECORD_TYPE     => I18N::translate('Families'),
            Individual::RECORD_TYPE => I18N::translate('Individuals'),
            Location::RECORD_TYPE   => I18N::translate('Locations'),
            Media::RECORD_TYPE      => I18N::translate('Media objects'),
            Note::RECORD_TYPE       => I18N::translate('Notes'),
            Repository::RECORD_TYPE => I18N::translate('Repositories'),
            Source::RECORD_TYPE     => I18N::translate('Sources'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
        ];

        // Sort by label, keeping the record-type keys.
        asort($types);

        return view('modules/fix-search-and-replace/options', [
            'default_method' => 'exact',
            'default_type'   => Individual::RECORD_TYPE,
            'methods'        => $methods,
            'types'          => $types,
        ]);
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function familiesToFix(Tree $tree, array $params): Collection|null
    {
        if (!$this->isSearchRequestedFor(Family::RECORD_TYPE, $params)) {
            return null;
        }

        $query = DB::table('families')
            ->where('f_file', '=', $tree->id());

        $this->recordQuery($query, 'f_gedcom', $params);

        return $query->pluck('f_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function individualsToFix(Tree $tree, array $params): Collection|null
    {
        if (!$this->isSearchRequestedFor(Individual::RECORD_TYPE, $params)) {
            return null;
        }

        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id());

        $this->recordQuery($query, 'i_gedcom', $params);

        return $query->pluck('i_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function locationsToFix(Tree $tree, array $params): Collection|null
    {
        return $this->otherRecordIdsToFix($tree, Location::RECORD_TYPE, $params);
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function mediaToFix(Tree $tree, array $params): Collection|null
    {
        if (!$this->isSearchRequestedFor(Media::RECORD_TYPE, $params)) {
            return null;
        }

        $query = DB::table('media')
            ->where('m_file', '=', $tree->id());

        $this->recordQuery($query, 'm_gedcom', $params);

        return $query->pluck('m_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function notesToFix(Tree $tree, array $params): Collection|null
    {
        return $this->otherRecordIdsToFix($tree, Note::RECORD_TYPE, $params);
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function repositoriesToFix(Tree $tree, array $params): Collection|null
    {
        return $this->otherRecordIdsToFix($tree, Repository::RECORD_TYPE, $params);
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function sourcesToFix(Tree $tree, array $params): Collection|null
    {
        if (!$this->isSearchRequestedFor(Source::RECORD_TYPE, $params)) {
            return null;
        }

        // sourcesToFixQuery() is not defined in this class; presumably it is
        // provided by ModuleDataFixTrait and returns the base query for sources.
        $query = $this->sourcesToFixQuery($tree, $params);

        $this->recordQuery($query, 's_gedcom', $params);

        return $query->pluck('s_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function submittersToFix(Tree $tree, array $params): Collection|null
    {
        if (!$this->isSearchRequestedFor(Submitter::RECORD_TYPE, $params)) {
            return null;
        }

        // submittersToFixQuery() is not defined in this class; presumably it is
        // provided by ModuleDataFixTrait and returns the base query for submitters.
        $query = $this->submittersToFixQuery($tree, $params);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        // preg_match() returns false on error, which is treated as "no update needed".
        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record, $params);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $record->updateRecord($this->updateGedcom($record, $params), false);
    }

    /**
     * Apply the search-and-replace to the GEDCOM text of a record.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string The updated GEDCOM, or the unchanged GEDCOM if the replacement failed.
     */
    private function updateGedcom(GedcomRecord $record, array $params): string
    {
        // Allow "\n" to indicate a line-feed in replacement text.
        // Back-references such as $1, $2 are handled automatically.
        $replace = strtr($params['replace-with'], ['\n' => "\n"]);

        $regex  = $this->createRegex($params);
        $gedcom = $record->gedcom();

        // preg_replace() returns null on failure (e.g. a PCRE error).
        // Leave the record unchanged rather than violating the string return type.
        return preg_replace($regex, $replace, $gedcom) ?? $gedcom;
    }

    /**
     * Create a regular expression from the search pattern.
     *
     * @param array<string,string> $params
     *
     * @return string
     *
     * @throws HttpNotFoundException If the search method is not recognised.
     */
    private function createRegex(array $params): string
    {
        $search = $params['search-for'];
        $case   = $params['case'];
        $quoted = preg_quote($search, '/');

        return match ($params['method']) {
            'exact'     => '/' . $quoted . '/u' . $case,
            'words'     => '/\b' . $quoted . '\b/u' . $case,
            // After quoting, the wildcards appear as "\*" and "\?".
            'wildcards' => '/\b' . strtr($quoted, ['\*' => '.*', '\?' => '.']) . '\b/u' . $case,
            'regex'     => $this->validatedRegex('/' . addcslashes($search, '/') . '/u' . $case),
            default     => throw new HttpNotFoundException(),
        };
    }

    /**
     * Check that a user-supplied regular expression compiles.
     *
     * @param string $regex A delimited regular expression, including modifiers.
     *
     * @return string The same expression if it is valid, otherwise one that never matches.
     */
    private function validatedRegex(string $regex): string
    {
        try {
            // A valid regex on an empty string returns zero (or one).
            // An invalid regex on an empty string returns false and raises a warning,
            // which an error handler may have converted into an exception.
            $valid = preg_match($regex, '') !== false;
        } catch (Throwable) {
            $valid = false;
        }

        return $valid ? $regex : self::INVALID_REGEX;
    }

    /**
     * Was a non-empty search requested for the given type of record?
     *
     * @param string               $type
     * @param array<string,string> $params
     *
     * @return bool
     */
    private function isSearchRequestedFor(string $type, array $params): bool
    {
        return $params['type'] === $type && $params['search-for'] !== '';
    }

    /**
     * Candidate records of a type that is stored in the "other" table.
     *
     * @param Tree                 $tree
     * @param string               $type
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    private function otherRecordIdsToFix(Tree $tree, string $type, array $params): Collection|null
    {
        if (!$this->isSearchRequestedFor($type, $params)) {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', $type);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * Restrict a query to records whose GEDCOM might match the search pattern.
     *
     * This is only a coarse filter. An unrecognised method adds no restriction.
     *
     * @param Builder              $query
     * @param string               $column
     * @param array<string,string> $params
     *
     * @return void
     */
    private function recordQuery(Builder $query, string $column, array $params): void
    {
        $search = $params['search-for'];
        $method = $params['method'];

        // Escape the characters that are special in a LIKE pattern.
        $like = '%' . addcslashes($search, '\\%_') . '%';

        switch ($method) {
            case 'exact':
            case 'words':
                $query->where($column, 'LIKE', $like);
                break;

            case 'wildcards':
                // Convert our wildcards to their LIKE equivalents.
                $like = strtr($like, ['?' => '_', '*' => '%']);
                $query->where($column, 'LIKE', $like);
                break;

            case 'regex':
                // Substituting newlines seems to be necessary on *some* versions
                // of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
                $search = strtr($search, ['\n' => "\n"]);
                $query->where($column, DB::regexOperator(), $search);
                break;
        }
    }
}