src/SimilarText.php
<?php
declare(strict_types=1);
/*
* This file is part of gpupo/similarity
* Created by Gilmar Pupo <contact@gpupo.com>
* For the information of copyright and license you should read the file
* LICENSE which is distributed with this source code.
* Para a informação dos direitos autorais e de licença você deve ler o arquivo
* LICENSE que é distribuído com este código-fonte.
* Para obtener la información de los derechos de autor y la licencia debe leer
* el archivo LICENSE que se distribuye con el código fuente.
* For more information, see <https://opensource.gpupo.com/>.
*
*/
namespace Gpupo\Similarity;
class SimilarText extends SimilarityAbstract
{
public function __toArray()
{
return array_merge(parent::__toArray(), [
'percentage' => $this->getPercent(),
'isApproximate' => $this->isApproximate(),
'proximityCalculation' => $this->getProximityCalculation(),
]);
}
public function hasSimilarity()
{
if ($this->getPercent() > $this->getAccuracy()) {
return true;
}
if ($this->isApproximate()) {
return true;
}
return false;
}
public function getPercent()
{
return $this->calculatePercentExtended(
$this->getInput()->getFirst(),
$this->getInput()->getSecond()
);
}
public function getLevenshteinDistance()
{
return levenshtein(
$this->getInput()->getFirst(),
$this->getInput()->getSecond(),
$this->getInput()->getCosts()->insertion,
$this->getInput()->getCosts()->replacement,
$this->getInput()->getCosts()->deletion
);
}
public function getLevenshteinHardDistance()
{
return levenshtein(
$this->getInput()->getFirst(),
$this->getInput()->getSecond()
);
}
public function getProximityCalculation()
{
$calc = [
'first' => $this->getInput()->getFirst(),
'second' => $this->getInput()->getSecond(),
];
$calc['limit'] = $this->getLimitOfProximity($calc['first'], $calc['second']);
$calc['ld'] = $this->getLevenshteinDistance();
$calc['hardDistance'] = $this->getLevenshteinHardDistance();
$calc['hardDifference'] = ($calc['hardDistance'] / $calc['limit']['hardDivider']) + 0.5;
if ($calc['hardDifference'] > $calc['ld']
&& $calc['hardDifference'] >= ($calc['limit']['maxDifference'] - 1)) {
$calc['mode'] = 'hard';
$calc['difference'] = $calc['hardDifference'];
} else {
$calc['mode'] = 'soft';
$calc['difference'] = $calc['ld'];
}
return $calc;
}
public function calculatePercent($stringA, $stringB)
{
$percent = 0;
similar_text($stringA, $stringB, $percent);
return $percent;
}
public function calculatePercentExtended($stringA, $stringB)
{
$a = [];
foreach ([
[$stringA, mb_strtolower($stringB)],
[mb_strtolower($stringA), mb_strtolower($stringB)],
[mb_strtolower($stringA), $stringB],
] as $item) {
$a[] = $this->calculatePercent($item[0], $item[1]);
}
return max($a);
}
protected function getLimitOfProximity($first, $second)
{
$calc = [
'chars' => \mb_strlen($first.$second),
'divider' => (20 - ($this->getAccuracy() / 10)),
'hardDivider' => (12 - floor(($this->getAccuracy() / 10))),
];
$calc['maxDifference'] = ($calc['chars'] / $calc['divider']);
return $calc;
}
}