reliqarts/laravel-scavenger

View on GitHub
src/Service/Scanner.php

Summary

Maintainability
A
2 hrs
Test Coverage
<?php

declare(strict_types=1);

namespace ReliqArts\Scavenger\Service;

class Scanner
{
    /**
     * List of words (regex fragments) we don't want in our scraps.
     *
     * @var array
     */
    protected array $badWords = [];

    /**
     * Scanner constructor.
     *
     * @param array $badWords default list of bad words (regex fragments) applied on every check
     */
    public function __construct(array $badWords = [])
    {
        $this->badWords = $badWords;
    }

    /**
     * Determine whether scrap data contains bad words and is therefore unwanted.
     *
     * The instance-level bad words are combined with the ones passed in, and every
     * non-special attribute of the scrap is tested (case-insensitively) against them.
     *
     * Each bad word is embedded verbatim in a pattern delimited by "/", so an entry
     * containing an unescaped "/" yields an invalid pattern.
     *
     * @param array $scrapData attribute => value pairs of the scrap to inspect
     * @param array $badWords  additional words (regex fragments) we don't want in our scraps
     *
     * @return bool true when at least one inspected value matches a bad word
     */
    public function hasBadWords(array $scrapData, array $badWords = []): bool
    {
        $badWords = array_merge($this->badWords, $badWords);

        if (count($badWords) === 0) {
            return false;
        }

        $badWordsRegex = sprintf('/(%s)/i', implode(')|(', $badWords));

        foreach ($scrapData as $attr => $value) {
            if (ConfigProvider::isSpecialKey($attr)) {
                continue;
            }

            // Under strict types preg_match() only accepts a string subject; values that
            // cannot be represented as a string (null, arrays, plain objects) are skipped.
            $isStringable = is_scalar($value)
                || (is_object($value) && method_exists($value, '__toString'));

            if (!$isStringable) {
                continue;
            }

            if (preg_match($badWordsRegex, (string) $value)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Scour a string and pluck details.
     *
     * Every regex in the map is applied to the string in turn. The trimmed match is
     * recorded under the map key (a later match for the same key overwrites an earlier
     * one) and is either removed from the string or, when retaining, replaced by its
     * trimmed form.
     *
     * @param string $string the string to be scoured (modified in place)
     * @param array  $map    map to use for detail scouring (attribute => regex)
     * @param bool   $retain whether to leave match in source string
     *
     * @return array plucked details keyed by attribute
     */
    public static function pluckDetails(string &$string, array $map = [], bool $retain = false): array
    {
        $details = [];

        foreach ($map as $attr => $regex) {
            $result = preg_replace_callback(
                $regex,
                static function ($m) use ($attr, &$details, $retain) {
                    $match = trim($m[0]);
                    $details[$attr] = $match;

                    // an empty replacement removes the match from the string
                    return $retain ? $match : '';
                },
                $string
            );

            // preg_replace_callback() yields null on a PCRE error; keep the string
            // untouched in that case instead of handing null back to the caller.
            if ($result !== null) {
                $string = $result;
            }
        }

        return $details;
    }

    /**
     * Searches array for needles. The first one found is returned.
     * If needles aren't supplied the first non-empty item in array is returned.
     *
     * "Non-empty" follows PHP's empty() semantics, so values such as 0, '0', '' and []
     * are skipped.
     *
     * @param array $haystack array to search
     * @param array $needles  optional list of keys to check for, in order of preference
     *
     * @return mixed|null the first non-empty value found, or null when there is none
     */
    public static function firstNonEmpty(array $haystack, array $needles = [])
    {
        if (!empty($needles)) {
            foreach ($needles as $key) {
                if (!empty($haystack[$key])) {
                    return $haystack[$key];
                }
            }

            return null;
        }

        foreach ($haystack as $value) {
            if (!empty($value)) {
                return $value;
            }
        }

        return null;
    }

    /**
     * Convert HTML line breaks to newlines.
     *
     * Handles <br>, <br/> and <br /> in any letter case.
     */
    public static function br2nl(string $text): string
    {
        return preg_replace('/<br\s*\/?>/i', "\n", $text) ?? $text;
    }

    /**
     * Strip HTML tags from text, then collapse returns, tabs and repeated whitespace.
     */
    public static function cleanText(string $text): string
    {
        return self::removeReturnsAndTabs(strip_tags($text));
    }

    /**
     * Remove tabs and newlines from text.
     *
     * Runs of returns/newlines/tabs become a single space, runs of two or more whitespace
     * characters are collapsed to one space, and the literal sequence " / " is removed.
     */
    private static function removeReturnsAndTabs(string $text): string
    {
        $flattened = preg_replace("/[\r\n\t]+/", ' ', $text) ?? $text;
        $collapsed = preg_replace('/\\s{2,}/', ' ', $flattened) ?? $flattened;

        return str_replace(' / ', '', $collapsed);
    }
}