cmgmyr/title-formatter

View on GitHub
src/TitleFormatter.php

Summary

Maintainability
A
25 mins
Test Coverage
<?php

namespace Cmgmyr\TitleFormatter;

/**
 * Converts a free-form string into title case.
 *
 * The title is split on a separator, each word is located by its character
 * index inside the title, and qualifying words are capitalised in place.
 * Short "ignored" words (articles, conjunctions, prepositions) stay lowercase
 * unless they are the first word, the last word, or follow punctuation.
 * Words that already contain an ASCII capital are never modified.
 *
 * Use the static entry point: TitleFormatter::titleCase('some title').
 */
class TitleFormatter
{
    /**
     * The title that we need to format and return. Null when no usable
     * (non-blank) title was supplied.
     */
    protected ?string $title = null;

    /**
     * The separator character for in between words.
     */
    protected string $separator = ' ';

    /**
     * Encoding used for mb_ functions.
     */
    protected string $encoding = 'UTF-8';

    /**
     * Collection of words generated from the original title.
     *
     * @var array<int, string> word keyed by its character index in the title
     */
    protected array $indexedWords = [];

    /**
     * Words that should be ignored from capitalization.
     *
     * @var list<string>
     */
    protected array $ignoredWords = [
        'a',
        'an',
        'and',
        'as',
        'at',
        'but',
        'by',
        'en',
        'for',
        'if',
        'in',
        'is',
        'of',
        'on',
        'or',
        'to',
        'the',
        'vs',
        'via',
    ];

    /**
     * Instances are only created through titleCase().
     *
     * @param string|null $title     raw title; null or blank leaves the title unset
     * @param string      $separator string the title is split on
     */
    private function __construct(?string $title = null, string $separator = ' ')
    {
        $this->setTitle($title);
        $this->separator = $separator;
    }

    /**
     * Converts the initial title to a correctly formatted one.
     *
     * @return string the formatted title, or '' when no title was set
     */
    public function convertTitle(): string
    {
        if ($this->title === null) {
            return '';
        }

        foreach ($this->splitWords() as $index => $word) {
            if ($this->wordShouldBeUppercase($index, $word)) {
                $this->rebuildTitle($index, $this->uppercaseWord($word));
            }
        }

        return $this->title ?? '';
    }

    /**
     * Returns the newly formatted title.
     *
     * @param string|null $title     raw title to format
     * @param string      $separator string the title is split on
     */
    public static function titleCase(?string $title = null, string $separator = ' '): string
    {
        return (new self($title, $separator))->convertTitle();
    }

    /**
     * Sets the title after cleaning up extra spaces.
     *
     * Newlines become spaces, runs of two or more whitespace characters
     * collapse to a single space, and the result is trimmed. A null or blank
     * input leaves the title unset.
     */
    protected function setTitle(?string $title = null): void
    {
        // Bail out early: passing null into string functions is deprecated
        // since PHP 8.1 and the outcome (no title) is the same.
        if ($title === null) {
            return;
        }

        $collapsed = preg_replace('/\s\s+/', ' ', str_replace("\n", ' ', $title));

        // preg_replace() returns null on a PCRE failure; treat that as blank.
        $title = trim($collapsed ?? '');

        if ($title !== '') {
            $this->title = $title;
        }
    }

    /**
     * Creates an array of words from the title to be formatted.
     *
     * Hyphenated words are title-cased recursively (using '-' as separator)
     * and written back into the title before being indexed.
     *
     * @return array<int, string> word keyed by its character index in the title
     */
    protected function splitWords(): array
    {
        $indexedWords = [];
        $offset = 0;
        $separatorLength = mb_strlen($this->separator, $this->encoding);

        foreach (explode($this->separator, $this->title) as $word) {
            // Consecutive separators produce empty fragments; skip them.
            if (mb_strlen($word, $this->encoding) === 0) {
                continue;
            }

            $wordIndex = $this->getWordIndex($word, $offset);

            if ($this->hasDash($word)) {
                $word = self::titleCase($word, '-');
                $this->rebuildTitle($wordIndex, $word);
            }

            $indexedWords[$wordIndex] = $word;

            // Start the next search after this word and one full separator,
            // so a word can never be matched inside the separator itself.
            $offset += mb_strlen($word, $this->encoding) + $separatorLength;
        }

        return $this->indexedWords = $indexedWords;
    }

    /**
     * Finds the correct index of the word within the title.
     *
     * @param string $word   word to locate
     * @param int    $offset character offset at which the search starts
     */
    protected function getWordIndex(string $word, int $offset): int
    {
        $index = mb_strpos($this->title, $word, $offset, $this->encoding);

        // mb_strpos() yields false when the word is not found; fall back to 0
        // explicitly instead of relying on implicit bool-to-int coercion.
        return $this->correctIndexOffset($index === false ? 0 : $index);
    }

    /**
     * Corrects the potential offset issue with some UTF-8 characters.
     *
     * Returns the character length of the title prefix that ends at $index;
     * a null index measures the whole title.
     */
    protected function correctIndexOffset(?int $index): int
    {
        return mb_strlen(mb_substr($this->title, 0, $index, $this->encoding), $this->encoding);
    }

    /**
     * Replaces a formatted word within the current title.
     *
     * The replacement overwrites exactly as many characters as $word is long,
     * starting at $index; everything before and after is kept.
     */
    protected function rebuildTitle(int $index, string $word): void
    {
        $before = mb_substr($this->title, 0, $index, $this->encoding);
        $after = mb_substr(
            $this->title,
            $index + mb_strlen($word, $this->encoding),
            null,
            $this->encoding
        );

        $this->title = $before . $word . $after;
    }

    /**
     * Performs the uppercase action on the given word.
     *
     * Leading punctuation/symbol characters are kept as a prefix and the
     * remainder of the word is passed through ucwords().
     */
    protected function uppercaseWord(string $word): string
    {
        $prefix = '';

        while ($word !== '') {
            $first = mb_substr($word, 0, 1, $this->encoding);

            if (! $this->isPunctuation($first)) {
                break;
            }

            $prefix .= $first;

            // Strip only the leading character. A length of -1 would also
            // drop the last character, which loses the letter to capitalise
            // for inputs such as "(a".
            $word = mb_substr($word, 1, null, $this->encoding);
        }

        return $prefix . ucwords($word);
    }

    /**
     * Condition to see if the given word should be uppercase.
     *
     * @param int    $index character index of the word in the title
     * @param string $word  the word as stored in the indexed word list
     */
    protected function wordShouldBeUppercase(int $index, string $word): bool
    {
        // Words that already contain a capital letter are left untouched.
        if ($this->hasUppercaseLetter($word)) {
            return false;
        }

        // The last word is identified by position as well as by value, so an
        // earlier ignored word that merely equals the last word stays lowercase.
        $isLastWord = $this->isLastWord($word)
            && $index === array_key_last($this->indexedWords);

        return $this->isFirstWordOfSentence($index)
            || $isLastWord
            || ! $this->isIgnoredWord($word);
    }

    /**
     * Checks to see if the word is the last word in the title.
     *
     * Compares by value only; wordShouldBeUppercase() additionally checks
     * the word's position.
     */
    protected function isLastWord(string $word): bool
    {
        return $word === end($this->indexedWords);
    }

    /**
     * Checks to see if the word is the start of a new sentence.
     *
     * True for the word at index 0, or when the character two positions
     * before the word (i.e. just before the separator) is punctuation.
     */
    protected function isFirstWordOfSentence(int $index): bool
    {
        if ($index === 0) {
            return true;
        }

        // With fewer than two characters in front of the word there is nothing
        // to inspect; a negative start would make mb_substr() wrap around and
        // read from the end of the title instead.
        if ($index < 2) {
            return false;
        }

        $twoCharactersBack = mb_substr($this->title, $index - 2, 1, $this->encoding);

        return (bool) $this->isPunctuation($twoCharactersBack);
    }

    /**
     * Checks to see if the given string is a punctuation character.
     *
     * @return int 1 when the string contains a Unicode punctuation or symbol
     *             character, 0 otherwise (including on a PCRE failure)
     */
    protected function isPunctuation(string $string): int
    {
        return (int) preg_match("/[\p{P}\p{S}]/u", $string);
    }

    /**
     * Checks if the given word should be ignored.
     */
    protected function isIgnoredWord(string $word): bool
    {
        return in_array($word, $this->ignoredWords, true);
    }

    /**
     * Checks to see if a word has an uppercase letter.
     *
     * @return int 1 when the word contains an ASCII capital A-Z, 0 otherwise
     */
    protected function hasUppercaseLetter(string $word): int
    {
        return (int) preg_match('/[A-Z]/', $word);
    }

    /**
     * Checks to see if the word has a dash.
     *
     * Only true when, besides at least one dash, more than one other
     * character remains.
     */
    protected function hasDash(string $word): bool
    {
        $wordWithoutDashes = str_replace('-', '', $word);

        return (bool) preg_match("/\-/", $word) && mb_strlen($wordWithoutDashes, $this->encoding) > 1;
    }
}