zedseven/Romanization.NET

View on GitHub
Romanization/Languages/Japanese/ModifiedHepburn.cs

Summary

Maintainability
A
2 hrs
Test Coverage
A
99%
using Romanization.Internal;
using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Globalization;

// ReSharper disable CheckNamespace
// ReSharper disable CommentTypo
// ReSharper disable IdentifierTypo
// ReSharper disable StringLiteralTypo
// ReSharper disable InconsistentNaming

namespace Romanization
{
    public static partial class Japanese
    {
        /// <summary>
        /// The Modified Hepburn Japanese romanization system.<br />
        /// For more information, visit:
        /// <a href='https://en.wikipedia.org/wiki/Hepburn_romanization'>https://en.wikipedia.org/wiki/Hepburn_romanization</a>
        /// </summary>
        public sealed class ModifiedHepburn : IRomanizationSystem
        {
            /// <inheritdoc />
            public SystemType Type => SystemType.PhonemicTranscription;

            // System-Specific Constants
            // Kana-to-romaji lookup tables; both are filled in by the constructor.
            private readonly Dictionary<string, string> GojuonChart = new Dictionary<string, string>();
            private readonly Dictionary<string, string> YoonChart = new Dictionary<string, string>();

            // A romanized vowel directly followed by a chōonpu becomes the matching macron vowel.
            private readonly CharSub LongASub = new CharSub($"a{Choonpu}", Constants.MacronA, false);
            private readonly CharSub LongESub = new CharSub($"e{Choonpu}", Constants.MacronE, false);
            private readonly CharSub LongISub = new CharSub($"i{Choonpu}", Constants.MacronI, false);
            private readonly CharSub LongOSub = new CharSub($"o{Choonpu}", Constants.MacronO, false);
            private readonly CharSub LongUSub = new CharSub($"u{Choonpu}", Constants.MacronU, false);

            // Modified Hepburn writes syllabic n as "n'" before a vowel *and* before "y", so that
            // n + ya ("n'ya") cannot be confused with the yōon nya. The explicit "y" guarantees that case
            // is covered regardless of what Constants.LatinVowels contains.
            private readonly CharSub SyllabicNVowelsSub =
                new CharSub($"[{SyllabicNHiragana}{SyllabicNKatakana}]([{Constants.LatinVowels}y])", "n'${1}");
            // Every syllabic n that did not receive an apostrophe is a plain "n". No following letter is
            // required, so a syllabic n at the end of the text, or before whitespace or punctuation, is
            // romanized as well instead of being left behind as kana.
            // Must be applied after SyllabicNVowelsSub, otherwise it would consume the apostrophe cases.
            private readonly CharSub SyllabicNGeneralCaseSub =
                new CharSub($"[{SyllabicNHiragana}{SyllabicNKatakana}]", "n");

            // Sokuon doubles the consonant that follows it; before "ch" it is written as "t" ("tch").
            private readonly CharSub SokuonGeneralCaseSub =
                new CharSub($"[{SokuonHiragana}{SokuonKatakana}]([{Constants.LatinConsonants}])", "${1}${1}", false);
            private readonly CharSub SokuonChCaseSub = new CharSub($"[{SokuonHiragana}{SokuonKatakana}]ch", "tch", false);

            /// <summary>
            /// Instantiates a copy of the system to process romanizations.
            /// </summary>
            public ModifiedHepburn()
            {
                #region Romanization Chart
                // Sourced from https://en.wikipedia.org/wiki/Hepburn_romanization#Romanization_charts
                // Each entry registers the hiragana, then the katakana, for the given romanization

                // Gojūon
                // Column 1
                AddKanaPair(GojuonChart, "あ", "ア", "a");
                AddKanaPair(GojuonChart, "か", "カ", "ka");
                AddKanaPair(GojuonChart, "さ", "サ", "sa");
                AddKanaPair(GojuonChart, "た", "タ", "ta");
                AddKanaPair(GojuonChart, "な", "ナ", "na");
                AddKanaPair(GojuonChart, "は", "ハ", "ha");
                AddKanaPair(GojuonChart, "ま", "マ", "ma");
                AddKanaPair(GojuonChart, "や", "ヤ", "ya");
                AddKanaPair(GojuonChart, "ら", "ラ", "ra");
                AddKanaPair(GojuonChart, "わ", "ワ", "wa");
                AddKanaPair(GojuonChart, "が", "ガ", "ga");
                AddKanaPair(GojuonChart, "ざ", "ザ", "za");
                AddKanaPair(GojuonChart, "だ", "ダ", "da");
                AddKanaPair(GojuonChart, "ば", "バ", "ba");
                AddKanaPair(GojuonChart, "ぱ", "パ", "pa");
                // Column 2
                AddKanaPair(GojuonChart, "い", "イ", "i");
                AddKanaPair(GojuonChart, "き", "キ", "ki");
                AddKanaPair(GojuonChart, "し", "シ", "shi");
                AddKanaPair(GojuonChart, "ち", "チ", "chi");
                AddKanaPair(GojuonChart, "に", "ニ", "ni");
                AddKanaPair(GojuonChart, "ひ", "ヒ", "hi");
                AddKanaPair(GojuonChart, "み", "ミ", "mi");
                AddKanaPair(GojuonChart, "り", "リ", "ri");
                AddKanaPair(GojuonChart, "ゐ", "ヰ", "i"); // historical character
                AddKanaPair(GojuonChart, "ぎ", "ギ", "gi");
                AddKanaPair(GojuonChart, "じ", "ジ", "ji");
                AddKanaPair(GojuonChart, "ぢ", "ヂ", "ji");
                AddKanaPair(GojuonChart, "び", "ビ", "bi");
                AddKanaPair(GojuonChart, "ぴ", "ピ", "pi");
                // Column 3
                AddKanaPair(GojuonChart, "う", "ウ", "u");
                AddKanaPair(GojuonChart, "く", "ク", "ku");
                AddKanaPair(GojuonChart, "す", "ス", "su");
                AddKanaPair(GojuonChart, "つ", "ツ", "tsu");
                AddKanaPair(GojuonChart, "ぬ", "ヌ", "nu");
                AddKanaPair(GojuonChart, "ふ", "フ", "fu");
                AddKanaPair(GojuonChart, "む", "ム", "mu");
                AddKanaPair(GojuonChart, "ゆ", "ユ", "yu");
                AddKanaPair(GojuonChart, "る", "ル", "ru");
                AddKanaPair(GojuonChart, "ぐ", "グ", "gu");
                AddKanaPair(GojuonChart, "ず", "ズ", "zu");
                AddKanaPair(GojuonChart, "づ", "ヅ", "zu");
                AddKanaPair(GojuonChart, "ぶ", "ブ", "bu");
                AddKanaPair(GojuonChart, "ぷ", "プ", "pu");
                // Column 4
                AddKanaPair(GojuonChart, "え", "エ", "e");
                AddKanaPair(GojuonChart, "け", "ケ", "ke");
                AddKanaPair(GojuonChart, "せ", "セ", "se");
                AddKanaPair(GojuonChart, "て", "テ", "te");
                AddKanaPair(GojuonChart, "ね", "ネ", "ne");
                AddKanaPair(GojuonChart, "へ", "ヘ", "he");
                AddKanaPair(GojuonChart, "め", "メ", "me");
                AddKanaPair(GojuonChart, "れ", "レ", "re");
                AddKanaPair(GojuonChart, "ゑ", "ヱ", "e"); // historical character
                AddKanaPair(GojuonChart, "げ", "ゲ", "ge");
                AddKanaPair(GojuonChart, "ぜ", "ゼ", "ze");
                AddKanaPair(GojuonChart, "で", "デ", "de");
                AddKanaPair(GojuonChart, "べ", "ベ", "be");
                AddKanaPair(GojuonChart, "ぺ", "ペ", "pe");
                // Column 5
                AddKanaPair(GojuonChart, "お", "オ", "o");
                AddKanaPair(GojuonChart, "こ", "コ", "ko");
                AddKanaPair(GojuonChart, "そ", "ソ", "so");
                AddKanaPair(GojuonChart, "と", "ト", "to");
                AddKanaPair(GojuonChart, "の", "ノ", "no");
                AddKanaPair(GojuonChart, "ほ", "ホ", "ho");
                AddKanaPair(GojuonChart, "も", "モ", "mo");
                AddKanaPair(GojuonChart, "よ", "ヨ", "yo");
                AddKanaPair(GojuonChart, "ろ", "ロ", "ro");
                AddKanaPair(GojuonChart, "を", "ヲ", "o"); // rare particle
                // Syllabic n (ん/ン) is deliberately absent from the chart: its romanization depends on
                // what follows it, so it is handled by the SyllabicN* substitutions in Process instead.
                AddKanaPair(GojuonChart, "ご", "ゴ", "go");
                AddKanaPair(GojuonChart, "ぞ", "ゾ", "zo");
                AddKanaPair(GojuonChart, "ど", "ド", "do");
                AddKanaPair(GojuonChart, "ぼ", "ボ", "bo");
                AddKanaPair(GojuonChart, "ぽ", "ポ", "po");

                // Yōon
                // Column 1
                AddKanaPair(YoonChart, "きゃ", "キャ", "kya");
                AddKanaPair(YoonChart, "しゃ", "シャ", "sha");
                AddKanaPair(YoonChart, "ちゃ", "チャ", "cha");
                AddKanaPair(YoonChart, "にゃ", "ニャ", "nya");
                AddKanaPair(YoonChart, "ひゃ", "ヒャ", "hya");
                AddKanaPair(YoonChart, "みゃ", "ミャ", "mya");
                AddKanaPair(YoonChart, "りゃ", "リャ", "rya");
                AddKanaPair(YoonChart, "ぎゃ", "ギャ", "gya");
                AddKanaPair(YoonChart, "じゃ", "ジャ", "ja");
                AddKanaPair(YoonChart, "ぢゃ", "ヂャ", "ja");
                AddKanaPair(YoonChart, "びゃ", "ビャ", "bya");
                AddKanaPair(YoonChart, "ぴゃ", "ピャ", "pya");
                // Column 2
                AddKanaPair(YoonChart, "きゅ", "キュ", "kyu");
                AddKanaPair(YoonChart, "しゅ", "シュ", "shu");
                AddKanaPair(YoonChart, "ちゅ", "チュ", "chu");
                AddKanaPair(YoonChart, "にゅ", "ニュ", "nyu");
                AddKanaPair(YoonChart, "ひゅ", "ヒュ", "hyu");
                AddKanaPair(YoonChart, "みゅ", "ミュ", "myu");
                AddKanaPair(YoonChart, "りゅ", "リュ", "ryu");
                AddKanaPair(YoonChart, "ぎゅ", "ギュ", "gyu");
                AddKanaPair(YoonChart, "じゅ", "ジュ", "ju");
                AddKanaPair(YoonChart, "ぢゅ", "ヂュ", "ju");
                AddKanaPair(YoonChart, "びゅ", "ビュ", "byu");
                AddKanaPair(YoonChart, "ぴゅ", "ピュ", "pyu");
                // Column 3
                AddKanaPair(YoonChart, "きょ", "キョ", "kyo");
                AddKanaPair(YoonChart, "しょ", "ショ", "sho");
                AddKanaPair(YoonChart, "ちょ", "チョ", "cho");
                AddKanaPair(YoonChart, "にょ", "ニョ", "nyo");
                AddKanaPair(YoonChart, "ひょ", "ヒョ", "hyo");
                AddKanaPair(YoonChart, "みょ", "ミョ", "myo");
                AddKanaPair(YoonChart, "りょ", "リョ", "ryo");
                AddKanaPair(YoonChart, "ぎょ", "ギョ", "gyo");
                AddKanaPair(YoonChart, "じょ", "ジョ", "jo");
                AddKanaPair(YoonChart, "ぢょ", "ヂョ", "jo");
                AddKanaPair(YoonChart, "びょ", "ビョ", "byo");
                AddKanaPair(YoonChart, "ぴょ", "ピョ", "pyo");

                #endregion
            }

            /// <summary>
            /// Registers one romanization under both of its kana spellings.<br />
            /// The hiragana key is written first and the katakana key second, and an existing key is
            /// overwritten rather than rejected.
            /// </summary>
            /// <param name="chart">The chart to add the two entries to.</param>
            /// <param name="hiragana">The hiragana spelling.</param>
            /// <param name="katakana">The katakana spelling.</param>
            /// <param name="romaji">The romanization shared by both spellings.</param>
            private static void AddKanaPair(Dictionary<string, string> chart, string hiragana, string katakana, string romaji)
            {
                chart[hiragana] = romaji;
                chart[katakana] = romaji;
            }

            /// <summary>
            /// Performs Modified Hepburn romanization on the given text.<br />
            /// Note this supports Hiragana and Katakana, but not Kanji.<br />
            /// Due to limits of machine romanization, certain details of the system are left out - namely vowel
            /// combinations, as the rules for combining vowels depend on whether or not the vowels share a
            /// morpheme (something not known to the program).
            /// </summary>
            /// <param name="text">The text to romanize.</param>
            /// <returns>A romanized version of the text, leaving unrecognized characters untouched. Note that all romanized text will be lowercase.</returns>
            [Pure]
            public string Process(string text)
                => CulturalOperations.RunWithCulture(CultureInfo.GetCultureInfo("ja-JP"), () => text
                    // Replace common alternate characters
                    .ReplaceCommonAlternates()
                    // Insert spaces at boundaries between Latin characters and Japanese ones (ie. ニンテンドーDSiブラウザー)
                    .SeparateLanguageBoundaries()
                    // Do multi-char combinations first (Yōon)
                    .ReplaceFromChartWithSameCase(YoonChart, StringComparison.CurrentCulture)
                    // Then single-char replacements (Gojūon)
                    .ReplaceFromChartWithSameCase(GojuonChart, StringComparison.CurrentCulture)
                    // Do special substitutions - the more specific substitution of each pair is listed first,
                    // as the general case would otherwise consume its matches
                    .ReplaceMany(
                        // Convert chōonpu usage in original text into macrons to mark long vowels in a romanized manner
                        LongASub, LongESub, LongISub, LongOSub, LongUSub,
                        // Render syllabic n as "n'" when it precedes a vowel or "y", and as "n" everywhere else
                        // (before other consonants, at the end of the text, before whitespace or punctuation)
                        SyllabicNVowelsSub, SyllabicNGeneralCaseSub,
                        // Take sokuon usage into account (repeating the following consonant to mark long consonants)
                        SokuonChCaseSub, SokuonGeneralCaseSub));
        }
    }
}