123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- using BeatLyrics.Tool.Models;
- using System;
- using System.Collections;
- using System.Collections.Generic;
- using System.Linq.Expressions;
- using System.Reflection;
- using System.Text.RegularExpressions;
- namespace BeatLyrics.Tool.Utils
- {
- internal static class JapanesePhonetic
- {
- private static readonly Type ClassType;
- private static readonly Func<object, string> GetDisplayText;
- private static readonly Func<object, string> GetYomiText;
- private static readonly Regex RegexPassPattern1 = new Regex(@"^[0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz,.:;!?"'`^~ ̄_&@#%+-*=<>()[]{}⦅⦆|¦/\$]+$", RegexOptions.Compiled);
- private static readonly Regex RegexPassPattern2 = new Regex(@"^[\p{IsKatakana}\p{IsHiragana}…]+$", RegexOptions.Compiled);
- private static readonly Regex RegexSplitPattern1 = new Regex(@"^(\p{IsCJKUnifiedIdeographs}+)(\p{IsHiragana}+)(\p{IsCJKUnifiedIdeographs}+)$", RegexOptions.Compiled);
- private static readonly Regex RegexSplitPattern2 = new Regex(@"^(\p{IsHiragana}+)(\p{IsCJKUnifiedIdeographs}+)$", RegexOptions.Compiled);
- private static readonly Regex RegexSplitPattern3 = new Regex(@"^(\p{IsCJKUnifiedIdeographs}+)(\p{IsHiragana}+)$", RegexOptions.Compiled);
- private static readonly Regex RegexSplitPattern4 = new Regex(@"^(\p{IsHiragana}+)(\p{IsCJKUnifiedIdeographs}+)(\p{IsHiragana}+)$", RegexOptions.Compiled);
- private static readonly Regex RegexSplitPattern5 = new Regex(@"^(\p{IsCJKUnifiedIdeographs}+)(\p{IsHiragana}+)(\p{IsCJKUnifiedIdeographs}+)(\p{IsHiragana}+)$", RegexOptions.Compiled);
- static JapanesePhonetic()
- {
- ClassType = Type.GetType("Windows.Globalization.JapanesePhoneticAnalyzer," +
- " Windows.Globalization, Version=255.255.255.255," +
- " Culture=neutral, PublicKeyToken=null, ContentType=WindowsRuntime");
- var itemType = Type.GetType("Windows.Globalization.JapanesePhoneme," +
- " Windows.Globalization, Version=255.255.255.255," +
- " Culture=neutral, PublicKeyToken=null, ContentType=WindowsRuntime");
- var pArg = Expression.Parameter(typeof(object));
- var pArgConv = Expression.Convert(pArg, itemType);
- GetDisplayText = Expression.Lambda<Func<object, string>>(Expression.Property(pArgConv, "DisplayText"), pArg).Compile();
- GetYomiText = Expression.Lambda<Func<object, string>>(Expression.Property(pArgConv, "YomiText"), pArg).Compile();
- }
- public static TextTuple[] GetWords(string text)
- {
- var ret = (IEnumerable)ClassType.InvokeMember("GetWords", BindingFlags.InvokeMethod, null, null,
- new object[] { text });
- var lst = new List<TextTuple>();
- foreach (var item in ret)
- {
- var displayText = GetDisplayText(item);
- if (displayText == " ")
- {
- lst.Add(new TextTuple { Text = " ", Ruby = " " });
- continue;
- }
- var yomiText = GetYomiText(item);
- {
- var match = RegexPassPattern1.Match(displayText);
- if (displayText == yomiText && match.Success)
- {
- var passed = displayText.Normalize(System.Text.NormalizationForm.FormKC);
- lst.Add(new TextTuple { Text = passed, Ruby = passed });
- continue;
- }
- }
- {
- if (RegexPassPattern2.IsMatch(displayText))
- {
- lst.Add(new TextTuple { Text = displayText, Ruby = displayText });
- continue;
- }
- }
- {
- var match = RegexSplitPattern1.Match(displayText);
- if (match.Success)
- {
- var middleHira = match.Groups[2].Value;
- var split = yomiText.Split(new[] { middleHira }, StringSplitOptions.RemoveEmptyEntries);
- lst.Add(new TextTuple { Text = match.Groups[1].Value, Ruby = split[0] });
- lst.Add(new TextTuple { Text = middleHira, Ruby = middleHira });
- lst.Add(new TextTuple { Text = match.Groups[3].Value, Ruby = split[1] });
- continue;
- }
- }
- {
- var match = RegexSplitPattern2.Match(displayText);
- if (match.Success)
- {
- var leftHira = match.Groups[1].Value;
- var split = yomiText.Split(new[] { leftHira }, StringSplitOptions.RemoveEmptyEntries);
- lst.Add(new TextTuple { Text = leftHira, Ruby = leftHira });
- lst.Add(new TextTuple { Text = match.Groups[2].Value, Ruby = split[0] });
- continue;
- }
- }
- {
- var match = RegexSplitPattern3.Match(displayText);
- if (match.Success)
- {
- var hira = match.Groups[2].Value;
- var split = yomiText.Split(new[] { hira }, StringSplitOptions.RemoveEmptyEntries);
- lst.Add(new TextTuple { Text = match.Groups[1].Value, Ruby = split[0] });
- lst.Add(new TextTuple { Text = hira, Ruby = hira });
- continue;
- }
- }
- {
- var match = RegexSplitPattern4.Match(displayText);
- if (match.Success)
- {
- var leadingHira = match.Groups[1].Value;
- var suffixHira = match.Groups[3].Value;
- var split = yomiText.Substring(leadingHira.Length);
- split = split.Substring(0, split.Length - suffixHira.Length);
- lst.Add(new TextTuple { Text = leadingHira, Ruby = leadingHira });
- lst.Add(new TextTuple { Text = match.Groups[2].Value, Ruby = split });
- lst.Add(new TextTuple { Text = suffixHira, Ruby = suffixHira });
- continue;
- }
- }
- {
- var match = RegexSplitPattern5.Match(displayText);
- if (match.Success)
- {
- var kan1 = match.Groups[1].Value;
- var hira1 = match.Groups[2].Value;
- var kan2 = match.Groups[3].Value;
- var hira2 = match.Groups[4].Value;
- var split = yomiText.Split(new[] { hira1, hira2 }, StringSplitOptions.RemoveEmptyEntries);
- lst.Add(new TextTuple { Text = kan1, Ruby = split[0] });
- lst.Add(new TextTuple { Text = hira1, Ruby = hira1 });
- lst.Add(new TextTuple { Text = kan2, Ruby = split[1] });
- lst.Add(new TextTuple { Text = hira2, Ruby = hira2 });
- continue;
- }
- }
- lst.Add(new TextTuple { Text = displayText, Ruby = yomiText });
- }
- var arr = lst.ToArray();
- return arr;
- }
- }
- }
|