JapanesePhonetic.cs 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. using BeatLyrics.Tool.Models;
  2. using System;
  3. using System.Collections;
  4. using System.Collections.Generic;
  5. using System.Linq;
  6. using System.Linq.Expressions;
  7. using System.Reflection;
  8. using System.Text.RegularExpressions;
  9. namespace BeatLyrics.Tool.Utils
  10. {
  /// <summary>
  /// Reflection-based wrapper around the Windows Runtime type
  /// <c>Windows.Globalization.JapanesePhoneticAnalyzer</c>. It converts a string into
  /// <see cref="TextTuple"/> entries that pair a piece of text with its reading (ruby).
  /// </summary>
  /// <remarks>
  /// Phonemes that mix kanji and hiragana are split further: each hiragana run
  /// (okurigana) is emitted with itself as ruby, and only the kanji runs receive the
  /// remaining part of the reading. When the reading cannot be lined up with the
  /// displayed hiragana, the phoneme is emitted unsplit with its full reading.
  /// </remarks>
  internal static class JapanesePhonetic
  {
      // Assembly-qualified suffix shared by both Windows Runtime type names.
      private const string WinRtAssemblySuffix =
          ", Windows.Globalization, Version=255.255.255.255," +
          " Culture=neutral, PublicKeyToken=null, ContentType=WindowsRuntime";

      // Analyzer type; null when the runtime cannot resolve it.
      private static readonly Type ClassType;

      // Compiled property getters for the phoneme objects; null when the types are unavailable.
      private static readonly Func<object, string> GetDisplayText;
      private static readonly Func<object, string> GetYomiText;

      // Whole string consists of digits, Latin letters and the listed punctuation.
      // The quote, '-', '[' and ']' are escaped so the literal compiles and the class is
      // parsed as one set ('+-*' would otherwise be a reversed range).
      // \u0304 = combining macron, \u2985/\u2986 = white parentheses, \u00A6 = broken bar.
      // NOTE(review): this set looks like the NFKC-folded image of the fullwidth forms
      // (U+FF01..U+FF60, U+FFE3, U+FFE4), which would explain the FormKC normalization
      // applied to matches - confirm against the upstream file before widening it.
      private static readonly Regex RegexPassPattern1 = new Regex(
          @"^[0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz,.:;!?""'`^~ \u0304_&@#%+\-*=<>()\[\]{}\u2985\u2986|\u00A6/\$]+$",
          RegexOptions.Compiled);

      // Whole string is kana (katakana or hiragana) and/or the ellipsis character.
      private static readonly Regex RegexPassPattern2 = new Regex(@"^[\p{IsKatakana}\p{IsHiragana}…]+$", RegexOptions.Compiled);

      // kanji + hiragana + kanji
      private static readonly Regex RegexSplitPattern1 = new Regex(@"^(\p{IsCJKUnifiedIdeographs}+)(\p{IsHiragana}+)(\p{IsCJKUnifiedIdeographs}+)$", RegexOptions.Compiled);

      // hiragana + kanji
      private static readonly Regex RegexSplitPattern2 = new Regex(@"^(\p{IsHiragana}+)(\p{IsCJKUnifiedIdeographs}+)$", RegexOptions.Compiled);

      // kanji + hiragana
      private static readonly Regex RegexSplitPattern3 = new Regex(@"^(\p{IsCJKUnifiedIdeographs}+)(\p{IsHiragana}+)$", RegexOptions.Compiled);

      // hiragana + kanji + hiragana
      private static readonly Regex RegexSplitPattern4 = new Regex(@"^(\p{IsHiragana}+)(\p{IsCJKUnifiedIdeographs}+)(\p{IsHiragana}+)$", RegexOptions.Compiled);

      // kanji + hiragana + kanji + hiragana
      private static readonly Regex RegexSplitPattern5 = new Regex(@"^(\p{IsCJKUnifiedIdeographs}+)(\p{IsHiragana}+)(\p{IsCJKUnifiedIdeographs}+)(\p{IsHiragana}+)$", RegexOptions.Compiled);

      /// <summary>
      /// Resolves the Windows Runtime types by name and compiles the property getters.
      /// </summary>
      static JapanesePhonetic()
      {
          ClassType = Type.GetType("Windows.Globalization.JapanesePhoneticAnalyzer" + WinRtAssemblySuffix);
          var itemType = Type.GetType("Windows.Globalization.JapanesePhoneme" + WinRtAssemblySuffix);
          if (ClassType == null || itemType == null)
          {
              // Do not throw from the type initializer; GetWords reports the missing
              // platform support with a descriptive exception instead.
              return;
          }

          var pArg = Expression.Parameter(typeof(object));
          var pArgConv = Expression.Convert(pArg, itemType);
          GetDisplayText = Expression.Lambda<Func<object, string>>(Expression.Property(pArgConv, "DisplayText"), pArg).Compile();
          GetYomiText = Expression.Lambda<Func<object, string>>(Expression.Property(pArgConv, "YomiText"), pArg).Compile();
      }

      /// <summary>
      /// Runs the phonetic analyzer over <paramref name="text"/> and returns one or more
      /// tuples per phoneme, splitting mixed kanji/hiragana phonemes where possible.
      /// </summary>
      /// <param name="text">Text to analyze. Null or empty yields an empty array.</param>
      /// <returns>The tuples in the order the analyzer returned the phonemes.</returns>
      /// <exception cref="PlatformNotSupportedException">
      /// The Windows Runtime analyzer types could not be resolved.
      /// </exception>
      public static TextTuple[] GetWords(string text)
      {
          var tuples = new List<TextTuple>();
          if (string.IsNullOrEmpty(text))
          {
              return tuples.ToArray();
          }

          if (ClassType == null || GetDisplayText == null || GetYomiText == null)
          {
              throw new PlatformNotSupportedException(
                  "Windows.Globalization.JapanesePhoneticAnalyzer is not available on this platform.");
          }

          var phonemes = (IEnumerable)ClassType.InvokeMember("GetWords", BindingFlags.InvokeMethod, null, null,
              new object[] { text });
          if (phonemes == null)
          {
              return tuples.ToArray();
          }

          foreach (var phoneme in phonemes)
          {
              AppendPhoneme(tuples, GetDisplayText(phoneme), GetYomiText(phoneme));
          }

          return tuples.ToArray();
      }

      // Appends the tuple(s) for a single phoneme to the output list.
      private static void AppendPhoneme(List<TextTuple> output, string displayText, string yomiText)
      {
          if (displayText == " ")
          {
              output.Add(NewTuple(" ", " "));
              return;
          }

          // Alphanumerics/punctuation whose reading equals the text: emit in NFKC form.
          if (displayText == yomiText && RegexPassPattern1.IsMatch(displayText))
          {
              var passed = displayText.Normalize(System.Text.NormalizationForm.FormKC);
              output.Add(NewTuple(passed, passed));
              return;
          }

          // Pure kana reads as itself, regardless of the reported reading.
          if (RegexPassPattern2.IsMatch(displayText))
          {
              output.Add(NewTuple(displayText, displayText));
              return;
          }

          if (!string.IsNullOrEmpty(yomiText) && TryAppendSplit(output, displayText, yomiText))
          {
              return;
          }

          // No split shape applied (or the reading did not align): keep the phoneme whole.
          output.Add(NewTuple(displayText, yomiText));
      }

      // Tries the five kanji/hiragana shapes; on success appends the split tuples and returns true.
      // The shapes are anchored and mutually exclusive, so at most one of them can match.
      private static bool TryAppendSplit(List<TextTuple> output, string displayText, string yomiText)
      {
          string first;
          string second;

          // kanji + hiragana + kanji, e.g. 言い合 / いいあ
          var match = RegexSplitPattern1.Match(displayText);
          if (match.Success)
          {
              var middle = match.Groups[2].Value;
              if (!TrySplitAround(yomiText, middle, out first, out second))
              {
                  return false;
              }
              output.Add(NewTuple(match.Groups[1].Value, first));
              output.Add(NewTuple(middle, middle));
              output.Add(NewTuple(match.Groups[3].Value, second));
              return true;
          }

          // hiragana + kanji, e.g. お顔 / おかお
          match = RegexSplitPattern2.Match(displayText);
          if (match.Success)
          {
              var lead = match.Groups[1].Value;
              if (!TryTrim(yomiText, lead, string.Empty, out first))
              {
                  return false;
              }
              output.Add(NewTuple(lead, lead));
              output.Add(NewTuple(match.Groups[2].Value, first));
              return true;
          }

          // kanji + hiragana, e.g. 聞き / きき, 示し / しめし
          match = RegexSplitPattern3.Match(displayText);
          if (match.Success)
          {
              var tail = match.Groups[2].Value;
              if (!TryTrim(yomiText, string.Empty, tail, out first))
              {
                  return false;
              }
              output.Add(NewTuple(match.Groups[1].Value, first));
              output.Add(NewTuple(tail, tail));
              return true;
          }

          // hiragana + kanji + hiragana
          match = RegexSplitPattern4.Match(displayText);
          if (match.Success)
          {
              var lead = match.Groups[1].Value;
              var tail = match.Groups[3].Value;
              if (!TryTrim(yomiText, lead, tail, out first))
              {
                  return false;
              }
              output.Add(NewTuple(lead, lead));
              output.Add(NewTuple(match.Groups[2].Value, first));
              output.Add(NewTuple(tail, tail));
              return true;
          }

          // kanji + hiragana + kanji + hiragana, e.g. 聞き飽き / ききあき, 聞き届け / ききとどけ
          match = RegexSplitPattern5.Match(displayText);
          if (match.Success)
          {
              var middle = match.Groups[2].Value;
              var tail = match.Groups[4].Value;
              string head;
              // Drop the trailing okurigana first; the rest has the kanji+hiragana+kanji shape.
              if (!TryTrim(yomiText, string.Empty, tail, out head)
                  || !TrySplitAround(head, middle, out first, out second))
              {
                  return false;
              }
              output.Add(NewTuple(match.Groups[1].Value, first));
              output.Add(NewTuple(middle, middle));
              output.Add(NewTuple(match.Groups[3].Value, second));
              output.Add(NewTuple(tail, tail));
              return true;
          }

          return false;
      }

      // Removes prefix and suffix from reading when both are present (ordinal comparison)
      // and at least one character remains between them.
      private static bool TryTrim(string reading, string prefix, string suffix, out string core)
      {
          core = null;
          var coreLength = reading.Length - prefix.Length - suffix.Length;
          if (coreLength <= 0
              || !reading.StartsWith(prefix, StringComparison.Ordinal)
              || !reading.EndsWith(suffix, StringComparison.Ordinal))
          {
              return false;
          }

          core = reading.Substring(prefix.Length, coreLength);
          return true;
      }

      // Splits reading at the first occurrence of okurigana that leaves a non-empty part
      // on both sides. The search starts at index 1 because the leading kanji needs at
      // least one character of reading (so きき splits as き|き, not as an empty left part).
      private static bool TrySplitAround(string reading, string okurigana, out string left, out string right)
      {
          left = null;
          right = null;
          if (reading.Length < okurigana.Length + 2)
          {
              return false;
          }

          var at = reading.IndexOf(okurigana, 1, StringComparison.Ordinal);
          if (at < 0 || at + okurigana.Length >= reading.Length)
          {
              // Not found, or nothing would be left for the trailing kanji.
              return false;
          }

          left = reading.Substring(0, at);
          right = reading.Substring(at + okurigana.Length);
          return true;
      }

      // Builds a tuple from a piece of text and its ruby.
      private static TextTuple NewTuple(string text, string ruby)
      {
          return new TextTuple { Text = text, Ruby = ruby };
      }
  }
  159. }