Created
January 16, 2013 21:53
-
-
Save hodzanassredin/4551294 to your computer and use it in GitHub Desktop.
Metaphone implementation for the Russian language
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
using System.Threading.Tasks; | |
namespace Metaphone | |
{ | |
/// <summary>
/// Small string extension helpers for working with the last character of a string.
/// </summary>
static class Helpers
{
    /// <summary>
    /// Builds a new string equal to <paramref name="s"/> with its final character
    /// swapped for <paramref name="c"/>. The input string itself is not modified.
    /// </summary>
    /// <param name="s">Source string; must contain at least one character.</param>
    /// <param name="c">Character that takes the place of the last one.</param>
    /// <returns>The new string with the same length as <paramref name="s"/>.</returns>
    public static string ReplaceLastChar(this string s, char c)
    {
        int keptLength = s.Length - 1;
        string head = s.Substring(0, keptLength);
        return string.Concat(head, c.ToString());
    }

    /// <summary>
    /// Returns the final character of <paramref name="s"/>.
    /// </summary>
    /// <param name="s">Source string; must contain at least one character.</param>
    /// <returns>The character at the last index of <paramref name="s"/>.</returns>
    public static char LastChar(this string s)
    {
        int lastIndex = s.Length - 1;
        return s[lastIndex];
    }
}
/// <summary>
/// Console demo of a Metaphone-style phonetic encoder for Russian words and surnames.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        Write("Аввакумов");
        Write("Авакумов");
        Write("Петрокова");
        Write("Петрушкина");
        Write("я");
        Write("устал");
        Console.ReadKey();
    }

    /// <summary>Prints a word followed by a space and its phonetic code.</summary>
    static void Write(string str)
    {
        Console.WriteLine(str + " " + MetaphoneRu(str));
    }

    // Letters that are kept; everything else (including Ъ and Ь) is dropped.
    private const string Alphabet = "ОЕАИУЭЮЯПСТРКЛМНБВГДЖЗЙФХЦЧШЩЫЁ";

    // Voiced consonants and, at the same index, their voiceless counterparts.
    private const string Voiced = "БЗДВГ";
    private const string Voiceless = "ПСТФК";

    // Consonants in front of which a voiced consonant is devoiced.
    private const string DevoicingConsonants = "ПСТКБВГДЖЗФХЦЧШЩ";

    // Vowels that get replaced and, at the same index, their replacements.
    private const string Vowels = "ОЮЕЭЯЁЫ";
    private const string VowelReplacements = "АУИИАИА";

    // Surname endings and the one-character code each is compressed to.
    // Order matters: longer endings must be tried before the shorter endings they
    // contain (e.g. "ИЕВА" before "ЕВА"), so this is an ordered table rather than a
    // Dictionary, whose enumeration order is not guaranteed.
    private static readonly string[,] SuffixCodes =
    {
        { "ОВСКИЙ", "@" },
        { "ЕВСКИЙ", "#" },
        { "ОВСКАЯ", "$" },
        { "ЕВСКАЯ", "%" },
        { "ИЕВА", "9" },
        { "ЕЕВА", "9" },
        { "ОВА", "9" },
        { "ЕВА", "9" },
        { "ИНА", "1" },
        { "ИЕВ", "4" },
        { "ЕЕВ", "4" },
        { "НКО", "3" },
        { "ОВ", "4" },
        { "ЕВ", "4" },
        { "АЯ", "6" },
        { "ИЙ", "7" },
        { "ЫЙ", "7" },
        { "ЫХ", "5" },
        { "ИХ", "5" },
        { "ИН", "8" },
        { "ИК", "2" },
        { "ЕК", "2" },
        { "УК", "0" },
        { "ЮК", "0" }
    };

    /// <summary>
    /// Computes the phonetic code of a Russian word: the word is upper-cased, stripped
    /// of characters outside the alphabet, its ending is compressed to a one-character
    /// code, and the remaining letters are normalized (vowel replacement, consonant
    /// devoicing, removal of repeated letters).
    /// </summary>
    /// <param name="w">Word to encode; must not be null.</param>
    /// <returns>The phonetic code; an empty string when no letter of the alphabet is present.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="w"/> is null.</exception>
    static string MetaphoneRu(string w)
    {
        if (w == null)
        {
            throw new ArgumentNullException("w");
        }

        string s = KeepAlphabetLetters(w.ToUpperInvariant());
        if (s.Length == 0)
        {
            return "";
        }

        s = CompressSuffix(s);
        s = DevoiceLastChar(s);
        return Encode(s);
    }

    // Returns the characters of text that belong to Alphabet, in their original order.
    private static string KeepAlphabetLetters(string text)
    {
        var kept = new StringBuilder(text.Length);
        foreach (char ch in text)
        {
            if (Alphabet.IndexOf(ch) >= 0)
            {
                kept.Append(ch);
            }
        }
        return kept.ToString();
    }

    // Replaces the first matching ending from SuffixCodes with its code.
    private static string CompressSuffix(string s)
    {
        for (int i = 0; i < SuffixCodes.GetLength(0); i++)
        {
            string suffix = SuffixCodes[i, 0];
            if (s.EndsWith(suffix, StringComparison.Ordinal))
            {
                // The codes are not letters, so no further ending can match: stop here.
                return s.Substring(0, s.Length - suffix.Length) + SuffixCodes[i, 1];
            }
        }
        return s;
    }

    // Devoices the last character when it is a voiced consonant. Expects a non-empty string.
    private static string DevoiceLastChar(string s)
    {
        int voicedIndex = Voiced.IndexOf(s[s.Length - 1]);
        if (voicedIndex == -1)
        {
            return s;
        }
        return s.Substring(0, s.Length - 1) + Voiceless[voicedIndex];
    }

    // Walks the prepared word left to right and builds the final code.
    private static string Encode(string s)
    {
        var code = new StringBuilder(s.Length);
        char prev = ' '; // previous input character; a space never occurs in s

        foreach (char c in s)
        {
            int vowelIndex = Vowels.IndexOf(c);
            if (vowelIndex != -1)
            {
                bool afterI = prev == 'Й' || prev == 'И';
                if (afterI && (c == 'О' || c == 'Е'))
                {
                    // Digraph Й/И + О/Е collapses to a single 'И': overwrite the
                    // Й/И already emitted and emit nothing for the vowel itself.
                    // (The original discarded the result of ReplaceLastChar here,
                    // so an emitted 'Й' was never rewritten.)
                    if (code.Length > 0)
                    {
                        code[code.Length - 1] = 'И';
                    }
                }
                else if (c != prev)
                {
                    // Plain vowel: emit its replacement, skipping repeats.
                    code.Append(VowelReplacements[vowelIndex]);
                }
            }
            else if (c != prev) // skip doubled letters, e.g. "Аввакумов"
            {
                if (DevoicingConsonants.IndexOf(c) != -1)
                {
                    // A voiced consonant directly before this one is devoiced.
                    int voicedIndex = Voiced.IndexOf(prev);
                    if (voicedIndex != -1 && code.Length > 0)
                    {
                        prev = Voiceless[voicedIndex];
                        code[code.Length - 1] = prev;
                    }
                }

                // After devoicing, the previous letter may now equal this one
                // (e.g. "Шмидт": Д -> Т, then Т); do not emit it twice.
                if (c != prev)
                {
                    code.Append(c);
                }
            }

            prev = c;
        }

        return code.ToString();
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment