Skip to content

Instantly share code, notes, and snippets.

@rousso
Created March 5, 2013 00:11
Show Gist options
  • Save rousso/5086881 to your computer and use it in GitHub Desktop.
Save rousso/5086881 to your computer and use it in GitHub Desktop.
using System;
using System.Text.RegularExpressions;
namespace rousso
{
/// <summary>
/// GreekEuroVerbalizer is a class that is not meant to be instantiated (all its members are static).
/// It exposes the overloaded static GetVerbal() method, which returns the Greek verbal form of a given decimal value.
/// </summary>
/// <remarks>
/// Everything is declared static so that no repeated initialisation is needed on every call. This should speed up iterations.
/// Also, it does not really need to be instantiated.
/// For an instantiated approach see the GreekEuro struct at the end of this source file.
/// </remarks>
public class GreekEuroVerbalizer
{
#region Verbal Map Tables
/// <summary>
/// Words for the units digit, indexed by the digit itself (index 0 is empty: a zero unit is not spoken).
/// </summary>
/// <remarks>
/// "έξι" replaces the obsolete spelling "έξη".
/// </remarks>
private static readonly string[] vrb_001 = { "", "ένα", "δύο", "τρία", "τέσσερα", "πέντε", "έξι", "επτά", "οκτώ", "εννέα"};
/// <summary>
/// Words for the tens digit, indexed by the digit itself (index 0 is empty).
/// </summary>
/// <remarks>
/// "ενενήντα" is spelled with a single "ν" (unlike "εννέα" / "εννιακόσια").
/// </remarks>
private static readonly string[] vrb_010 = { "", "δέκα", "είκοσι", "τριάντα", "σαράντα", "πενήντα", "εξήντα", "εβδομήντα", "ογδόντα", "ενενήντα"};
/// <summary>
/// Words for the hundreds digit, indexed by the digit itself.
/// </summary>
/// <remarks>
/// Every entry ends with a space so that the tens/units words that follow are separated from it;
/// the entry for zero is a single space.
/// </remarks>
private static readonly string[] vrb_100 = { " ", "εκατό ", "διακόσια ", "τριακόσια ", "τετρακόσια ", "πεντακόσια ", "εξακόσια ", "επτακόσια ", "οκτακόσια ", "εννιακόσια "};
/// <summary>
/// Scale words, indexed by the position of the thousand separator counted from the right.
/// </summary>
/// <remarks>
/// Index 0 is the complete word for "thousands". Every other entry is a prefix that is placed
/// in front of <see cref="millions" /> (index 1 is empty, giving plain "millions").
/// Notice the space appended to some prefixes.
/// </remarks>
private static readonly string[] vrb_mil = { "χιλιάδες", "", "δισ", "τρισ", "τετράκις ", "πεντάκις ", "εξάκις ", "επτάκις ", "οκτάκις ", "εννιάκις "};
/// <summary>
/// The word for "million" (singular).
/// </summary>
private static readonly string million = "εκατομμύριο";
/// <summary>
/// The word for "millions" (plural).
/// </summary>
private static readonly string millions = "εκατομμύρια";
#endregion Verbal Map Tables
#region RegEx Patterns
/// <summary>
/// Matches the units digit of a digit group: a single digit that is immediately followed by
/// '.', ',' or the end of the input.
/// </summary>
/// <remarks>
/// The look-behind tolerates Greek letters and spaces between the preceding separator (or the start
/// of the input) and the digit, because the hundreds and tens of the group have already been
/// replaced by words when this pattern runs. Matching is right to left.
/// </remarks>
private static readonly Regex reg_001 = new Regex("(?<=\\.[ά-ώ ]*|\\A(?=\\.|,|\\Z)|\\A[ά-ώ ]*|,[ά-ώ ]*)\\d{1}(?=\\.|,|\\Z)", RegexOptions.RightToLeft);
/// <summary>
/// Matches the tens digit of a digit group: a single digit followed by exactly one more digit
/// and then '.', ',' or the end of the input.
/// </summary>
/// <remarks>
/// The look-behind accepts an already-translated hundreds word (Greek letters plus one space),
/// the start of the input, or the decimal comma. Matching is right to left.
/// </remarks>
private static readonly Regex reg_010 = new Regex("(?<=\\.[ά-ώ]* |\\A[ά-ώ]* |\\A(?=\\d{1})|,)\\d{1}(?=\\d{1}\\.|\\d{1},|\\d{1}\\Z)", RegexOptions.RightToLeft);
/// <summary>
/// Matches the hundreds digit of a digit group: a single digit at the start of the input or right
/// after a '.', followed by two more digits and then '.', ',' or the end of the input.
/// </summary>
/// <remarks>
/// Matching is right to left.
/// </remarks>
private static readonly Regex reg_100 = new Regex("(?<=\\.|\\A)\\d{1}(?=\\d{2}\\.|\\d{2},|\\d{2}\\Z)", RegexOptions.RightToLeft);
/// <summary>
/// Matches a hundreds word ending in "σια", an optional following word and the thousands word,
/// so that the hundreds word can be given its "σιες" ending.
/// </summary>
/// <remarks>
/// Matching is right to left.
/// </remarks>
private static readonly Regex reg_plural = new Regex("σια [ά-ώ]* " + vrb_mil[0], RegexOptions.RightToLeft);
/// <summary>
/// Matches "ένα" directly followed by the thousands word, so that it can be changed to "μία".
/// </summary>
/// <remarks>
/// Matching is right to left.
/// </remarks>
private static readonly Regex reg_thousandGenre = new Regex("ένα " + vrb_mil[0], RegexOptions.RightToLeft);
/// <summary>
/// Matches "one thousands" when it stands at the start of the input or right after a millions word,
/// so that it can be replaced by the dedicated word for one thousand.
/// </summary>
/// <remarks>
/// Matching is right to left.
/// </remarks>
private static readonly Regex reg_singleThousand = new Regex(String.Format("(?<={0} *|\\A){1} *{2}", millions, vrb_001[1], vrb_mil[0]), RegexOptions.RightToLeft);
/// <summary>
/// Matches "one" followed by an optional scale prefix and the plural millions word, when it stands
/// at the start of the input or right after another millions word, so that the plural can be made singular.
/// </summary>
/// <remarks>
/// Matching is right to left.
/// </remarks>
private static readonly Regex reg_singleMillion = new Regex(String.Format("(?<={0} *|\\A){1} *{2}?{3}", millions, vrb_001[1], Array2Group(vrb_mil), millions), RegexOptions.RightToLeft);
/// <summary>
/// Matches a non-word character followed by a word character, i.e. the first letter of a word
/// together with the separator in front of it.
/// </summary>
private static readonly Regex reg_wrd = new Regex("\\W{1}\\w{1}", RegexOptions.None);
/// <summary>
/// Matches a non-final sigma that is followed by a non-word character, i.e. a sigma that
/// ends a word and should be a final sigma.
/// </summary>
private static readonly Regex reg_s = new Regex("σ{1}\\W{1}", RegexOptions.None);
/// <summary>
/// Matches the first accented vowel of a word when the same word contains another accented vowel later on.
/// </summary>
private static readonly Regex reg_acc = new Regex("((?<=(\\W+|\\A)[α-ω]*)[άέίόήύώ]{1}(?=[α-ω]+[άέίόήύώ]+[α-ω]+))", RegexOptions.None);
#endregion RegEx Patterns
#region Regex Match Evaluators (event handlers)
/// <summary>
/// Looks up the word for a matched digit in the given table. Used by most translations.
/// </summary>
/// <remarks>
/// The lookup never throws for bad input: when the matched text is not an integer, or the integer
/// is not a valid index of the table, the matched text is returned unchanged.
/// </remarks>
/// <param name="m">The RegEx match whose value is expected to be an integer.</param>
/// <param name="vrb">The lookup table, indexed by the parsed value.</param>
/// <returns>The table entry on success, or the original matched text on failure.</returns>
static private string Translate(Match m, string[] vrb)
{
    int index;
    // Validate instead of catching every exception: same fallback, no exception-driven control flow.
    if (vrb != null && int.TryParse(m.Value, out index) && index >= 0 && index < vrb.Length)
        return vrb[index];
    return m.Value;
}
/// <summary>
/// Match evaluator for the units pass.
/// Handed to Regex.Replace; maps the matched digit through the units table.
/// </summary>
/// <param name="m">The match to translate.</param>
/// <returns>The replacement text produced by <see cref="Translate" />.</returns>
static private string Translate_001(Match m)
{
    string[] unitsTable = vrb_001;
    return Translate(m, unitsTable);
}
/// <summary>
/// Match evaluator for the tens pass.
/// Handed to Regex.Replace; maps the matched digit through the tens table.
/// </summary>
/// <param name="m">The match to translate.</param>
/// <returns>The replacement text produced by <see cref="Translate" />.</returns>
static private string Translate_010(Match m)
{
    string[] tensTable = vrb_010;
    return Translate(m, tensTable);
}
/// <summary>
/// Match evaluator for the hundreds pass.
/// Handed to Regex.Replace; maps the matched digit through the hundreds table.
/// </summary>
/// <param name="m">The match to translate.</param>
/// <returns>The replacement text produced by <see cref="Translate" />.</returns>
static private string Translate_100(Match m)
{
    string[] hundredsTable = vrb_100;
    return Translate(m, hundredsTable);
}
/// <summary>
/// Match evaluator that switches the "σια " ending inside the matched text to "σιες ".
/// </summary>
/// <param name="m">The match to rewrite.</param>
/// <returns>The matched text with every "σια " replaced by "σιες ".</returns>
static private string Translate_plural(Match m)
{
    const string originalEnding = "σια ";
    const string replacementEnding = "σιες ";
    string matched = m.Value;
    return matched.Replace(originalEnding, replacementEnding);
}
/// <summary>
/// Match evaluator that switches "ένα " inside the matched text to "μία ".
/// </summary>
/// <param name="m">The match to rewrite.</param>
/// <returns>The matched text with every "ένα " replaced by "μία ".</returns>
static private string Translate_thousandGenre(Match m)
{
    const string originalForm = "ένα ";
    const string replacementForm = "μία ";
    string matched = m.Value;
    return matched.Replace(originalForm, replacementForm);
}
/// <summary>
/// Match evaluator that replaces "one" + space + the thousands word with the single word "χίλια".
/// </summary>
/// <param name="m">The match to rewrite.</param>
/// <returns>The matched text with the two-word phrase replaced.</returns>
static private string Translate_singleThousand(Match m)
{
    string oneThousandPhrase = String.Concat(vrb_001[1], " ", vrb_mil[0]);
    string matched = m.Value;
    return matched.Replace(oneThousandPhrase, "χίλια");
}
/// <summary>
/// Match evaluator that turns the plural millions word inside the matched text into the singular one
/// (this also covers the prefixed forms such as billions, because they end in the same word).
/// </summary>
/// <param name="m">The match to rewrite.</param>
/// <returns>The matched text with the plural word replaced by the singular word.</returns>
static private string Translate_singleMillion(Match m)
{
    string matched = m.Value;
    string singularised = matched.Replace(millions, million);
    return singularised;
}
/// <summary>
/// Match evaluator that upper-cases the whole matched text.
/// </summary>
/// <param name="m">The match to rewrite.</param>
/// <returns>The matched text in upper case.</returns>
static private string ToUpper(Match m)
{
    string matched = m.Value;
    return matched.ToUpper();
}
/// <summary>
/// Match evaluator that turns a leading "σ" of the matched text into the final-sigma form "ς".
/// Inherited from a proper-casing routine in another library.
/// </summary>
/// <param name="m">The match to rewrite; its value must not be empty.</param>
/// <returns>The matched text with its first character replaced when that character is "σ"; otherwise the matched text unchanged.</returns>
static private string FixSigma(Match m)
{
    string matched = m.Value;
    bool startsWithSigma = matched[0] == 'σ';
    return startsWithSigma ? "ς" + matched.Substring(1) : matched;
}
/// <summary>
/// Match evaluator that strips the accent from every accented vowel in the matched text.
/// </summary>
/// <remarks>
/// Handles the seven accented lower-case vowels ά έ ί ό ή ύ ώ. The omega mapping previously
/// replaced the accented letter with itself, so an accented omega was never stripped.
/// </remarks>
/// <param name="m">The match to rewrite.</param>
/// <returns>The matched text with accented vowels replaced by their plain forms.</returns>
static private string FixAccent(Match m)
{
    return m.Value
        .Replace('ά', 'α')
        .Replace('έ', 'ε')
        .Replace('ί', 'ι')
        .Replace('ό', 'ο')
        .Replace('ή', 'η')
        .Replace('ύ', 'υ')
        .Replace('ώ', 'ω');
}
#endregion Regex Match Evaluators (event handlers)
#region Public interface methods
/// <summary>
/// Convenience overload that supplies the default currency and cent names.
/// "Ευρώ" and "Λεπτά" are used respectively.
/// </summary>
/// <param name="Amount">The amount to parse and translate.</param>
/// <returns>A string representing the passed Amount in Greek text.</returns>
static public string GetVerbal(decimal Amount)
{
    const string defaultCurrencyVerbal = "Ευρώ";
    const string defaultCentVerbal = "Λεπτά";
    return GetVerbal(Amount, defaultCurrencyVerbal, defaultCentVerbal);
}
/// <summary>
/// Builds the Greek verbal form of a monetary amount using the supplied currency and cent names.
/// </summary>
/// <remarks>
/// The amount is rounded to two decimals and formatted as digit groups ("1.234.567,89"). The digits are
/// then rewritten in place: each digit becomes a word, each group separator becomes a scale word,
/// and the decimal comma separates the currency part from the cent part. The cent part is left out
/// when it is zero. The result is passed through <see cref="ToProper" />.
/// </remarks>
/// <param name="Amount">The amount to parse and translate.</param>
/// <param name="CurrencyVerbal">The verbal for the currency (e.g. "Δολάρια").</param>
/// <param name="CentVerbal">The verbal for the cents (e.g. "Λεπτά").</param>
/// <returns>A string representing the passed Amount in Greek text.</returns>
/// <exception cref="ArgumentNullException">Thrown when either verbal is null.</exception>
/// <exception cref="ApplicationException">Thrown when both verbals are equal after trimming.</exception>
static public string GetVerbal(decimal Amount, string CurrencyVerbal, string CentVerbal)
{
    if (CurrencyVerbal == null)
        throw new ArgumentNullException("CurrencyVerbal");
    if (CentVerbal == null)
        throw new ArgumentNullException("CentVerbal");
    //
    // Make sure we don't have any space around passed text
    // and that both strings are different to each other
    //
    CurrencyVerbal = CurrencyVerbal.Trim();
    CentVerbal = CentVerbal.Trim();
    if (CurrencyVerbal == CentVerbal)
        throw new ApplicationException("Please pass a String for Currency name and a different string Cent name. Example: GreekEuroVerbalizer.GetVerbal(Amount, \"Ευρώ\", \"Λεπτά\");.");
    //
    // Round to cents first, so that an amount such as 0.001 takes the zero branch
    // instead of producing a text without any number in it.
    //
    Amount = Math.Round(Amount, 2, MidpointRounding.AwayFromZero);
    if (Amount == 0)
        return ToProper(String.Format("μηδέν {0} και μηδέν {1}", CurrencyVerbal, CentVerbal));
    //
    // Handle negative numbers on the value itself rather than on the formatted text,
    // so the result does not depend on which character a culture uses as the minus sign.
    //
    string prefix = String.Empty;
    if (Amount < 0)
    {
        prefix = "μείον ";
        Amount = -Amount;
    }
    //
    // Format with explicit separators ('.' between groups, ',' before the decimals)
    // so that the text is the same on every system, whatever its culture data or user overrides.
    //
    System.Globalization.NumberFormatInfo separators = new System.Globalization.NumberFormatInfo();
    separators.NumberGroupSeparator = ".";
    separators.NumberDecimalSeparator = ",";
    string formatedAmount = Amount.ToString("###,###,###,###,###,##0.00", separators).Trim();
    //
    // Convert the digits to words: hundreds first, then tens, then units.
    // The order matters because the tens and units patterns expect the words written by the earlier passes.
    //
    formatedAmount = reg_100.Replace(formatedAmount, new MatchEvaluator(Translate_100));
    formatedAmount = reg_010.Replace(formatedAmount, new MatchEvaluator(Translate_010));
    formatedAmount = reg_001.Replace(formatedAmount, new MatchEvaluator(Translate_001));
    //
    // "εκατό" becomes "εκατόν" when tens or units follow it (101-199).
    // A bare hundred is followed by a separator, not by a letter, and is left alone.
    //
    formatedAmount = Regex.Replace(formatedAmount, vrb_100[1] + "(?=[ά-ώ])", "εκατόν ");
    //
    // Now take care of the dots (thousand separators), from right to left.
    // Each dot is replaced by the scale word of the group on its left.
    //
    int i = 0; // Index of the scale word that belongs to the current dot
    int dotPos = formatedAmount.LastIndexOf('.');
    while (dotPos > -1)
    {
        // A group of zeros has no words at all; it must not get a scale word
        // (otherwise 1.000.000 would read "one million thousands").
        int groupStart = dotPos > 0 ? formatedAmount.LastIndexOf('.', dotPos - 1) + 1 : 0;
        bool groupIsEmpty = formatedAmount.Substring(groupStart, dotPos - groupStart).Trim().Length == 0;
        string scale = groupIsEmpty
            ? " "
            : " " + vrb_mil[i] + (i > 0 ? millions : String.Empty) + " ";
        // replace the dot with the verbal.
        formatedAmount = formatedAmount.Remove(dotPos, 1).Insert(dotPos, scale);
        // Move on
        i++;
        dotPos = formatedAmount.LastIndexOf('.');
    }
    //
    // Take care of the special forms needed around thousands and millions
    //
    formatedAmount = reg_plural.Replace(formatedAmount, new MatchEvaluator(Translate_plural));
    formatedAmount = reg_singleThousand.Replace(formatedAmount, new MatchEvaluator(Translate_singleThousand));
    formatedAmount = reg_singleMillion.Replace(formatedAmount, new MatchEvaluator(Translate_singleMillion));
    formatedAmount = reg_thousandGenre.Replace(formatedAmount, new MatchEvaluator(Translate_thousandGenre));
    //
    // Three and four also change form in front of the thousands word.
    // Inside a compound (e.g. thirteen) the new ending keeps an accent, which ToProper
    // then leaves as the only accent of the word; on its own "τρεις" carries no accent.
    //
    string thousands = vrb_mil[0];
    formatedAmount = formatedAmount.Replace(vrb_001[4] + " " + thousands, "τέσσερις " + thousands);
    formatedAmount = Regex.Replace(formatedAmount, "(?<=[ά-ώ])" + vrb_001[3] + " " + thousands, "τρείς " + thousands);
    formatedAmount = formatedAmount.Replace(vrb_001[3] + " " + thousands, "τρεις " + thousands);
    //
    // Replace invalid concatenations (δέκαένα, δέκαδύο)
    //
    formatedAmount = formatedAmount.Replace("δέκαένα", "ένδεκα").Replace("δέκαμία", "ένδεκα").Replace("δέκαδύο", "δώδεκα");
    //
    // Split at the decimal comma (always present because of the ".00" in the format)
    // and put the currency and cent names in place.
    //
    int commaPos = formatedAmount.IndexOf(',');
    string wholePart = formatedAmount.Substring(0, commaPos).Trim();
    string centPart = formatedAmount.Substring(commaPos + 1).Trim();
    // Amounts below one unit have no words in front of the comma; say "zero" explicitly.
    if (wholePart.Length == 0)
        wholePart = "μηδέν";
    formatedAmount = wholePart + " " + CurrencyVerbal;
    // Add the cents only when there are any
    if (centPart.Length > 0)
        formatedAmount += " και " + centPart + " " + CentVerbal;
    //
    // Finally convert it to proper case, and fix accent in concatenated words,
    // after adding the prefix (for negative numbers)
    //
    return ToProper(prefix + formatedAmount);
}
#endregion Public interface methods
#region Private helper methods
/// <summary>
/// Converts the text to a more correct and readable form: single accent per word,
/// capitalised words, final sigma at word ends and no repeated spaces.
/// </summary>
/// <param name="text">Text to make proper.</param>
/// <returns>The text made proper, without leading or trailing whitespace.</returns>
static private string ToProper(string text)
{
    //
    // Start from lower case. The leading space lets the first word be matched
    // by the word-start pattern, which needs a non-word character in front of the letter.
    //
    text = " " + text.Trim().ToLower();
    //
    // Remove the double accent from single words,
    // i.e. "τριάντατέσσερα" (which comes from concatenating "τριάντα" with "τέσσερα")
    // becomes "τριαντατέσσερα"
    //
    text = reg_acc.Replace(text, new MatchEvaluator(FixAccent));
    //
    // Now convert it to Proper Case (first letter of every word in upper case)
    //
    text = reg_wrd.Replace(text, new MatchEvaluator(ToUpper));
    //
    // Make sure there is no non-final sigma left at the end of a word.
    // The trailing space lets the last word be matched as well.
    //
    text += " ";
    text = reg_s.Replace(text, new MatchEvaluator(FixSigma));
    //
    // Now collapse runs of spaces into a single space.
    // The two-space string is built explicitly so that the loop condition cannot silently
    // degrade to a single space (which would match forever and never terminate).
    //
    string doubleSpace = new String(' ', 2);
    string singleSpace = new String(' ', 1);
    while (text.IndexOf(doubleSpace, StringComparison.Ordinal) > -1)
        text = text.Replace(doubleSpace, singleSpace);
    // done...
    return text.Trim();
}
/// <summary>
/// Converts a string array to a Regex alternation group.
/// </summary>
/// <remarks>
/// Null and empty items are skipped, and every remaining item is escaped so that it is matched literally.
/// No separator is written after the last item, so the group itself never matches the empty string;
/// callers that want the group to be optional append their own quantifier.
/// </remarks>
/// <param name="array">String array to convert.</param>
/// <returns>A parenthesised pattern that matches exactly one of the non-empty strings in the array ("()" when there are none).</returns>
static private string Array2Group(string[] array)
{
    System.Text.StringBuilder alternatives = new System.Text.StringBuilder();
    foreach (string item in array)
    {
        if (String.IsNullOrEmpty(item))
            continue;
        if (alternatives.Length > 0)
            alternatives.Append('|');
        alternatives.Append(Regex.Escape(item));
    }
    return "(" + alternatives.ToString() + ")";
}
#endregion Private helper methods
}
/// <summary>
/// GreekEuro is a small value type that demonstrates an alternative way of using GreekEuroVerbalizer.
/// </summary>
/// <remarks>
/// The implementation is deliberately incomplete; it only illustrates the concept.
/// It is a struct so that it has value semantics, but a class would work as well.
/// Implicit conversions to and from decimal are provided, plus an implicit conversion to string.
/// Implicit conversions are not generally recommended; they are used here only to show how
/// flexible they can be for this kind of task.
/// </remarks>
public struct GreekEuro
{
    /// <summary>
    /// The wrapped decimal value.
    /// </summary>
    private decimal Amount;

    /// <summary>
    /// Private constructor: values are created through the implicit conversion from decimal.
    /// </summary>
    /// <param name="value">The decimal value to wrap.</param>
    private GreekEuro(decimal value)
    {
        Amount = value;
    }

    /// <summary>
    /// Converts a decimal to a GreekEuro (used when a decimal is assigned to a GreekEuro variable).
    /// </summary>
    /// <param name="Value">The decimal value being assigned.</param>
    /// <returns>A GreekEuro wrapping the decimal.</returns>
    static public implicit operator GreekEuro (decimal Value)
    {
        GreekEuro wrapped = new GreekEuro(Value);
        return wrapped;
    }

    /// <summary>
    /// Converts a GreekEuro to a decimal (used when a GreekEuro value is assigned to a decimal variable).
    /// </summary>
    /// <param name="Value">The GreekEuro value being assigned.</param>
    /// <returns>The wrapped decimal value.</returns>
    static public implicit operator decimal (GreekEuro Value)
    {
        decimal unwrapped = Value.Amount;
        return unwrapped;
    }

    /// <summary>
    /// Converts a GreekEuro to a string (used when a GreekEuro value is assigned to a string variable).
    /// </summary>
    /// <param name="Value">The GreekEuro value being assigned.</param>
    /// <returns>The Greek verbal form of the value.</returns>
    static public implicit operator string (GreekEuro Value)
    {
        string verbal = Value.ToString();
        return verbal;
    }

    /// <summary>
    /// Overrides Object.ToString() so that it returns the Greek verbal form of the wrapped value.
    /// </summary>
    /// <returns>The text produced by GreekEuroVerbalizer.GetVerbal for the wrapped value.</returns>
    override public string ToString()
    {
        decimal current = this.Amount;
        return GreekEuroVerbalizer.GetVerbal(current);
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment