Created
March 26, 2012 08:28
-
-
Save tommy-carlier/2203905 to your computer and use it in GitHub Desktop.
Text prettifier (C#)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
namespace TC
{
    /// <summary>Makes text pretty, replacing certain patterns with their typographically correct equivalent.</summary>
    public static class TextPrettifier
    {
        // All patterns are compiled once and shared by every call.
        private static readonly Regex
            // "<->", "<-->", ...: tried before the one-directional arrows, which would
            // otherwise consume the "<-" part and leave a stray ">" behind.
            regexBidirectionalArrow = new Regex(@"<-+>", RegexOptions.Compiled),
            // "<-", "<--", ...
            regexLeftArrow = new Regex(@"<-+", RegexOptions.Compiled),
            // "->", "-->", ...
            regexRightArrow = new Regex(@"-+>", RegexOptions.Compiled),
            // three or more consecutive dots
            regexEllipsis = new Regex(@"\.\.\.+", RegexOptions.Compiled),
            // The dash patterns require a word character or whitespace on BOTH sides, so they
            // never match inside a longer run of hyphens nor next to '<' or '>' (arrows).
            regexEnDash = new Regex(@"(?<=[\w\s])--(?=[\w\s])", RegexOptions.Compiled),
            regexEmDash = new Regex(@"(?<=[\w\s])---(?=[\w\s])", RegexOptions.Compiled),
            // a straight single quote with a letter directly before and after it
            regexApostrophe = new Regex(
                @"(?<=[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}])'(?=[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}])",
                RegexOptions.Compiled),
            // Quoted spans: the opening quote follows start-of-text or a non-word character, the
            // content starts with a word character and (if longer than one character) ends with
            // a word character or one of . ? !, and the closing quote precedes end-of-text or a
            // non-word character. Group 1 is the content between the quotes.
            regexSingleQuotes = new Regex(
                @"(?<=^|\W)'(\w(?:.*?(?:[\w.?!]))?)'(?=$|\W)",
                RegexOptions.Compiled | RegexOptions.Singleline),
            regexDoubleQuotes = new Regex(
                @"(?<=^|\W)""(\w(?:.*?(?:[\w.?!]))?)""(?=$|\W)",
                RegexOptions.Compiled | RegexOptions.Singleline);

        #region public extension method Prettifications.IsSelected

        /// <summary>Determines whether all the bits of <paramref name="selected"/> are selected in <paramref name="p"/>.</summary>
        /// <param name="p">The <see cref="Prettifications"/> to check.</param>
        /// <param name="selected">The <see cref="Prettifications"/> that contain the bits to check.</param>
        /// <returns>If all the bits of <paramref name="selected"/> are selected in <paramref name="p"/>, <c>true</c>; otherwise, <c>false</c>.</returns>
        public static bool IsSelected(this Prettifications p, Prettifications selected)
        {
            return (p & selected) == selected;
        }

        #endregion

        #region public extension method String.Prettify

        /// <summary>Makes the specified text pretty.</summary>
        /// <param name="text">The text to make pretty.</param>
        /// <returns>A pretty version of <paramref name="text"/>.</returns>
        /// <remarks>All prettifications are performed, curly quotes are used.</remarks>
        public static string Prettify(this string text)
        {
            return Prettify(text, Prettifications.All, QuotationMarks.CurlyQuotes);
        }

        /// <summary>Makes the specified text pretty.</summary>
        /// <param name="text">The text to make pretty.</param>
        /// <param name="prettifications">The <see cref="Prettifications"/> to perform.</param>
        /// <returns>A pretty version of <paramref name="text"/>.</returns>
        /// <remarks>Curly quotes are used if the single or double quotes prettifications are selected.</remarks>
        public static string Prettify(this string text, Prettifications prettifications)
        {
            return Prettify(text, prettifications, QuotationMarks.CurlyQuotes);
        }

        /// <summary>Makes the specified text pretty.</summary>
        /// <param name="text">The text to make pretty.</param>
        /// <param name="quotationMarks">The <see cref="QuotationMarks"/> to use.</param>
        /// <returns>A pretty version of <paramref name="text"/>.</returns>
        /// <remarks>All prettifications are performed.</remarks>
        public static string Prettify(this string text, QuotationMarks quotationMarks)
        {
            return Prettify(text, Prettifications.All, quotationMarks);
        }

        /// <summary>Makes the specified text pretty.</summary>
        /// <param name="text">The text to make pretty.</param>
        /// <param name="prettifications">The <see cref="Prettifications"/> to perform.</param>
        /// <param name="quotationMarks">The <see cref="QuotationMarks"/> to use.</param>
        /// <returns>A pretty version of <paramref name="text"/>.</returns>
        /// <remarks>
        /// A <c>null</c> or empty <paramref name="text"/> is returned unchanged. Any other text has its
        /// leading and trailing white space trimmed, even when <paramref name="prettifications"/> is
        /// <see cref="Prettifications.None"/>.
        /// </remarks>
        public static string Prettify(this string text, Prettifications prettifications, QuotationMarks quotationMarks)
        {
            if (string.IsNullOrEmpty(text))
                return text;

            text = text.Trim();
            if (prettifications == Prettifications.None || text.Length == 0)
                return text;

            SpecialChars chars = SpecialChars.None;
            int dotCount = 0, hyphenCount = 0, singleQuoteCount = 0, doubleQuoteCount = 0;

            // One cheap pass over the text records which trigger characters occur, so that the
            // replacement steps below can be skipped when they cannot possibly match.
            foreach (char c in text)
            {
                switch (c)
                {
                    case '=': chars |= SpecialChars.Equals; break;
                    case '<': chars |= SpecialChars.LessThan; break;
                    case '>': chars |= SpecialChars.GreaterThan; break;
                    case '!': chars |= SpecialChars.Exclamation; break;
                    case '.': dotCount += 1; break;
                    case '-': hyphenCount += 1; break;
                    case '\'': singleQuoteCount += 1; break;
                    case '"': doubleQuoteCount += 1; break;
                }
            }

            // Perform all the selected prettifications. The flags and counts describe the text
            // as it was BEFORE any replacement; a stale value only costs a replacement attempt
            // that finds nothing.
            return text
                .PrettifyEqualityChars(prettifications, chars)
                .PrettifyHyphens(prettifications, chars, hyphenCount)
                .PrettifyDots(prettifications, dotCount)
                .PrettifySingleQuotes(prettifications, singleQuoteCount, quotationMarks)
                .PrettifyDoubleQuotes(prettifications, doubleQuoteCount, quotationMarks);
        }

        #endregion

        #region private implementation methods

        /// <summary>Trigger characters found in the text being prettified.</summary>
        [Flags]
        private enum SpecialChars
        {
            None = 0,
            Equals = 1,
            LessThan = 2,
            GreaterThan = 4,
            Exclamation = 8,
        }

        /// <summary>Determines whether all the bits of <paramref name="c"/> are set in <paramref name="chars"/>.</summary>
        private static bool IsSelected(this SpecialChars chars, SpecialChars c)
        {
            return (chars & c) == c;
        }

        /// <summary>Replaces &lt;=, &gt;= and != with their single-character equivalents.</summary>
        /// <param name="text">The text to process.</param>
        /// <param name="p">The selected prettifications.</param>
        /// <param name="c">The trigger characters found in the text.</param>
        private static string PrettifyEqualityChars(this string text, Prettifications p, SpecialChars c)
        {
            // Every operator contains '=', so without one there is nothing to do.
            if (!p.IsSelected(Prettifications.Equality) || !c.IsSelected(SpecialChars.Equals))
                return text;

            if (c.IsSelected(SpecialChars.LessThan))
                text = text.Replace("<=", "\u2264"); // less than or equals
            if (c.IsSelected(SpecialChars.GreaterThan))
                text = text.Replace(">=", "\u2265"); // greater than or equals
            if (c.IsSelected(SpecialChars.Exclamation))
                text = text.Replace("!=", "\u2260"); // does not equal
            return text;
        }

        /// <summary>Replaces hyphen sequences with en-dashes, em-dashes and arrows.</summary>
        /// <param name="text">The text to process.</param>
        /// <param name="p">The selected prettifications.</param>
        /// <param name="c">The trigger characters found in the text.</param>
        /// <param name="count">The number of hyphens found in the text.</param>
        private static string PrettifyHyphens(this string text, Prettifications p, SpecialChars c, int count)
        {
            if (count < 1)
                return text;

            if (p.IsSelected(Prettifications.EnDash))
            {
                text = text.Replace(" - ", " \u2013 "); // spaced hyphen -> en-dash
                if (count >= 2)
                    text = regexEnDash.Replace(text, "\u2013"); // "--" -> en-dash
            }

            if (count >= 3 && p.IsSelected(Prettifications.EmDash))
                text = regexEmDash.Replace(text, "\u2014"); // "---" -> em-dash

            if (p.IsSelected(Prettifications.Arrows))
            {
                bool hasLessThan = c.IsSelected(SpecialChars.LessThan);
                bool hasGreaterThan = c.IsSelected(SpecialChars.GreaterThan);

                // Bidirectional arrows go first: the left-arrow pattern would otherwise turn
                // "<->" into a left arrow followed by a stray '>'.
                if (hasLessThan && hasGreaterThan)
                    text = regexBidirectionalArrow.Replace(text, "\u2194"); // left-right arrow
                if (hasLessThan)
                    text = regexLeftArrow.Replace(text, "\u2190"); // left arrow
                if (hasGreaterThan)
                    text = regexRightArrow.Replace(text, "\u2192"); // right arrow
            }

            return text;
        }

        /// <summary>Replaces runs of three or more dots with an ellipsis character.</summary>
        /// <param name="text">The text to process.</param>
        /// <param name="p">The selected prettifications.</param>
        /// <param name="count">The number of dots found in the text.</param>
        private static string PrettifyDots(this string text, Prettifications p, int count)
        {
            if (count >= 3 && p.IsSelected(Prettifications.Ellipsis))
                return regexEllipsis.Replace(text, "\u2026"); // ellipsis
            return text;
        }

        /// <summary>Replaces straight single quotes with apostrophes and pretty quotation marks.</summary>
        /// <param name="text">The text to process.</param>
        /// <param name="p">The selected prettifications.</param>
        /// <param name="count">The number of straight single quotes found in the text.</param>
        /// <param name="q">The quotation marks to use.</param>
        private static string PrettifySingleQuotes(this string text, Prettifications p, int count, QuotationMarks q)
        {
            if (count < 1)
                return text;

            // Apostrophes are converted first, so the quotation pattern only sees the
            // straight quotes that remain.
            if (p.IsSelected(Prettifications.Apostrophe))
                text = regexApostrophe.Replace(text, "\u2019"); // apostrophe between letters

            // A quoted span needs an opening and a closing quote.
            if (count >= 2 && p.IsSelected(Prettifications.SingleQuotes))
                text = regexSingleQuotes.Replace(text, GetSingleQuotesPattern(q));

            return text;
        }

        /// <summary>Replaces straight double quotes with pretty quotation marks.</summary>
        /// <param name="text">The text to process.</param>
        /// <param name="p">The selected prettifications.</param>
        /// <param name="count">The number of straight double quotes found in the text.</param>
        /// <param name="q">The quotation marks to use.</param>
        private static string PrettifyDoubleQuotes(this string text, Prettifications p, int count, QuotationMarks q)
        {
            // A quoted span needs an opening and a closing quote.
            if (count >= 2 && p.IsSelected(Prettifications.DoubleQuotes))
                return regexDoubleQuotes.Replace(text, GetDoubleQuotesPattern(q));
            return text;
        }

        /// <summary>Gets the regex replacement pattern for single quotes; <c>$1</c> is the quoted content.</summary>
        /// <remarks>Unknown <see cref="QuotationMarks"/> values fall back to curly quotes.</remarks>
        private static string GetSingleQuotesPattern(QuotationMarks q)
        {
            switch (q)
            {
                case QuotationMarks.AngularQuotes:
                    return "\u2039$1\u203A";
                case QuotationMarks.SpacedAngularQuotes:
                    return "\u2039\u00A0$1\u00A0\u203A"; // padded with non-breaking spaces
                default:
                    return "\u2018$1\u2019"; // curly quotes
            }
        }

        /// <summary>Gets the regex replacement pattern for double quotes; <c>$1</c> is the quoted content.</summary>
        /// <remarks>Unknown <see cref="QuotationMarks"/> values fall back to curly quotes.</remarks>
        private static string GetDoubleQuotesPattern(QuotationMarks q)
        {
            switch (q)
            {
                case QuotationMarks.AngularQuotes:
                    return "\u00AB$1\u00BB";
                case QuotationMarks.SpacedAngularQuotes:
                    return "\u00AB\u00A0$1\u00A0\u00BB"; // padded with non-breaking spaces
                default:
                    return "\u201C$1\u201D"; // curly quotes
            }
        }

        #endregion

        #region inner enum Prettifications

        /// <summary>Indicates the types of prettifications that should be performed.</summary>
        [Flags]
        public enum Prettifications
        {
            /// <summary>No prettifications will be performed.</summary>
            None = 0,

            /// <summary>Equality operators &lt;=, &gt;= and != will be prettified.</summary>
            Equality = 1,

            /// <summary>Hyphens will be replaced with en-dashes, where appropriate.</summary>
            EnDash = 2,

            /// <summary>Hyphens will be replaced with em-dashes, where appropriate.</summary>
            EmDash = 4,

            /// <summary>&lt;-, -&gt; and &lt;-&gt; will be replaced with proper arrows.</summary>
            Arrows = 8,

            /// <summary>... will be replaced with proper ellipsis.</summary>
            Ellipsis = 16,

            /// <summary>The single quote character between letters will be replaced with a proper apostrophe.</summary>
            Apostrophe = 32,

            /// <summary>Regular single quote characters will be replaced with pretty ones (curly or angular).</summary>
            SingleQuotes = 64,

            /// <summary>Regular double quote characters will be replaced with pretty ones (curly or angular).</summary>
            DoubleQuotes = 128,

            /// <summary>Combination of <see cref="EnDash"/> and <see cref="EmDash"/>.</summary>
            Dashes = EnDash | EmDash,

            /// <summary>Combination of <see cref="SingleQuotes"/> and <see cref="DoubleQuotes"/>.</summary>
            Quotes = SingleQuotes | DoubleQuotes,

            /// <summary>All prettifications will be performed.</summary>
            All = Equality | Dashes | Arrows | Ellipsis | Apostrophe | Quotes,
        }

        #endregion

        #region inner enum QuotationMarks

        /// <summary>Indicates what type of quotation marks to use for single and double quotes.</summary>
        public enum QuotationMarks
        {
            /// <summary>Curly quotes will be used.</summary>
            CurlyQuotes = 0,

            /// <summary>Angular quotes will be used.</summary>
            AngularQuotes,

            /// <summary>Angular quotes with non-breaking spaces will be used (e.g. for texts in French).</summary>
            SpacedAngularQuotes,
        }

        #endregion
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment