Skip to content

Instantly share code, notes, and snippets.

@tommy-carlier
Created March 26, 2012 08:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tommy-carlier/2203905 to your computer and use it in GitHub Desktop.
Save tommy-carlier/2203905 to your computer and use it in GitHub Desktop.
Text prettifier (C#)
using System;
using System.Text;
using System.Text.RegularExpressions;
namespace TC
{
/// <summary>Makes text pretty, replacing certain patterns with their typographically correct equivalent.</summary>
/// <remarks>
/// The entry points are the <c>Prettify</c> extension methods on <see cref="string"/>.
/// Which replacements are performed is controlled by <see cref="Prettifications"/>;
/// the style of the quotation marks is controlled by <see cref="QuotationMarks"/>.
/// </remarks>
public static class TextPrettifier
{
    // All patterns are created once, when the type is initialized, and reused for every call.
    private static readonly Regex
        // "<", one or more hyphens, ">": a bidirectional arrow such as "<->" or "<-->".
        regexBidirectionalArrow = new Regex(@"<-+>", RegexOptions.Compiled),
        regexLeftArrow = new Regex(@"<-+", RegexOptions.Compiled),
        regexRightArrow = new Regex(@"-+>", RegexOptions.Compiled),
        // Three or more consecutive dots collapse into a single ellipsis character.
        regexEllipsis = new Regex(@"\.\.\.+", RegexOptions.Compiled),
        // The dash patterns require a word or white-space character on both sides,
        // so they never match inside a longer run of hyphens or next to '<' / '>'.
        regexEnDash = new Regex(@"(?<=[\w\s])--(?=[\w\s])", RegexOptions.Compiled),
        regexEmDash = new Regex(@"(?<=[\w\s])---(?=[\w\s])", RegexOptions.Compiled),
        // A single quote directly between two letters.
        regexApostrophe = new Regex(
            @"(?<=[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}])'(?=[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}])",
            RegexOptions.Compiled),
        // Quoted text: the quoted content is captured in group 1 and re-emitted between the pretty quotes.
        // Singleline lets '.' match line breaks, so a quotation may span several lines.
        regexSingleQuotes = new Regex(
            @"(?<=^|\W)'(\w(?:.*?(?:[\w.?!]))?)'(?=$|\W)",
            RegexOptions.Compiled | RegexOptions.Singleline),
        regexDoubleQuotes = new Regex(
            @"(?<=^|\W)""(\w(?:.*?(?:[\w.?!]))?)""(?=$|\W)",
            RegexOptions.Compiled | RegexOptions.Singleline);

    #region public extension method Prettifications.IsSelected

    /// <summary>Determines whether all the bits of <paramref name="selected"/> are selected in <paramref name="p"/>.</summary>
    /// <param name="p">The <see cref="Prettifications"/> to check.</param>
    /// <param name="selected">The <see cref="Prettifications"/> that contain the bits to check.</param>
    /// <returns>If all the bits of <paramref name="selected"/> are selected in <paramref name="p"/>, <c>true</c>; otherwise, <c>false</c>.</returns>
    /// <remarks>When <paramref name="selected"/> is <see cref="Prettifications.None"/>, the result is always <c>true</c>.</remarks>
    public static bool IsSelected(this Prettifications p, Prettifications selected)
    {
        return (p & selected) == selected;
    }

    #endregion

    #region public extension method String.Prettify

    /// <summary>Makes the specified text pretty.</summary>
    /// <param name="text">The text to make pretty.</param>
    /// <returns>A pretty version of <paramref name="text"/>.</returns>
    /// <remarks>All prettifications are performed, curly quotes are used.</remarks>
    public static string Prettify(this string text)
    {
        return Prettify(text, Prettifications.All, QuotationMarks.CurlyQuotes);
    }

    /// <summary>Makes the specified text pretty.</summary>
    /// <param name="text">The text to make pretty.</param>
    /// <param name="prettifications">The <see cref="Prettifications"/> to perform.</param>
    /// <returns>A pretty version of <paramref name="text"/>.</returns>
    /// <remarks>Curly quotes are used if the single or double quotes prettifications are selected.</remarks>
    public static string Prettify(this string text, Prettifications prettifications)
    {
        return Prettify(text, prettifications, QuotationMarks.CurlyQuotes);
    }

    /// <summary>Makes the specified text pretty.</summary>
    /// <param name="text">The text to make pretty.</param>
    /// <param name="quotationMarks">The <see cref="QuotationMarks"/> to use.</param>
    /// <returns>A pretty version of <paramref name="text"/>.</returns>
    /// <remarks>All prettifications are performed.</remarks>
    public static string Prettify(this string text, QuotationMarks quotationMarks)
    {
        return Prettify(text, Prettifications.All, quotationMarks);
    }

    /// <summary>Makes the specified text pretty.</summary>
    /// <param name="text">The text to make pretty.</param>
    /// <param name="prettifications">The <see cref="Prettifications"/> to perform.</param>
    /// <param name="quotationMarks">The <see cref="QuotationMarks"/> to use.</param>
    /// <returns>A pretty version of <paramref name="text"/>.</returns>
    /// <remarks>
    /// A <c>null</c> or empty <paramref name="text"/> is returned unchanged.
    /// Any other text has its leading and trailing white space removed before anything else happens,
    /// even when <paramref name="prettifications"/> is <see cref="Prettifications.None"/>.
    /// </remarks>
    public static string Prettify(this string text, Prettifications prettifications, QuotationMarks quotationMarks)
    {
        if (string.IsNullOrEmpty(text))
            return text;

        text = text.Trim();
        if (prettifications == Prettifications.None || text.Length == 0)
            return text;

        // Scan the text once to find out which special characters it contains, so that
        // replacements that cannot possibly match are skipped without touching the text.
        SpecialChars chars = SpecialChars.None;
        int dotCount = 0, hyphenCount = 0, singleQuoteCount = 0, doubleQuoteCount = 0;
        foreach (char c in text)
        {
            switch (c)
            {
                case '=': chars |= SpecialChars.EqualsSign; break;
                case '<': chars |= SpecialChars.LessThan; break;
                case '>': chars |= SpecialChars.GreaterThan; break;
                case '!': chars |= SpecialChars.Exclamation; break;
                case '.': dotCount += 1; break;
                case '-': hyphenCount += 1; break;
                case '\'': singleQuoteCount += 1; break;
                case '"': doubleQuoteCount += 1; break;
            }
        }

        // Perform all the selected prettifications; each step returns the text unchanged if it does not apply.
        return text
            .PrettifyEqualityChars(prettifications, chars)
            .PrettifyHyphens(prettifications, chars, hyphenCount)
            .PrettifyDots(prettifications, dotCount)
            .PrettifySingleQuotes(prettifications, singleQuoteCount, quotationMarks)
            .PrettifyDoubleQuotes(prettifications, doubleQuoteCount, quotationMarks);
    }

    #endregion

    #region private implementation methods

    /// <summary>Flags for the special characters that were found while scanning the text.</summary>
    [Flags]
    private enum SpecialChars
    {
        None = 0,
        EqualsSign = 1,
        LessThan = 2,
        GreaterThan = 4,
        Exclamation = 8,
    }

    /// <summary>Determines whether all the bits of <paramref name="c"/> are set in <paramref name="chars"/>.</summary>
    private static bool IsSelected(this SpecialChars chars, SpecialChars c)
    {
        return (chars & c) == c;
    }

    /// <summary>Replaces &lt;=, &gt;= and != with their single-character equivalents, if selected and present.</summary>
    private static string PrettifyEqualityChars(this string text, Prettifications p, SpecialChars c)
    {
        // Every equality operator contains '=', so without one there is nothing to do.
        if (!p.IsSelected(Prettifications.Equality) || !c.IsSelected(SpecialChars.EqualsSign))
            return text;

        if (c.IsSelected(SpecialChars.LessThan))
            text = text.Replace("<=", "\u2264"); // less than or equals
        if (c.IsSelected(SpecialChars.GreaterThan))
            text = text.Replace(">=", "\u2265"); // greater than or equals
        if (c.IsSelected(SpecialChars.Exclamation))
            text = text.Replace("!=", "\u2260"); // does not equal
        return text;
    }

    /// <summary>Replaces hyphen patterns with en-dashes, em-dashes and arrows, if selected and present.</summary>
    /// <param name="text">The text to process.</param>
    /// <param name="p">The selected prettifications.</param>
    /// <param name="c">The special characters that were found in the text.</param>
    /// <param name="count">The number of hyphens that were found in the text.</param>
    private static string PrettifyHyphens(this string text, Prettifications p, SpecialChars c, int count)
    {
        if (count < 1)
            return text;

        if (p.IsSelected(Prettifications.EnDash))
        {
            text = text.Replace(" - ", " \u2013 "); // en-dash
            if (count >= 2)
                text = regexEnDash.Replace(text, "\u2013"); // en-dash
        }

        if (count >= 3 && p.IsSelected(Prettifications.EmDash))
            text = regexEmDash.Replace(text, "\u2014"); // em-dash

        if (p.IsSelected(Prettifications.Arrows))
        {
            bool hasLessThan = c.IsSelected(SpecialChars.LessThan);
            bool hasGreaterThan = c.IsSelected(SpecialChars.GreaterThan);

            // The bidirectional arrow has to be handled first: otherwise the left-arrow pattern
            // consumes the hyphens of "<->" and leaves a stray '>' behind.
            if (hasLessThan && hasGreaterThan)
                text = regexBidirectionalArrow.Replace(text, "\u2194"); // left-right arrow
            if (hasLessThan)
                text = regexLeftArrow.Replace(text, "\u2190"); // left arrow
            if (hasGreaterThan)
                text = regexRightArrow.Replace(text, "\u2192"); // right arrow
        }

        return text;
    }

    /// <summary>Replaces runs of three or more dots with an ellipsis, if selected and possible.</summary>
    private static string PrettifyDots(this string text, Prettifications p, int count)
    {
        if (count < 3 || !p.IsSelected(Prettifications.Ellipsis))
            return text;
        return regexEllipsis.Replace(text, "\u2026"); // ellipsis
    }

    /// <summary>Replaces apostrophes and single-quoted text with their pretty equivalents, if selected and present.</summary>
    /// <remarks>
    /// Apostrophes are replaced first, so a single quote between two letters is never taken for a quotation mark.
    /// <paramref name="count"/> is the number of single quotes found before any replacement was made.
    /// </remarks>
    private static string PrettifySingleQuotes(this string text, Prettifications p, int count, QuotationMarks q)
    {
        if (count < 1)
            return text;

        if (p.IsSelected(Prettifications.Apostrophe))
            text = regexApostrophe.Replace(text, "\u2019"); // apostrophe between letters

        // A quotation needs an opening and a closing quote.
        if (count >= 2 && p.IsSelected(Prettifications.SingleQuotes))
            text = regexSingleQuotes.Replace(text, GetSingleQuotesPattern(q));

        return text;
    }

    /// <summary>Replaces double-quoted text with its pretty equivalent, if selected and possible.</summary>
    private static string PrettifyDoubleQuotes(this string text, Prettifications p, int count, QuotationMarks q)
    {
        // A quotation needs an opening and a closing quote.
        if (count < 2 || !p.IsSelected(Prettifications.DoubleQuotes))
            return text;
        return regexDoubleQuotes.Replace(text, GetDoubleQuotesPattern(q));
    }

    /// <summary>Gets the regex replacement pattern for single quotes; <c>$1</c> stands for the quoted text.</summary>
    private static string GetSingleQuotesPattern(QuotationMarks q)
    {
        switch (q)
        {
            case QuotationMarks.AngularQuotes:
                return "\u2039$1\u203A";
            case QuotationMarks.SpacedAngularQuotes:
                return "\u2039\u00A0$1\u00A0\u203A";
            default: // CurlyQuotes, and any value that is not a defined member
                return "\u2018$1\u2019";
        }
    }

    /// <summary>Gets the regex replacement pattern for double quotes; <c>$1</c> stands for the quoted text.</summary>
    private static string GetDoubleQuotesPattern(QuotationMarks q)
    {
        switch (q)
        {
            case QuotationMarks.AngularQuotes:
                return "\u00AB$1\u00BB";
            case QuotationMarks.SpacedAngularQuotes:
                return "\u00AB\u00A0$1\u00A0\u00BB";
            default: // CurlyQuotes, and any value that is not a defined member
                return "\u201C$1\u201D";
        }
    }

    #endregion

    #region inner enum Prettifications

    /// <summary>Indicates the types of prettifications that should be performed.</summary>
    [Flags]
    public enum Prettifications
    {
        /// <summary>No prettifications will be performed.</summary>
        None = 0,
        /// <summary>Equality operators &lt;=, &gt;= and != will be prettified.</summary>
        Equality = 1,
        /// <summary>Hyphens will be replaced with en-dashes, where appropriate.</summary>
        EnDash = 2,
        /// <summary>Hyphens will be replaced with em-dashes, where appropriate.</summary>
        EmDash = 4,
        /// <summary>&lt;-, -&gt; and &lt;-&gt; will be replaced with proper arrows.</summary>
        Arrows = 8,
        /// <summary>... will be replaced with proper ellipsis.</summary>
        Ellipsis = 16,
        /// <summary>The single quote character between letters will be replaced with a proper apostrophe.</summary>
        Apostrophe = 32,
        /// <summary>Regular single quote characters will be replaced with pretty ones (curly or angular).</summary>
        SingleQuotes = 64,
        /// <summary>Regular double quote characters will be replaced with pretty ones (curly or angular).</summary>
        DoubleQuotes = 128,
        /// <summary>Combination of <see cref="EnDash"/> and <see cref="EmDash"/>.</summary>
        Dashes = EnDash | EmDash,
        /// <summary>Combination of <see cref="SingleQuotes"/> and <see cref="DoubleQuotes"/>.</summary>
        Quotes = SingleQuotes | DoubleQuotes,
        /// <summary>All prettifications will be performed.</summary>
        All = Equality | Dashes | Arrows | Ellipsis | Apostrophe | Quotes,
    }

    #endregion

    #region inner enum QuotationMarks

    /// <summary>Indicates what type of quotation marks to use for single and double quotes.</summary>
    public enum QuotationMarks
    {
        /// <summary>Curly quotes will be used.</summary>
        CurlyQuotes = 0,
        /// <summary>Angular quotes will be used.</summary>
        AngularQuotes,
        /// <summary>Angular quotes with non-breaking spaces will be used (e.g. for texts in French).</summary>
        SpacedAngularQuotes,
    }

    #endregion
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment