Skip to content

Instantly share code, notes, and snippets.

@peppy
Created November 30, 2017 16:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save peppy/fa0c51bfc14a7d553f8c75d61d59bbb6 to your computer and use it in GitHub Desktop.
Save peppy/fa0c51bfc14a7d553f8c75d61d59bbb6 to your computer and use it in GitHub Desktop.
LinkFormatter class from osu-stable. Released into the public domain; use as necessary.
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
namespace osu_common.Helpers
{
/// <summary>
/// Turns raw message text into display text plus the list of <see cref="Link"/> ranges found in it.
/// </summary>
/// <remarks>
/// Markup forms (<c>[url text]</c>, <c>(text)[url]</c>, <c>[[wiki page]]</c>) are rewritten to their display
/// text. Bare URLs, editor timestamps and <c>#channel</c> names are linked in place. Emoji matched by the
/// <c>emoji</c> pattern are replaced by a run of NUL characters.
/// </remarks>
public static class LinkFormatter
{
    //[[Performance Points]] -> wiki:Performance Points (https://osu.ppy.sh/wiki/Performance Points)
    //note: the page name is inserted into the URL as-is; spaces are not converted to underscores here.
    private static readonly Regex regexWiki = new Regex(@"\[\[([^\]]+)\]\]");

    //(test)[https://osu.ppy.sh/b/1234] -> test (https://osu.ppy.sh/b/1234)
    private static readonly Regex oldFormatLink = new Regex(@"\(([^\)]*)\)\[([a-z]+://[^ ]+)\]");

    //[https://osu.ppy.sh/b/1234 Beatmap [Hard] (poop)] -> Beatmap [hard] (poop) (https://osu.ppy.sh/b/1234)
    //the balancing groups (open/close) let the display text contain matched pairs of square brackets.
    private static readonly Regex newFormatLink = new Regex(@"\[([a-z]+://[^ ]+) ([^\[\]]*(((?<open>\[)[^\[\]]*)+((?<close-open>\])[^\[\]]*)+)*(?(open)(?!)))\]");

    //https://osu.ppy.sh -> https://osu.ppy.sh (https://osu.ppy.sh)
    //static Regex basicLink = new Regex(@"[a-z]+://[^ ]+[a-zA-Z0-9=/\?]");
    // advanced, RFC-compatible version of basicLink that matches any possible URL, *but* allows certain invalid characters that are widely used
    // This is in the format (<required>, [optional]):
    // http[s]://<domain>.<tld>[:port][/path][?query][#fragment]
    private static readonly Regex advancedLink = new Regex(@"(?<paren>\([^)]*)?" +
        @"(?<link>https?:\/\/" +
        @"(?<domain>(?:[a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*[a-z][a-z0-9-]*[a-z0-9]" + // domain, TLD
        @"(?::\d+)?)" + // port
        @"(?<path>(?:(?:\/+(?:[a-z0-9$_\.\+!\*\',;:\(\)@&~=-]|%[0-9a-f]{2})*)*" + // path
        @"(?:\?(?:[a-z0-9$_\+!\*\',;:\(\)@&=\/~-]|%[0-9a-f]{2})*)?)?" + // query
        @"(?:#(?:[a-z0-9$_\+!\*\',;:\(\)@&=\/~-]|%[0-9a-f]{2})*)?)?)", // fragment
        RegexOptions.IgnoreCase);

    //00:00:000 (1,2,3) - test
    private static readonly Regex timeMatch = new Regex(@"\d\d:\d\d:\d\d\d? [^-]*");

    //#osu
    private static readonly Regex channelMatch = new Regex(@"#[a-zA-Z]+[a-zA-Z0-9]+");

    // \:01
    //static Regex emoji = new Regex(@"\\\:\d\d");
    // a \uD83D high surrogate followed by a low surrogate in the range \uDC00-\uDE4F
    private static readonly Regex emoji = new Regex(@"(\uD83D[\uDC00-\uDE4F])");

    /// <summary>
    /// Returns true when the character range [index, index + length) shares at least one character
    /// with a link that has already been recorded on <paramref name="result"/>.
    /// </summary>
    private static bool overlapsExistingLink(LinkFormatterResult result, int index, int length)
    {
        return result.Links.Exists(l => l.Index < index + length && index < l.Index + l.Length);
    }

    /// <summary>
    /// Records a link for every bare URL matched by <paramref name="against"/>. The text itself is not modified.
    /// </summary>
    /// <param name="against">Pattern exposing the named groups "paren" and "link".</param>
    /// <param name="result">Result whose <c>Text</c> is scanned and whose <c>Links</c> list is appended to.</param>
    /// <param name="startIndex">Character position in <c>result.Text</c> at which matching starts.</param>
    private static void handleAdvanced(Regex against, LinkFormatterResult result, int startIndex = 0)
    {
        foreach (Match m in against.Matches(result.Text, startIndex))
        {
            int index = m.Index;
            string prefix = m.Groups["paren"].Value;
            string link = m.Groups["link"].Value;

            if (!String.IsNullOrEmpty(prefix))
            {
                // the match starts at the opening parenthesis; the link itself starts after the prefix.
                index += prefix.Length;

                // the path/query/fragment character sets accept ')', so a URL written inside
                // parentheses can swallow the closing one. Give it back to the sentence.
                if (link.EndsWith(")", StringComparison.Ordinal))
                    link = link.Remove(link.Length - 1);
            }

            int indexLength = link.Length;

            // an already-processed formatted link can have a URL as (part of) its display text,
            // e.g. "[https://a.b https://a.b]". Don't register a second link over the same characters.
            if (overlapsExistingLink(result, index, indexLength))
                continue;

            result.Links.Add(new Link(link, index, indexLength));
        }
    }

    /// <summary>
    /// Replaces every match of <paramref name="against"/> with its display text and records a link over it.
    /// </summary>
    /// <param name="against">Pattern to search for.</param>
    /// <param name="display">Composite format string for the replacement text; {0} is the whole match, {1} and {2} the first two capture groups.</param>
    /// <param name="link">Composite format string for the link URL, with the same placeholders as <paramref name="display"/>.</param>
    /// <param name="result">Result whose <c>Text</c> is rewritten and whose <c>Links</c> list is updated.</param>
    /// <param name="startIndex">Character position in <c>result.Text</c> at which matching starts.</param>
    private static void handleMatches(Regex against, string display, string link, LinkFormatterResult result, int startIndex = 0)
    {
        // Matches are located in the text as it was on entry; this tracks how much shorter the text
        // has become because of replacements already made in this pass.
        int captureOffset = 0;

        foreach (Match m in against.Matches(result.Text, startIndex))
        {
            int index = m.Index - captureOffset;

            string whole = m.Groups[0].Value;
            string first = m.Groups.Count > 1 ? m.Groups[1].Value : "";
            string second = m.Groups.Count > 2 ? m.Groups[2].Value : "";

            string displayText = string.Format(display, whole, first, second).Trim();
            string linkText = string.Format(link, whole, first, second).Trim();

            if (displayText.Length == 0 || linkText.Length == 0) continue;

            //ensure we don't have encapsulated links.
            int end = index + m.Length;
            bool encapsulated = result.Links.Exists(l =>
                (l.Index <= index && l.Index + l.Length >= end) ||
                (index <= l.Index && end >= l.Index + l.Length));

            if (encapsulated) continue;

            int shrink = m.Length - displayText.Length;

            result.Text = result.Text.Remove(index, m.Length).Insert(index, displayText);

            //since we just changed the line display text, offset any already processed links.
            result.Links.ForEach(l => l.Index -= l.Index > index ? shrink : 0);

            result.Links.Add(new Link(linkText, index, displayText.Length));

            //adjust the offset for processing the current matches group.
            captureOffset += shrink;
        }
    }

    /// <summary>
    /// Formats <paramref name="input"/> and returns the display text together with the links found in it.
    /// </summary>
    /// <param name="input">Raw text. A null or empty value produces a result with no links.</param>
    /// <param name="startIndex">Character position at which link detection starts; earlier text is left untouched.</param>
    /// <param name="space">Number of NUL characters that replace each matched emoji. With a value of zero or less, emoji are left in the text and not linked.</param>
    /// <returns>A <see cref="LinkFormatterResult"/> holding the rewritten text, the original text and the links.</returns>
    public static LinkFormatterResult Format(string input, int startIndex = 0, int space = 3)
    {
        LinkFormatterResult result = new LinkFormatterResult(input);

        // nothing to scan; this also avoids the ArgumentNullException Regex.Matches throws for a null input.
        if (string.IsNullOrEmpty(input))
            return result;

        // handle the [link display] format
        handleMatches(newFormatLink, "{2}", "{1}", result, startIndex);

        // handle the ()[] link format
        handleMatches(oldFormatLink, "{1}", "{2}", result, startIndex);

        // handle wiki links
        handleMatches(regexWiki, "wiki:{1}", "https://osu.ppy.sh/wiki/{1}", result, startIndex);

        // handle bare links
        handleAdvanced(advancedLink, result, startIndex);

        // handle editor times
        handleMatches(timeMatch, "{0}", "osu://edit/{0}", result, startIndex);

        // handle channels
        handleMatches(channelMatch, "{0}", "osu://chan/{0}", result, startIndex);

        // handle emoji: each one is replaced by `space` NUL characters.
        // NUL is used because handleMatches trims whitespace from the display text, and '\0' is not whitespace.
        string empty = space > 0 ? new string('\0', space) : "";
        handleMatches(emoji, empty, "{0}", result, startIndex);

        //result.Text = result.Text.Replace('\0', ' ');
        return result;
    }
}
/// <summary>
/// A linked range of characters: the URL it points at plus where the range sits in the text.
/// </summary>
public class Link
{
    /// <summary>Target of the link.</summary>
    public string Url;

    /// <summary>Position of the first linked character.</summary>
    public int Index;

    /// <summary>Number of linked characters.</summary>
    public int Length;

    /// <summary>
    /// Creates a link to <paramref name="url"/> covering <paramref name="length"/> characters
    /// beginning at <paramref name="startIndex"/>.
    /// </summary>
    public Link(string url, int startIndex, int length)
    {
        this.Url = url;
        this.Length = length;
        this.Index = startIndex;
    }
}
/// <summary>
/// Output of a formatting pass: the text to display, the text as originally supplied,
/// and the links located in the display text.
/// </summary>
public class LinkFormatterResult : ICloneable
{
    /// <summary>Links found so far.</summary>
    public List<Link> Links = new List<Link>();

    /// <summary>Current display text.</summary>
    public string Text;

    /// <summary>The text this result was constructed with.</summary>
    public string OriginalText;

    /// <summary>
    /// Creates a result with no links whose display text and original text are both <paramref name="text"/>.
    /// </summary>
    public LinkFormatterResult(string text)
    {
        OriginalText = Text = text;
    }

    #region ICloneable Members

    /// <summary>
    /// Creates an independent copy of this result.
    /// </summary>
    /// <remarks>
    /// A plain <c>MemberwiseClone</c> would leave the copy pointing at the same <see cref="Links"/> list and
    /// the same <see cref="Link"/> objects, so adding or re-indexing links on one result would silently
    /// change the other. The list and its entries are therefore copied as well.
    /// </remarks>
    /// <returns>A new <see cref="LinkFormatterResult"/> with its own list of links.</returns>
    public object Clone()
    {
        LinkFormatterResult copy = (LinkFormatterResult)this.MemberwiseClone();

        if (Links != null)
        {
            copy.Links = new List<Link>(Links.Count);
            foreach (Link l in Links)
                copy.Links.Add(l == null ? null : new Link(l.Url, l.Index, l.Length));
        }

        return copy;
    }

    #endregion
}
}
using Microsoft.VisualStudio.TestTools.UnitTesting;
using osu_common.Helpers;
using System.Text.RegularExpressions;
namespace osu_common
{
/// <summary>
/// Tests for <see cref="LinkFormatter.Format"/>: each case feeds a raw string in and checks the
/// resulting display text and the URL / index / length of the links found.
/// </summary>
[TestClass]
public class LinkFormatterTests
{
    /// <summary>A bare URL is linked in place; the trailing full stop is not part of the link.</summary>
    [TestMethod]
    public void TestBareLink()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a http://www.basic-link.com/?test=test.");

        Assert.AreEqual("This is a http://www.basic-link.com/?test=test.", result.Text);
        Assert.AreEqual(1, result.Links.Count);
        Assert.AreEqual("http://www.basic-link.com/?test=test", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(36, result.Links[0].Length);
    }

    /// <summary>Several bare URLs in one sentence, including one inside parentheses.</summary>
    [TestMethod]
    public void TestMultipleComplexLinks()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a http://test.io/link#fragment. (see https://twitter.com). Also, This string should not be altered. http://example.com/");

        Assert.AreEqual("This is a http://test.io/link#fragment. (see https://twitter.com). Also, This string should not be altered. http://example.com/", result.Text);
        Assert.AreEqual(3, result.Links.Count);

        Assert.AreEqual("http://test.io/link#fragment", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(28, result.Links[0].Length);

        Assert.AreEqual("https://twitter.com", result.Links[1].Url);
        Assert.AreEqual(45, result.Links[1].Index);
        Assert.AreEqual(19, result.Links[1].Length);

        Assert.AreEqual("http://example.com/", result.Links[2].Url);
        Assert.AreEqual(108, result.Links[2].Index);
        Assert.AreEqual(19, result.Links[2].Length);
    }

    /// <summary>A "#!" fragment is kept as part of the URL.</summary>
    [TestMethod]
    public void TestAjaxLinks()
    {
        LinkFormatterResult result = LinkFormatter.Format("https://twitter.com/#!/hashbanglinks");

        Assert.AreEqual("https://twitter.com/#!/hashbanglinks", result.Text);
        Assert.AreEqual(0, result.Links[0].Index);
        Assert.AreEqual(36, result.Links[0].Length);
    }

    /// <summary>A '~' in the path is kept as part of the URL.</summary>
    [TestMethod]
    public void TestUnixHomeLinks()
    {
        LinkFormatterResult result = LinkFormatter.Format("http://www.chiark.greenend.org.uk/~sgtatham/putty/");

        Assert.AreEqual("http://www.chiark.greenend.org.uk/~sgtatham/putty/", result.Text);
        Assert.AreEqual(0, result.Links[0].Index);
        Assert.AreEqual(50, result.Links[0].Length);
    }

    /// <summary>Upper-case characters in the path do not cut the link short.</summary>
    [TestMethod]
    public void TestCaseInsensitiveLinks()
    {
        LinkFormatterResult result = LinkFormatter.Format("look: http://puu.sh/7Ggh8xcC6/asf0asd9876.NEF");

        Assert.AreEqual("look: http://puu.sh/7Ggh8xcC6/asf0asd9876.NEF", result.Text);
        Assert.AreEqual(6, result.Links[0].Index);
        Assert.AreEqual(39, result.Links[0].Length);
    }

    /// <summary>[[Page]] becomes "wiki:Page" linked to the wiki URL.</summary>
    [TestMethod]
    public void TestWikiLink()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a [[Wiki Link]].");

        Assert.AreEqual("This is a wiki:Wiki Link.", result.Text);
        Assert.AreEqual(1, result.Links.Count);
        Assert.AreEqual("https://osu.ppy.sh/wiki/Wiki Link", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(14, result.Links[0].Length);
    }

    /// <summary>Several wiki links in a row keep correct indices after each replacement.</summary>
    [TestMethod]
    public void TestMultiWikiLink()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a [[Wiki Link]] [[Wiki:Link]][[Wiki.Link]].");

        Assert.AreEqual("This is a wiki:Wiki Link wiki:Wiki:Linkwiki:Wiki.Link.", result.Text);
        Assert.AreEqual(3, result.Links.Count);

        Assert.AreEqual("https://osu.ppy.sh/wiki/Wiki Link", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(14, result.Links[0].Length);

        Assert.AreEqual("https://osu.ppy.sh/wiki/Wiki:Link", result.Links[1].Url);
        Assert.AreEqual(25, result.Links[1].Index);
        Assert.AreEqual(14, result.Links[1].Length);

        Assert.AreEqual("https://osu.ppy.sh/wiki/Wiki.Link", result.Links[2].Url);
        Assert.AreEqual(39, result.Links[2].Index);
        Assert.AreEqual(14, result.Links[2].Length);
    }

    /// <summary>(text)[url] is reduced to "text" linked to url.</summary>
    [TestMethod]
    public void TestOldFormatLink()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a (simple test)[https://osu.ppy.sh].");

        Assert.AreEqual("This is a simple test.", result.Text);
        Assert.AreEqual(1, result.Links.Count);
        Assert.AreEqual("https://osu.ppy.sh", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(11, result.Links[0].Length);
    }

    /// <summary>[url text] is reduced to "text" linked to url.</summary>
    [TestMethod]
    public void TestNewFormatLink()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a [https://osu.ppy.sh simple test].");

        Assert.AreEqual("This is a simple test.", result.Text);
        Assert.AreEqual(1, result.Links.Count);
        Assert.AreEqual("https://osu.ppy.sh", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(11, result.Links[0].Length);
    }

    /// <summary>Wiki markup inside an already-linked display text is not turned into a second link.</summary>
    [TestMethod]
    public void TestRecursiveBreaking()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a [https://osu.ppy.sh [[simple test]]].");

        Assert.AreEqual("This is a [[simple test]].", result.Text);
        Assert.AreEqual(1, result.Links.Count);
        Assert.AreEqual("https://osu.ppy.sh", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(15, result.Links[0].Length);
    }

    /// <summary>Markup that starts before the start index is ignored.</summary>
    [TestMethod]
    public void TestStartOffset()
    {
        LinkFormatterResult result = LinkFormatter.Format("username[[test]] nothing to see here.", 15);
        Assert.AreEqual(0, result.Links.Count);

        result = LinkFormatter.Format("username[http://test nothing] to [[see]] here.", 21);
        Assert.AreEqual(1, result.Links.Count);
    }

    /// <summary>All link kinds in a single message; checks indices after every rewrite has been applied.</summary>
    [TestMethod]
    public void TestLinkComplex()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a [http://www.simple-test.com simple test] with some [traps] and [[wiki links]]. Don't forget to visit https://osu.ppy.sh (now!)[http://google.com]\uD83D\uDE12");

        Assert.AreEqual("This is a simple test with some [traps] and wiki:wiki links. Don't forget to visit https://osu.ppy.sh now!\0\0\0", result.Text);
        Assert.AreEqual(5, result.Links.Count);

        Link f = result.Links.Find(l => l.Url == "https://osu.ppy.sh/wiki/wiki links");
        Assert.IsNotNull(f);
        Assert.AreEqual(44, f.Index);
        Assert.AreEqual(15, f.Length);

        f = result.Links.Find(l => l.Url == "http://www.simple-test.com");
        Assert.IsNotNull(f);
        Assert.AreEqual(10, f.Index);
        Assert.AreEqual(11, f.Length);

        // "https://osu.ppy.sh" occupies 83..100 in the final text, a space follows at 101,
        // so "now!" starts at 102 and the emoji placeholder at 106.
        f = result.Links.Find(l => l.Url == "http://google.com");
        Assert.IsNotNull(f);
        Assert.AreEqual(102, f.Index);
        Assert.AreEqual(4, f.Length);

        f = result.Links.Find(l => l.Url == "https://osu.ppy.sh");
        Assert.IsNotNull(f);
        Assert.AreEqual(83, f.Index);
        Assert.AreEqual(18, f.Length);

        f = result.Links.Find(l => l.Url == "\uD83D\uDE12");
        Assert.IsNotNull(f);
        Assert.AreEqual(106, f.Index);
        Assert.AreEqual(3, f.Length);
    }

    /// <summary>Each emoji is replaced by three NUL characters and recorded as a link whose URL is the emoji.</summary>
    [TestMethod]
    public void TestEmoji()
    {
        LinkFormatterResult result = LinkFormatter.Format("Hello world\uD83D\uDE12<--This is an emoji,There are more:\uD83D\uDE10\uD83D\uDE00,\uD83D\uDE20");

        Assert.AreEqual("Hello world\0\0\0<--This is an emoji,There are more:\0\0\0\0\0\0,\0\0\0", result.Text);
        Assert.AreEqual(4, result.Links.Count);

        Assert.AreEqual(11, result.Links[0].Index);
        Assert.AreEqual(49, result.Links[1].Index);
        Assert.AreEqual(52, result.Links[2].Index);
        Assert.AreEqual(56, result.Links[3].Index);

        Assert.AreEqual("\uD83D\uDE12", result.Links[0].Url);
        Assert.AreEqual("\uD83D\uDE10", result.Links[1].Url);
        Assert.AreEqual("\uD83D\uDE00", result.Links[2].Url);
        Assert.AreEqual("\uD83D\uDE20", result.Links[3].Url);
    }

    /// <summary>
    /// Checks a "key, comparison operator, number" pattern. The pattern is defined locally and does not
    /// exercise <see cref="LinkFormatter"/>.
    /// </summary>
    [TestMethod]
    public void TestSearchReg()
    {
        // NOTE(review): inside a character class '|' is a literal, so this class also accepts '|' as an operator.
        Regex reg = new Regex(@"(\w*)([\>|\<|\=]\=?)(\d+)");
        string test1 = "od>8";
        string test3 = "AR>=9";

        Match m1 = reg.Match(test1);
        Assert.AreEqual(4, m1.Groups.Count);
        Assert.AreEqual("od", m1.Groups[1].Value);
        Assert.AreEqual(">", m1.Groups[2].Value);
        Assert.AreEqual("8", m1.Groups[3].Value);

        m1 = reg.Match(test3);
        Assert.AreEqual(4, m1.Groups.Count);
        Assert.AreEqual("AR", m1.Groups[1].Value);
        Assert.AreEqual(">=", m1.Groups[2].Value);
        Assert.AreEqual("9", m1.Groups[3].Value);
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment