Created
November 30, 2017 16:31
-
-
Save peppy/fa0c51bfc14a7d553f8c75d61d59bbb6 to your computer and use it in GitHub Desktop.
LinkFormatter class from osu-stable. Released under public domain; use as necessary.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
namespace osu_common.Helpers | |
{ | |
/// <summary>
/// Scans a line of text for links (bracketed link formats, wiki links, bare URLs,
/// editor timestamps, channel names and emoji), rewrites the text to its display form
/// and records where each link sits inside the rewritten text.
/// </summary>
public static class LinkFormatter
{
    // [[Performance Points]] -> wiki:Performance Points (https://osu.ppy.sh/wiki/Performance Points)
    // The captured text is inserted into the URL verbatim; spaces are not converted.
    private static readonly Regex regexWiki = new Regex(@"\[\[([^\]]+)\]\]");

    // (test)[https://osu.ppy.sh/b/1234] -> test (https://osu.ppy.sh/b/1234)
    private static readonly Regex oldFormatLink = new Regex(@"\(([^\)]*)\)\[([a-z]+://[^ ]+)\]");

    // [https://osu.ppy.sh/b/1234 Beatmap [Hard] (poop)] -> Beatmap [hard] (poop) (https://osu.ppy.sh/b/1234)
    // Uses balancing groups so nested square brackets inside the display text must pair up.
    private static readonly Regex newFormatLink = new Regex(@"\[([a-z]+://[^ ]+) ([^\[\]]*(((?<open>\[)[^\[\]]*)+((?<close-open>\])[^\[\]]*)+)*(?(open)(?!)))\]");

    // https://osu.ppy.sh -> https://osu.ppy.sh (https://osu.ppy.sh)
    // Previous, simpler pattern kept for reference: [a-z]+://[^ ]+[a-zA-Z0-9=/\?]
    //
    // advanced, RFC-compatible version of the basic link pattern that matches any possible URL,
    // *but* allows certain invalid characters that are widely used.
    // This is in the format (<required>, [optional]):
    //   http[s]://<domain>.<tld>[:port][/path][?query][#fragment]
    //
    // CultureInvariant keeps the case-insensitive [a-z] classes independent of the current culture
    // (otherwise e.g. an upper-case 'I' stops matching under Turkish casing rules).
    private static readonly Regex advancedLink = new Regex(@"(?<paren>\([^)]*)?" +
        @"(?<link>https?:\/\/" +
        @"(?<domain>(?:[a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*[a-z][a-z0-9-]*[a-z0-9]" + // domain, TLD
        @"(?::\d+)?)" + // port
        @"(?<path>(?:(?:\/+(?:[a-z0-9$_\.\+!\*\',;:\(\)@&~=-]|%[0-9a-f]{2})*)*" + // path
        @"(?:\?(?:[a-z0-9$_\+!\*\',;:\(\)@&=\/~-]|%[0-9a-f]{2})*)?)?" + // query
        @"(?:#(?:[a-z0-9$_\+!\*\',;:\(\)@&=\/~-]|%[0-9a-f]{2})*)?)?)", // fragment
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    // 00:00:000 (1,2,3) - test
    private static readonly Regex timeMatch = new Regex(@"\d\d:\d\d:\d\d\d? [^-]*");

    // #osu
    private static readonly Regex channelMatch = new Regex(@"#[a-zA-Z]+[a-zA-Z0-9]+");

    // Surrogate pairs with high surrogate U+D83D and low surrogate in U+DC00..U+DE4F.
    // (An earlier pattern matched the textual form "\:01" instead: \\\:\d\d)
    private static readonly Regex emoji = new Regex(@"(\uD83D[\uDC00-\uDE4F])");

    /// <summary>
    /// Records a link for every bare URL matched by <paramref name="against"/>. The text itself is
    /// left untouched; only <c>result.Links</c> grows.
    /// </summary>
    /// <param name="against">Pattern exposing the named groups "paren" and "link".</param>
    /// <param name="result">Result whose text is scanned and whose link list is appended to.</param>
    /// <param name="startIndex">Position in the text at which scanning starts.</param>
    private static void handleAdvanced(Regex against, LinkFormatterResult result, int startIndex = 0)
    {
        foreach (Match m in against.Matches(result.Text, startIndex))
        {
            string prefix = m.Groups["paren"].Value;
            string link = m.Groups["link"].Value;
            int index = m.Index;

            if (prefix.Length > 0)
            {
                // The match begins at the opening parenthesis; the link begins after the prefix.
                index += prefix.Length;

                // The URL was written inside parentheses, so a trailing ')' closes the
                // parenthetical rather than belonging to the URL.
                if (link.EndsWith(")", StringComparison.Ordinal))
                    link = link.Remove(link.Length - 1);
            }

            result.Links.Add(new Link(link, index, link.Length));
        }
    }

    /// <summary>
    /// Replaces every match of <paramref name="against"/> with its display text and records a link
    /// covering the replacement.
    /// </summary>
    /// <param name="against">Pattern to search for.</param>
    /// <param name="display">Format string for the text shown; {0} is the whole match, {1} and {2} the first two groups.</param>
    /// <param name="link">Format string for the link target, using the same placeholders.</param>
    /// <param name="result">Result whose text and link list are updated in place.</param>
    /// <param name="startIndex">Position in the text at which scanning starts.</param>
    private static void handleMatches(Regex against, string display, string link, LinkFormatterResult result, int startIndex = 0)
    {
        // Matches are located in the text as it was when scanning began; this tracks how far
        // earlier replacements in this pass have shifted the text since then.
        int captureOffset = 0;

        foreach (Match m in against.Matches(result.Text, startIndex))
        {
            int index = m.Index - captureOffset;
            int matchLength = m.Length;

            string whole = m.Groups[0].Value;
            string first = m.Groups.Count > 1 ? m.Groups[1].Value : "";
            string second = m.Groups.Count > 2 ? m.Groups[2].Value : "";

            string displayText = string.Format(display, whole, first, second).Trim();
            string linkText = string.Format(link, whole, first, second).Trim();

            if (displayText.Length == 0 || linkText.Length == 0)
                continue;

            // ensure we don't have encapsulated links: skip the match when an existing link fully
            // contains it, or it fully contains an existing link.
            bool encapsulated = result.Links.Exists(l =>
                (l.Index <= index && l.Index + l.Length >= index + matchLength) ||
                (index <= l.Index && index + matchLength >= l.Index + l.Length));

            if (encapsulated)
                continue;

            int shrink = matchLength - displayText.Length;

            result.Text = result.Text.Remove(index, matchLength).Insert(index, displayText);

            // since we just changed the line display text, offset any already processed links
            // that start after the replaced span.
            foreach (Link l in result.Links)
            {
                if (l.Index > index)
                    l.Index -= shrink;
            }

            result.Links.Add(new Link(linkText, index, displayText.Length));

            // adjust the offset for processing the remaining matches of this pass.
            captureOffset += shrink;
        }
    }

    /// <summary>
    /// Formats <paramref name="input"/>, returning the display text together with the links found in it.
    /// </summary>
    /// <param name="input">Raw text to scan.</param>
    /// <param name="startIndex">Position in the text at which every pattern starts scanning.</param>
    /// <param name="space">
    /// Number of '\0' placeholder characters substituted for each emoji. With a value of zero or
    /// less the placeholder is empty and emoji are left as they are.
    /// </param>
    /// <returns>The rewritten text and its links; <c>OriginalText</c> keeps the untouched input.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null (raised by the regex engine).</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="startIndex"/> lies outside the text (raised by the regex engine).</exception>
    public static LinkFormatterResult Format(string input, int startIndex = 0, int space = 3)
    {
        LinkFormatterResult result = new LinkFormatterResult(input);

        // handle the [link display] format
        handleMatches(newFormatLink, "{2}", "{1}", result, startIndex);

        // handle the ()[] link format
        handleMatches(oldFormatLink, "{1}", "{2}", result, startIndex);

        // handle wiki links
        handleMatches(regexWiki, "wiki:{1}", "https://osu.ppy.sh/wiki/{1}", result, startIndex);

        // handle bare links
        handleAdvanced(advancedLink, result, startIndex);

        // handle editor times
        handleMatches(timeMatch, "{0}", "osu://edit/{0}", result, startIndex);

        // handle channels
        handleMatches(channelMatch, "{0}", "osu://chan/{0}", result, startIndex);

        // handle emoji: each one is replaced by `space` placeholder characters. '\0' is used
        // instead of ' ' because handleMatches trims the display text, which would discard
        // ordinary spaces but leaves '\0' alone.
        string emojiPlaceholder = new string('\0', Math.Max(space, 0));
        handleMatches(emoji, emojiPlaceholder, "{0}", result, startIndex);

        // Callers that want visible spacing can post-process: result.Text.Replace('\0', ' ')
        return result;
    }
}
/// <summary>
/// A single link located inside formatted text.
/// </summary>
public class Link
{
    /// <summary>Target of the link.</summary>
    public string Url;

    /// <summary>Position of the link's first character within the display text.</summary>
    public int Index;

    /// <summary>Number of display-text characters the link covers.</summary>
    public int Length;

    /// <summary>
    /// Creates a link.
    /// </summary>
    /// <param name="url">Target of the link.</param>
    /// <param name="startIndex">Position of the link within the display text.</param>
    /// <param name="length">Number of characters covered.</param>
    public Link(string url, int startIndex, int length)
    {
        Url = url;
        Index = startIndex;
        Length = length;
    }
}

/// <summary>
/// Output of a formatting pass: the rewritten text, the untouched input and the links found.
/// </summary>
public class LinkFormatterResult : ICloneable
{
    /// <summary>Links found so far; indices refer to <see cref="Text"/>.</summary>
    public List<Link> Links = new List<Link>();

    /// <summary>Display text, rewritten as links are processed.</summary>
    public string Text;

    /// <summary>The text exactly as it was supplied to the constructor.</summary>
    public string OriginalText;

    /// <summary>
    /// Creates a result whose <see cref="Text"/> and <see cref="OriginalText"/> both start as <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Raw input text.</param>
    public LinkFormatterResult(string text)
    {
        OriginalText = Text = text;
    }

    #region ICloneable Members

    /// <summary>
    /// Creates an independent copy of this result.
    /// </summary>
    /// <remarks>
    /// A bare <c>MemberwiseClone</c> would leave the copy pointing at the same <see cref="Links"/>
    /// list and the same mutable <see cref="Link"/> objects, so editing one result would silently
    /// change the other. The list and its entries are therefore duplicated.
    /// </remarks>
    /// <returns>A new <see cref="LinkFormatterResult"/> (of the same runtime type) with its own link list.</returns>
    public object Clone()
    {
        LinkFormatterResult copy = (LinkFormatterResult)MemberwiseClone();

        copy.Links = Links == null
            ? null
            : Links.ConvertAll(l => l == null ? null : new Link(l.Url, l.Index, l.Length));

        return copy;
    }

    #endregion
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Microsoft.VisualStudio.TestTools.UnitTesting; | |
using osu_common.Helpers; | |
using System.Text.RegularExpressions; | |
namespace osu_common | |
{ | |
/// <summary>
/// Tests for <see cref="LinkFormatter.Format"/>: each test formats one input and checks the
/// resulting display text and the url / index / length of the links found.
/// </summary>
[TestClass]
public class LinkFormatterTests
{
    /// <summary>A bare URL is linked in place; the trailing full stop is not part of it.</summary>
    [TestMethod]
    public void TestBareLink()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a http://www.basic-link.com/?test=test.");

        Assert.AreEqual("This is a http://www.basic-link.com/?test=test.", result.Text);
        Assert.AreEqual(1, result.Links.Count);
        Assert.AreEqual("http://www.basic-link.com/?test=test", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(36, result.Links[0].Length);
    }

    /// <summary>Several bare URLs in one line, including one with a fragment and one inside parentheses.</summary>
    [TestMethod]
    public void TestMultipleComplexLinks()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a http://test.io/link#fragment. (see https://twitter.com). Also, This string should not be altered. http://example.com/");

        Assert.AreEqual("This is a http://test.io/link#fragment. (see https://twitter.com). Also, This string should not be altered. http://example.com/", result.Text);
        Assert.AreEqual(3, result.Links.Count);

        Assert.AreEqual("http://test.io/link#fragment", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(28, result.Links[0].Length);

        Assert.AreEqual("https://twitter.com", result.Links[1].Url);
        Assert.AreEqual(45, result.Links[1].Index);
        Assert.AreEqual(19, result.Links[1].Length);

        Assert.AreEqual("http://example.com/", result.Links[2].Url);
        Assert.AreEqual(108, result.Links[2].Index);
        Assert.AreEqual(19, result.Links[2].Length);
    }

    /// <summary>A hash-bang fragment is kept as part of the URL.</summary>
    [TestMethod]
    public void TestAjaxLinks()
    {
        LinkFormatterResult result = LinkFormatter.Format("https://twitter.com/#!/hashbanglinks");

        Assert.AreEqual("https://twitter.com/#!/hashbanglinks", result.Text);
        Assert.AreEqual(0, result.Links[0].Index);
        Assert.AreEqual(36, result.Links[0].Length);
    }

    /// <summary>A tilde in the path is kept as part of the URL.</summary>
    [TestMethod]
    public void TestUnixHomeLinks()
    {
        LinkFormatterResult result = LinkFormatter.Format("http://www.chiark.greenend.org.uk/~sgtatham/putty/");

        Assert.AreEqual("http://www.chiark.greenend.org.uk/~sgtatham/putty/", result.Text);
        Assert.AreEqual(0, result.Links[0].Index);
        Assert.AreEqual(50, result.Links[0].Length);
    }

    /// <summary>Upper-case characters in the path do not cut the URL short.</summary>
    [TestMethod]
    public void TestCaseInsensitiveLinks()
    {
        LinkFormatterResult result = LinkFormatter.Format("look: http://puu.sh/7Ggh8xcC6/asf0asd9876.NEF");

        Assert.AreEqual("look: http://puu.sh/7Ggh8xcC6/asf0asd9876.NEF", result.Text);
        Assert.AreEqual(6, result.Links[0].Index);
        Assert.AreEqual(39, result.Links[0].Length);
    }

    /// <summary>[[Name]] becomes "wiki:Name" linked to the wiki page.</summary>
    [TestMethod]
    public void TestWikiLink()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a [[Wiki Link]].");

        Assert.AreEqual("This is a wiki:Wiki Link.", result.Text);
        Assert.AreEqual(1, result.Links.Count);
        Assert.AreEqual("https://osu.ppy.sh/wiki/Wiki Link", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(14, result.Links[0].Length);
    }

    /// <summary>Several wiki links in one line keep correct indices after each replacement.</summary>
    [TestMethod]
    public void TestMultiWikiLink()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a [[Wiki Link]] [[Wiki:Link]][[Wiki.Link]].");

        Assert.AreEqual("This is a wiki:Wiki Link wiki:Wiki:Linkwiki:Wiki.Link.", result.Text);
        Assert.AreEqual(3, result.Links.Count);

        Assert.AreEqual("https://osu.ppy.sh/wiki/Wiki Link", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(14, result.Links[0].Length);

        Assert.AreEqual("https://osu.ppy.sh/wiki/Wiki:Link", result.Links[1].Url);
        Assert.AreEqual(25, result.Links[1].Index);
        Assert.AreEqual(14, result.Links[1].Length);

        Assert.AreEqual("https://osu.ppy.sh/wiki/Wiki.Link", result.Links[2].Url);
        Assert.AreEqual(39, result.Links[2].Index);
        Assert.AreEqual(14, result.Links[2].Length);
    }

    /// <summary>(text)[url] is reduced to its text.</summary>
    [TestMethod]
    public void TestOldFormatLink()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a (simple test)[https://osu.ppy.sh].");

        Assert.AreEqual("This is a simple test.", result.Text);
        Assert.AreEqual(1, result.Links.Count);
        Assert.AreEqual("https://osu.ppy.sh", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(11, result.Links[0].Length);
    }

    /// <summary>[url text] is reduced to its text.</summary>
    [TestMethod]
    public void TestNewFormatLink()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a [https://osu.ppy.sh simple test].");

        Assert.AreEqual("This is a simple test.", result.Text);
        Assert.AreEqual(1, result.Links.Count);
        Assert.AreEqual("https://osu.ppy.sh", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(15 - 4, result.Links[0].Length);
    }

    /// <summary>Wiki syntax nested inside a [url text] link is not turned into a second link.</summary>
    [TestMethod]
    public void TestRecursiveBreaking()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a [https://osu.ppy.sh [[simple test]]].");

        Assert.AreEqual("This is a [[simple test]].", result.Text);
        Assert.AreEqual(1, result.Links.Count);
        Assert.AreEqual("https://osu.ppy.sh", result.Links[0].Url);
        Assert.AreEqual(10, result.Links[0].Index);
        Assert.AreEqual(15, result.Links[0].Length);
    }

    /// <summary>Links that begin before the start offset are ignored.</summary>
    [TestMethod]
    public void TestStartOffset()
    {
        LinkFormatterResult result = LinkFormatter.Format("username[[test]] nothing to see here.", 15);
        Assert.AreEqual(0, result.Links.Count);

        result = LinkFormatter.Format("username[http://test nothing] to [[see]] here.", 21);
        Assert.AreEqual(1, result.Links.Count);
    }

    /// <summary>All link kinds combined in one line; checks indices after every rewrite has been applied.</summary>
    [TestMethod]
    public void TestLinkComplex()
    {
        LinkFormatterResult result = LinkFormatter.Format("This is a [http://www.simple-test.com simple test] with some [traps] and [[wiki links]]. Don't forget to visit https://osu.ppy.sh (now!)[http://google.com]\uD83D\uDE12");

        Assert.AreEqual("This is a simple test with some [traps] and wiki:wiki links. Don't forget to visit https://osu.ppy.sh now!\0\0\0", result.Text);
        Assert.AreEqual(5, result.Links.Count);

        Link f = result.Links.Find(l => l.Url == "https://osu.ppy.sh/wiki/wiki links");
        Assert.AreEqual(44, f.Index);
        Assert.AreEqual(15, f.Length);

        f = result.Links.Find(l => l.Url == "http://www.simple-test.com");
        Assert.AreEqual(10, f.Index);
        Assert.AreEqual(11, f.Length);

        // "https://osu.ppy.sh" is 18 characters and occupies 83..100 of the final text, so
        // "now!" starts at 102 (after the space at 101) and the emoji placeholder at 106.
        f = result.Links.Find(l => l.Url == "http://google.com");
        Assert.AreEqual(102, f.Index);
        Assert.AreEqual(4, f.Length);

        f = result.Links.Find(l => l.Url == "https://osu.ppy.sh");
        Assert.AreEqual(83, f.Index);
        Assert.AreEqual(18, f.Length);

        f = result.Links.Find(l => l.Url == "\uD83D\uDE12");
        Assert.AreEqual(106, f.Index);
        Assert.AreEqual(3, f.Length);
    }

    /// <summary>Each emoji is replaced by three '\0' placeholders and linked to the original character.</summary>
    [TestMethod]
    public void TestEmoji()
    {
        LinkFormatterResult result = LinkFormatter.Format("Hello world\uD83D\uDE12<--This is an emoji,There are more:\uD83D\uDE10\uD83D\uDE00,\uD83D\uDE20");

        Assert.AreEqual("Hello world\0\0\0<--This is an emoji,There are more:\0\0\0\0\0\0,\0\0\0", result.Text);
        Assert.AreEqual(4, result.Links.Count);

        Assert.AreEqual(11, result.Links[0].Index);
        Assert.AreEqual(49, result.Links[1].Index);
        Assert.AreEqual(52, result.Links[2].Index);
        Assert.AreEqual(56, result.Links[3].Index);

        Assert.AreEqual("\uD83D\uDE12", result.Links[0].Url);
        Assert.AreEqual("\uD83D\uDE10", result.Links[1].Url);
        Assert.AreEqual("\uD83D\uDE00", result.Links[2].Url);
        Assert.AreEqual("\uD83D\uDE20", result.Links[3].Url);
    }

    /// <summary>
    /// Checks a standalone "key, comparison operator, number" pattern; it does not exercise
    /// <see cref="LinkFormatter"/>.
    /// </summary>
    [TestMethod]
    public void TestSearchReg()
    {
        Regex reg = new Regex(@"(\w*)([\>|\<|\=]\=?)(\d+)");
        string test1 = "od>8";
        string test3 = "AR>=9";

        Match m1 = reg.Match(test1);
        Assert.AreEqual(4, m1.Groups.Count);
        Assert.AreEqual("od", m1.Groups[1].Value);
        Assert.AreEqual(">", m1.Groups[2].Value);
        Assert.AreEqual("8", m1.Groups[3].Value);

        m1 = reg.Match(test3);
        Assert.AreEqual(4, m1.Groups.Count);
        Assert.AreEqual("AR", m1.Groups[1].Value);
        Assert.AreEqual(">=", m1.Groups[2].Value);
        Assert.AreEqual("9", m1.Groups[3].Value);
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment