Last active
July 4, 2018 10:01
-
-
Save guitarrapc/06018bfd1f02b7eb425e97ece0c60147 to your computer and use it in GitHub Desktop.
https://docs.microsoft.com/en-us/microsoftteams/platform/concepts/bots/bot-conversations/bots-conversations : Microsoft Teams Outgoing Webhook Message Parser. Isn't there an existing parser for this awkward Teams message format?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Script entry point: runs every sample message below through TeamsMessageParser.Parse.
// The value returned by Parse is discarded here, so this only exercises the parser.
// NOTE(review): the free-standing `void Main()` looks like a LINQPad script entry point - confirm.
void Main() | |
{ | |
// Sample message bodies: mentions (<at>), formatting tags, <img> tags with attributes,
// emoji images wrapped in <at>, multi-line rich text, and a "copy-paste-block" div.
var sources = new[] | |
{ | |
"<at>cibot</at><strong>すとろんぐ</strong>", | |
"<at>cibot</at><strong>すとろんぐ</strong>ほげもげ", | |
"ほげもげ<at>cibot</at><strong>すとろんぐ</strong>ほげもげ", | |
"<at>cibot</at> こんにちは\n", | |
"<at>cibot</at> <img alt=\"404 エラー - 見つかりません\" src=\"https://as-api.asm.skype.com/v1/objects/0-ea-d5-f9a6e7404b54cf7e36924f8e684d3e36/views/imgo\" id=\"x_0-ea-d5-f9a6e7404b54cf7e36924f8e684d3e36\" itemscope=\"\" itemtype=\"http://schema.skype.com/AMSImage\" style=\"width:375px; height:250px\">\n", | |
"<at>cibot</at> \n", | |
"<at>cibot</at> こんちには\n", | |
"<at>cibot</at> <at><img alt=\"😁\" itemid=\"laugh\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/laugh/default_20.png\" style=\"width:20px; height:20px\"></at>\n", | |
// Verbatim string: the trailing \n is a literal backslash followed by 'n', not a newline.
@"<at>cibot</at> hoge\n", | |
"<at>cibot</at> <at><img alt=\"❤\" itemid=\"heart\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/heart/default_20.png\" style=\"width:20px; height:20px\"></at>", | |
"<at>cibot</at> <at><img alt=\"❤️\" itemid=\"heart\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/heart/default_20.png\" style=\"width:20px; height:20px\"></at><at><img alt=\"🤗\" itemid=\"hug\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/hug/default_20.png\" style=\"width:20px; height:20px\"></at><at><img alt=\"⛄\" itemid=\"snowangel\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/snowangel/default_20.png\" style=\"width:20px; height:20px\"></at>\n", | |
"ほげもげ<at>cibot</at>ほげもげ<strong>すとろんぐ</strong> <at><img alt=\"😁\" itemid=\"laugh\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/laugh/default_20.png\" style=\"width:20px; height:20px\"></at>という画像らしい\n", | |
"<at>cibot</at> <strong>ほげもげ</strong>\n\n<em>ふがうが</em>\n\n<em>ぴよぴよ</em>\n\n<u>ほげまsdふぁ</u>\n\n\r\n<blockquote>\n<u>なるほどね</u>\n</blockquote>\r\n\n", | |
"<at>cibot</at> <at>ほげもげ</at>\n<strong>ふぃががい</strong>\n\n<em>ぴよぴよ</em>\n\n<u>あああげ</u>\n\n\r\n<blockquote>\n<u>ほあsdふぁdふぁ</u>\n</blockquote>\r\n\n\n<a href=\"https://google.com\" rel=\"noreferrer noopener\" target=\"_blank\" title=\"https://google.com\">ほげもげ</a>\n\n\r\n<h2>asdfadfa</h2>\r\n\n\n", | |
// Contains a div with itemprop="copy-paste-block" and div tags that are never closed.
"\r\n<div itemprop=\"copy-paste-block\"><at>cibot</at>\n\r\n<div style=\"font-size:14px\"><strong>ほげもげ</strong>\n\n\r\n<div style=\"font-size:14px\"><em>ふがうが</em>\n\n\r\n<div style=\"font-size:14px\"><em>ぴよぴよ</em>\n\n\r\n<div style=\"font-size:14px\"><u>ほげまsdふぁ</u>\n\n\r\n<blockquote style=\"font-size:14px\">\n<u>なるほどね</u>\n</blockquote>\r\n\n\n", | |
}; | |
var parser = new TeamsMessageParser(); | |
foreach (var source in sources) | |
{ | |
parser.Parse(source); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Splits the HTML-like body of a Microsoft Teams outgoing-webhook message into
/// <see cref="MessageToken"/>s (plain-text runs and top-level tags) and summarizes them
/// in a <see cref="ParsedMessageDetail"/>.
/// </summary>
/// <remarks>
/// This is a single-pass scanner, not an HTML parser: the content of a tag is kept as raw
/// text (nested tags included), and a '&gt;' inside an attribute value ends the tag early.
/// </remarks>
public class TeamsMessageParser
{
    // Whitespace appended after image content. The original note on this literal says:
    // "without one full-width and two half-width spaces after it, the following text
    // overlaps the image or is not displayed - hack".
    // NOTE(review): keep this literal exactly as found in the file; verify the padding
    // characters against what Teams actually needs.
    private const string ImagePadding = " ";

    /// <summary>Parses one message body.</summary>
    /// <param name="source">Raw message text as delivered by Teams.</param>
    /// <returns>
    /// The token list plus joined views of it. When a token is a <c>div</c> whose attribute
    /// text contains "copy-paste-block", the token contents are concatenated and parsed again,
    /// and the returned <c>Source</c> is that concatenated text.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public ParsedMessageDetail Parse(string source)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        var tokens = new List<MessageToken>();
        for (var i = 0; i < source.Length; i++)
        {
            // Exactly one token is added per iteration, so the count doubles as the token index.
            var token = new MessageToken { Index = tokens.Count };

            // A trailing '<' has no following character; treat it as plain text instead of
            // reading past the end of the string.
            var opensTag = source[i] == '<' && i + 1 < source.Length && source[i + 1] != '/';
            if (!opensTag)
            {
                var (consumed, text) = ParseTaglessContent(source, i);
                token.Content = text;
                i += consumed;
                tokens.Add(token);
                continue;
            }

            // begin tag handling
            var (begin, beginTag, attributeSource) = DetectBeginTag(source, i);
            token.BeginTag = beginTag;
            token.AttributeSource = attributeSource;

            // attribute handling
            if (!string.IsNullOrWhiteSpace(attributeSource))
            {
                token.Attributes = ParseAttribute(attributeSource, 0);
                token.AttributeType = token.Tag;
            }

            // end tag handling
            var (end, endTag, closed) = DetectEndTag(source, begin, token.Tag);
            token.EndTag = endTag;
            token.IsTagClosed = closed;

            // `begin` points at the space after the tag name when the tag has an attribute
            // section, otherwise at the closing '>'. Deriving the offset from that (rather than
            // from "were any attributes parsed") keeps tag text such as `<br >` or
            // `<input disabled>` out of the content.
            var attributeSpan = source[begin] == ' ' ? " ".Length + attributeSource.Length : 0;
            var tagClose = begin + attributeSpan;
            token.Content = Slice(source, tagClose + 1, end - tagClose);

            // Measured before the image rewrites below so that `i` lands on the last
            // character this token consumed from `source`.
            var increment = attributeSpan + token.Content.Length + endTag.Length;

            // special handling - an img tag without visible content echoes its own tag as content.
            if (token.ContainsAttribute
                && token.AttributeType == "img"
                && string.IsNullOrWhiteSpace(token.Content))
            {
                token.Content = token.FullQualifiedBeginTag + ImagePadding;
            }

            // content that itself starts with an img tag (e.g. an emoji wrapped in <at>)
            // takes over the image's attributes.
            if (IsImage(token.Content, 0))
            {
                var (_, innerTag, innerAttributeSource) = DetectBeginTag(token.Content, 0);
                if (!string.IsNullOrWhiteSpace(innerAttributeSource))
                {
                    token.Attributes = ParseAttribute(innerAttributeSource, 0);
                    token.AttributeType = innerTag.Replace("<", "").Replace(">", "");
                    if (token.AttributeType == "img")
                    {
                        token.Content = token.Content + ImagePadding;
                    }
                }
            }

            tokens.Add(token);
            i = begin + increment;
        }

        // Parse against self when Copy-Paste
        if (tokens.Any(x => x.Tag == "div" && x.AttributeType == "div" && x.AttributeSource.Contains("copy-paste-block")))
        {
            var newSource = tokens.Select(x => x.Content).ToJoinedString("");
            return Parse(newSource);
        }

        return new ParsedMessageDetail()
        {
            Source = source,
            To = tokens.Where(x => x.IsToSection).Select(x => x.Content).ToJoinedString(),
            EchoMessage = tokens.Where(x => !x.IsToSection).Select(x => x.TaggedContent).ToJoinedString(),
            TagglessMessage = tokens.Where(x => !x.IsToSection).Select(x => x.Content).ToJoinedString(),
            Images = tokens.Where(x => !x.IsToSection).Where(x => x.IsImg).Select(x => x.Content).ToJoinedString(),
            Tokens = tokens.ToArray(),
        };
    }

    /// <summary>
    /// True when more than four characters remain at <paramref name="startIndex"/> and they
    /// start with "&lt;img".
    /// </summary>
    private bool IsImage(string source, int startIndex)
    {
        return source.Length - startIndex > 4
            && string.CompareOrdinal(source, startIndex, "<img", 0, 4) == 0;
    }

    /// <summary>
    /// Reads the begin tag that starts at <paramref name="startIndex"/> (which must be '&lt;').
    /// </summary>
    /// <returns>
    /// <c>length</c>: index of the space after the tag name when one precedes the closing '&gt;',
    /// otherwise the index of the '&gt;'. For a tag that is never terminated it is the last
    /// index of <paramref name="source"/>, so the caller never moves backwards.
    /// <c>tag</c>: the tag with its attribute section removed, e.g. "&lt;img&gt;".
    /// <c>attribute</c>: the raw text between that space and the '&gt;' ("" when absent).
    /// </returns>
    private (int length, string tag, string attribute) DetectBeginTag(string source, int startIndex)
    {
        var nameEnd = source.IndexOfAny(new[] { ' ', '>' }, startIndex + 1);
        if (nameEnd < 0)
        {
            // Unterminated tag: everything up to the end of the input is the tag text.
            return (source.Length - 1, source.Substring(startIndex), "");
        }

        if (source[nameEnd] == '>')
        {
            return (nameEnd, source.Substring(startIndex, nameEnd - startIndex + 1), "");
        }

        // A space came first: the attribute section runs up to the next '>'.
        var name = source.Substring(startIndex, nameEnd - startIndex);
        var close = source.IndexOf('>', nameEnd + 1);
        if (close < 0)
        {
            return (nameEnd, name, source.Substring(nameEnd + 1));
        }

        return (nameEnd, name + ">", source.Substring(nameEnd + 1, close - nameEnd - 1));
    }

    /// <summary>
    /// Scans forward from <paramref name="startIndex"/> for the closing tag that matches
    /// <paramref name="tag"/>.
    /// </summary>
    /// <returns>
    /// <c>length</c>: index of the last character seen before the matching end tag (or before
    /// the end of input), never less than <paramref name="startIndex"/>.
    /// <c>tag</c>: the end tag text, or "" when none was found.
    /// <c>closed</c>: whether a matching end tag was found.
    /// </returns>
    private (int length, string tag, bool closed) DetectEndTag(string source, int startIndex, string tag)
    {
        var closing = "</" + tag + ">";

        // Starting at startIndex (not 0) makes an empty element such as <b></b> yield an
        // empty content slice instead of a negative length.
        var lastContentIndex = startIndex;
        for (var j = startIndex + 1; j < source.Length; j++)
        {
            var startsClosingTag = source[j] == '<' && j + 1 < source.Length && source[j + 1] == '/';
            if (!startsClosingTag)
            {
                lastContentIndex = j;
                continue;
            }

            // end tag should match with begin tag; a closing tag for another element is
            // skipped and stays part of the content.
            if (j + closing.Length <= source.Length
                && string.CompareOrdinal(source, j, closing, 0, closing.Length) == 0)
            {
                return (lastContentIndex, closing, true);
            }
        }

        return (lastContentIndex, "", false);
    }

    /// <summary>
    /// Reads plain text from <paramref name="startIndex"/> up to, but not including, the next '&lt;'.
    /// </summary>
    /// <returns>
    /// <c>length</c>: how far the caller should advance its index before its own loop increment
    /// (one less than the content length when a '&lt;' follows, the full length at end of input).
    /// <c>content</c>: the text that was read.
    /// </returns>
    private (int length, string content) ParseTaglessContent(string source, int startIndex)
    {
        // The character at startIndex always belongs to the content, even if it is '<'.
        var nextTag = source.IndexOf('<', startIndex + 1);
        if (nextTag < 0)
        {
            var rest = source.Substring(startIndex);
            return (rest.Length, rest);
        }

        var content = source.Substring(startIndex, nextTag - startIndex);
        return (content.Length - 1, content);
    }

    /// <summary>
    /// Splits an attribute section such as <c>alt="x" src="y"</c> into key/value pairs.
    /// Values keep their surrounding double quotes. Parsing stops at a value whose closing
    /// quote is missing.
    /// </summary>
    private ContentAttribute[] ParseAttribute(string source, int startIndex)
    {
        var attributes = new List<ContentAttribute>();
        var keyLength = 0;
        for (var j = startIndex; j < source.Length - ">".Length; j++)
        {
            if (source[j] != '=' && source[j + 1] != '"')
            {
                keyLength++;
                continue;
            }

            var valueStart = j + "=\"".Length;
            var closingQuote = source.IndexOf('"', valueStart);
            if (closingQuote < 0)
            {
                // Unterminated value: nothing reliable is left to read.
                break;
            }

            var attribute = new ContentAttribute();
            attribute.Key = source.Substring(j - keyLength, keyLength);
            attribute.Value = "\"" + source.Substring(valueStart, closingQuote - valueStart) + "\"";
            attribute.Source = source.Substring(
                j - attribute.Key.Length,
                attribute.Key.Length + attribute.Devide.Length + attribute.Value.Length);

            // Jump to the character after the closing quote; the loop increment then skips
            // the separator before the next key.
            j += attribute.Source.Length - attribute.Key.Length;
            keyLength = 0;
            attributes.Add(attribute);
        }
        return attributes.ToArray();
    }

    /// <summary>
    /// Substring that clamps to the bounds of <paramref name="source"/> instead of throwing,
    /// so truncated input produces shorter content rather than an exception.
    /// </summary>
    private static string Slice(string source, int start, int length)
    {
        if (length <= 0 || start >= source.Length)
        {
            return "";
        }

        return source.Substring(start, Math.Min(length, source.Length - start));
    }
}
/// <summary>
/// Result of TeamsMessageParser.Parse: the token list plus several joined views of it.
/// </summary>
public class ParsedMessageDetail | |
{ | |
/// <summary>The text that was parsed (for a copy-paste message, the re-joined text that was parsed again).</summary>
public string Source { get; set; } | |
/// <summary>Concatenated Content of the tokens whose IsToSection is true.</summary>
public string To { get; set; } | |
/// <summary>Concatenated TaggedContent of all tokens that are not part of the To section.</summary>
public string EchoMessage { get; set; } | |
/// <summary>Concatenated Content of all tokens that are not part of the To section.</summary>
public string TagglessMessage { get; set; } | |
/// <summary>Concatenated Content of the non-To tokens whose IsImg is true.</summary>
public string Images { get; set; } | |
/// <summary>Every token produced by the parse, in order.</summary>
public MessageToken[] Tokens { get; set; } | |
} | |
/// <summary>
/// One piece of a parsed message: either a run of plain text (no tag properties set)
/// or a tag together with its raw content.
/// </summary>
public class MessageToken
{
    /// <summary>Position of this token in the token list.</summary>
    public int Index { get; set; }

    /// <summary>Tag name: <see cref="BeginTag"/> without angle brackets, or null when there is no begin tag.</summary>
    public string Tag
    {
        get
        {
            if (BeginTag == null)
            {
                return null;
            }

            return BeginTag.Replace("<", "").Replace(">", "");
        }
    }

    /// <summary>True for an "at" tag that does not carry an image.</summary>
    public bool IsToSection => !IsImg && Tag == "at";

    /// <summary>Begin tag with its attribute section removed.</summary>
    public string BeginTag { get; set; }

    /// <summary>
    /// Begin tag used when echoing the token: empty without a begin tag, the bare begin tag
    /// when there are no attributes or the tag was closed, otherwise the begin tag with
    /// <see cref="AttributeSource"/> re-inserted before its last character.
    /// </summary>
    public string FullQualifiedBeginTag
    {
        get
        {
            if (string.IsNullOrWhiteSpace(BeginTag))
            {
                return "";
            }

            if (Attributes == null || IsTagClosed)
            {
                return BeginTag;
            }

            var lastIndex = BeginTag.Length - 1;
            return BeginTag.Substring(0, lastIndex) + " " + AttributeSource + BeginTag.Substring(lastIndex, 1);
        }
    }

    /// <summary>Matching end tag text.</summary>
    public string EndTag { get; set; }

    /// <summary>Whether a matching end tag was found.</summary>
    public bool IsTagClosed { get; set; }

    /// <summary>Raw text of the token.</summary>
    public string Content { get; set; }

    /// <summary>True when the attributes belong to an img tag and at least one attribute exists.</summary>
    public bool IsImg => AttributeType == "img" && ContainsAttribute;

    /// <summary>Image tokens echo their content as-is; everything else is wrapped in its begin and end tags.</summary>
    public string TaggedContent
    {
        get
        {
            if (IsImg)
            {
                return Content;
            }

            return FullQualifiedBeginTag + Content + EndTag;
        }
    }

    /// <summary>Raw attribute section of the begin tag.</summary>
    public string AttributeSource { get; set; }

    /// <summary>Name of the tag that <see cref="Attributes"/> were read from.</summary>
    public string AttributeType { get; set; }

    /// <summary>True when at least one attribute is present.</summary>
    public bool ContainsAttribute => Attributes?.Length > 0;

    /// <summary>Attributes attached to this token; null when none were parsed.</summary>
    public ContentAttribute[] Attributes { get; set; }
}
/// <summary>
/// A single attribute taken from a tag's attribute section.
/// </summary>
public class ContentAttribute | |
{ | |
/// <summary>The attribute's text as sliced from the attribute section: key, separator and quoted value.</summary>
public string Source { get; set; } | |
/// <summary>Separator between key and value; defaults to "=".</summary>
public string Devide { get; set; } = "="; | |
/// <summary>Text preceding the separator.</summary>
public string Key { get; set; } | |
/// <summary>Attribute value; the parser stores it wrapped in double quotes.</summary>
public string Value { get; set; } | |
} | |
/// <summary>
/// Small sequence helpers: joining elements into a string and appending trailing values.
/// </summary>
public static class EnumerableExtensions
{
    /// <summary>
    /// Concatenates the string form of every element, placing <paramref name="separator"/>
    /// between consecutive elements.
    /// </summary>
    /// <param name="source">Elements to join.</param>
    /// <param name="separator">Text placed between elements; empty by default.</param>
    /// <returns>The joined string.</returns>
    public static string ToJoinedString<T>(this IEnumerable<T> source, string separator = "")
    {
        return string.Join(separator, source);
    }

    /// <summary>
    /// Returns <paramref name="source"/> followed by <paramref name="values"/>.
    /// Arguments are validated immediately; enumeration itself is deferred.
    /// </summary>
    /// <param name="source">Leading sequence.</param>
    /// <param name="values">Elements appended after <paramref name="source"/>.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="values"/> is null.
    /// </exception>
    public static IEnumerable<TSource> Concat<TSource>(this IEnumerable<TSource> source, params TSource[] values)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        // Reject a null array up front; otherwise the failure would only surface later,
        // as a NullReferenceException in the middle of enumeration.
        if (values == null)
        {
            throw new ArgumentNullException(nameof(values));
        }

        return source.ConcatCore(values);
    }

    // Iterator part, kept separate so the argument checks in Concat run eagerly.
    private static IEnumerable<TSource> ConcatCore<TSource>(this IEnumerable<TSource> source, params TSource[] values)
    {
        foreach (var item in source)
        {
            yield return item;
        }

        foreach (var value in values)
        {
            yield return value;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Result