Skip to content

Instantly share code, notes, and snippets.

@guitarrapc
Last active July 4, 2018 10:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guitarrapc/06018bfd1f02b7eb425e97ece0c60147 to your computer and use it in GitHub Desktop.
Save guitarrapc/06018bfd1f02b7eb425e97ece0c60147 to your computer and use it in GitHub Desktop.
https://docs.microsoft.com/en-us/microsoftteams/platform/concepts/bots/bot-conversations/bots-conversations : Microsoft Teams Outgoing Webhook Message Parser. Isn't there already a parser for this unhelpful Teams message format?
// Script entry point (presumably a LINQPad script — TODO confirm): feeds a set of sample
// Teams message bodies through TeamsMessageParser.Parse, one at a time.
void Main()
{
// Sample payloads: each starts with (or contains) an <at>cibot</at> mention followed by
// formatted text, &nbsp; entities, <img> tags, or emoji wrapped in <at><img ...></at>.
var sources = new[]
{
"<at>cibot</at><strong>すとろんぐ</strong>",
"<at>cibot</at><strong>すとろんぐ</strong>ほげもげ",
"ほげもげ<at>cibot</at><strong>すとろんぐ</strong>ほげもげ",
"<at>cibot</at> こんにちは\n",
"<at>cibot</at> <img alt=\"404 エラー - 見つかりません\" src=\"https://as-api.asm.skype.com/v1/objects/0-ea-d5-f9a6e7404b54cf7e36924f8e684d3e36/views/imgo\" id=\"x_0-ea-d5-f9a6e7404b54cf7e36924f8e684d3e36\" itemscope=\"\" itemtype=\"http://schema.skype.com/AMSImage\" style=\"width:375px; height:250px\">\n",
"<at>cibot</at>&nbsp;\n",
"<at>cibot</at>&nbsp;こんちには\n",
"<at>cibot</at> <at><img alt=\"😁\" itemid=\"laugh\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/laugh/default_20.png\" style=\"width:20px; height:20px\"></at>\n",
// Verbatim string: the trailing \n below is a literal backslash followed by 'n', not a newline.
@"<at>cibot</at> hoge\n",
"<at>cibot</at> <at><img alt=\"❤\" itemid=\"heart\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/heart/default_20.png\" style=\"width:20px; height:20px\"></at>",
"<at>cibot</at> <at><img alt=\"❤️\" itemid=\"heart\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/heart/default_20.png\" style=\"width:20px; height:20px\"></at><at><img alt=\"🤗\" itemid=\"hug\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/hug/default_20.png\" style=\"width:20px; height:20px\"></at><at><img alt=\"⛄\" itemid=\"snowangel\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/snowangel/default_20.png\" style=\"width:20px; height:20px\"></at>\n",
"ほげもげ<at>cibot</at>ほげもげ<strong>すとろんぐ</strong> <at><img alt=\"😁\" itemid=\"laugh\" itemscope=\"\" itemtype=\"http://schema.skype.com/Emoji\" src=\"https://statics.teams.microsoft.com/evergreen-assets/funstuff/skype-emoticons-f/laugh/default_20.png\" style=\"width:20px; height:20px\"></at>という画像らしい\n",
"<at>cibot</at> <strong>ほげもげ</strong>\n\n<em>ふがうが</em>\n\n<em>ぴよぴよ</em>\n\n<u>ほげまsdふぁ</u>\n\n\r\n<blockquote>\n<u>なるほどね</u>\n</blockquote>\r\n\n",
"<at>cibot</at>&nbsp;<at>ほげもげ</at>\n<strong>ふぃががい</strong>\n\n<em>ぴよぴよ</em>\n\n<u>あああげ</u>\n\n\r\n<blockquote>\n<u>ほあsdふぁdふぁ</u>\n</blockquote>\r\n\n\n<a href=\"https://google.com\" rel=\"noreferrer noopener\" target=\"_blank\" title=\"https://google.com\">ほげもげ</a>\n\n\r\n<h2>asdfadfa</h2>\r\n\n\n",
// Payload wrapped in <div itemprop="copy-paste-block">; Parse detects this marker and
// re-parses the joined token contents.
"\r\n<div itemprop=\"copy-paste-block\"><at>cibot</at>\n\r\n<div style=\"font-size:14px\"><strong>ほげもげ</strong>\n\n\r\n<div style=\"font-size:14px\"><em>ふがうが</em>\n\n\r\n<div style=\"font-size:14px\"><em>ぴよぴよ</em>\n\n\r\n<div style=\"font-size:14px\"><u>ほげまsdふぁ</u>\n\n\r\n<blockquote style=\"font-size:14px\">\n<u>なるほどね</u>\n</blockquote>\r\n\n\n",
};
var parser = new TeamsMessageParser();
foreach (var source in sources)
{
// The ParsedMessageDetail returned by Parse is discarded here.
parser.Parse(source);
}
}
public class TeamsMessageParser
{
/// <summary>
/// Splits a Teams message body into tokens (tagged elements and plain text runs) and
/// summarises them as a <see cref="ParsedMessageDetail"/>.
/// </summary>
/// <param name="source">Raw message text, e.g. <c>&lt;at&gt;cibot&lt;/at&gt; hello</c>.</param>
/// <returns>
/// The parsed detail. When the message contains a <c>div</c> whose attributes include
/// <c>copy-paste-block</c>, the token contents are joined and parsed again, and the result of
/// that second pass (including its <c>Source</c>) is returned.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
public ParsedMessageDetail Parse(string source)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    // Hack: without one full-width and two half-width spaces after an image,
    // the following text overlaps the image or is not displayed.
    const string imagePadding = "　  ";

    var currentIndex = 0;
    var tokens = new List<MessageToken>();
    for (var i = 0; i < source.Length; i++)
    {
        var token = new MessageToken()
        {
            Index = currentIndex++,
        };
        var begin = 0;
        int end = 0;

        // An opening tag is '<' followed by anything but '/'. The bounds check keeps a message
        // that ends in '<' from indexing past the end; that lone '<' is handled as plain text.
        if (source[i] == '<' && i + 1 < source.Length && source[i + 1] != '/')
        {
            // begin tag handling: "begin" is the index of the '>' or of the space before the attributes
            (begin, token.BeginTag, token.AttributeSource) = DetectBeginTag(source, i);
            i = begin;

            // attribute handling
            if (!string.IsNullOrWhiteSpace(token.AttributeSource))
            {
                token.Attributes = ParseAttribute(token.AttributeSource, 0);
                token.AttributeType = token.Tag;
            }

            // end tag handling: "end" is the index of the last content character
            (end, token.EndTag, token.IsTagClosed) = DetectEndTag(source, begin, token.Tag);

            // content
            var increment = 0;
            if (token.ContainsAttribute)
            {
                // Index of the '>' that ends "<tag attr...>".
                var attributeEnd = begin + " ".Length + token.AttributeSource.Length;
                token.Content = SliceClamped(source, attributeEnd + ">".Length, end - attributeEnd);
                increment = token.Content.Length + " ".Length + token.AttributeSource.Length + token.EndTag.Length;

                // special handling - only img tag should set content manually.
                if (token.AttributeType == "img" && string.IsNullOrWhiteSpace(token.Content))
                {
                    token.Content = token.FullQualifiedBeginTag + imagePadding;
                }
            }
            else
            {
                token.Content = SliceClamped(source, begin + 1, end - begin);
                increment = token.Content.Length + token.EndTag.Length;
            }

            // content contains image attributes (e.g. an emoji wrapped as <at><img ...></at>)
            // NOTE(review): a lone <img> whose content was replaced above also matches here,
            // so it receives the padding twice; kept as-is to preserve existing output.
            if (IsImage(token.Content, 0))
            {
                var (_, contentBeginTag, contentAttributeSource) = DetectBeginTag(token.Content, 0);
                if (!string.IsNullOrWhiteSpace(contentAttributeSource))
                {
                    token.Attributes = ParseAttribute(contentAttributeSource, 0);
                    token.AttributeType = contentBeginTag.Replace("<", "").Replace(">", "");
                    if (token.AttributeType == "img")
                    {
                        token.Content = token.Content + imagePadding;
                    }
                }
            }

            // add to result
            tokens.Add(token);
            // skip over the content and end tag that were just consumed
            i += increment;
        }
        else
        {
            // content handling: plain text up to (not including) the next '<'
            (begin, token.Content) = ParseTaglessContent(source, i);
            i += begin;
            // add to result
            tokens.Add(token);
        }
    }

    // Parse against self when Copy-Paste: the wrapper div is dropped by joining only the contents.
    if (tokens.Any(x => x.Tag == "div" && x.AttributeType == "div" && x.AttributeSource.Contains("copy-paste-block")))
    {
        var newSource = tokens.Select(x => x.Content).ToJoinedString("");
        return Parse(newSource);
    }

    var result = new ParsedMessageDetail()
    {
        Source = source,
        To = tokens.Where(x => x.IsToSection).Select(x => x.Content).ToJoinedString(),
        EchoMessage = tokens.Where(x => !x.IsToSection).Select(x => x.TaggedContent).ToJoinedString(),
        TagglessMessage = tokens.Where(x => !x.IsToSection).Select(x => x.Content).ToJoinedString(),
        Images = tokens.Where(x => !x.IsToSection).Where(x => x.IsImg).Select(x => x.Content).ToJoinedString(),
        Tokens = tokens.ToArray(),
    };
    return result;
}

/// <summary>
/// Returns <paramref name="length"/> characters of <paramref name="source"/> starting at
/// <paramref name="start"/>, truncated to what is actually available. Yields an empty string
/// instead of throwing when the range is empty or starts past the end, which happens for empty
/// elements and for tags left open at the end of the message.
/// </summary>
private static string SliceClamped(string source, int start, int length)
{
    if (length <= 0 || start >= source.Length)
    {
        return string.Empty;
    }
    return source.Substring(start, Math.Min(length, source.Length - start));
}
/// <summary>
/// Tells whether the text at <paramref name="startIndex"/> begins with <c>&lt;img</c> and is
/// followed by at least one more character.
/// </summary>
/// <param name="source">Text to inspect.</param>
/// <param name="startIndex">Position at which the tag is expected to start.</param>
/// <returns>True when more than four characters remain and the first four are <c>&lt;img</c>.</returns>
private bool IsImage(string source, int startIndex)
    => source.Length - startIndex > 4
       && string.CompareOrdinal(source, startIndex, "<img", 0, 4) == 0;
/// <summary>
/// Reads the opening tag that starts at <paramref name="startIndex"/> and separates the tag
/// from its attribute text.
/// </summary>
/// <param name="source">Text containing the tag.</param>
/// <param name="startIndex">Index of the tag's first character (the <c>&lt;</c>).</param>
/// <returns>
/// <c>length</c>: index of the <c>&gt;</c> for an attribute-less tag, or index of the first
/// space when attributes follow (0 when neither is found).
/// <c>tag</c>: the tag without attributes, e.g. <c>&lt;img&gt;</c>; the closing <c>&gt;</c> is
/// omitted when the source ends before one is found.
/// <c>attribute</c>: the raw text between the first space and the closing <c>&gt;</c>,
/// or an empty string.
/// </returns>
private (int length, string tag, string attribute) DetectBeginTag(string source, int startIndex)
{
    // The first space or '>' after the opening character decides the tag's shape.
    var delimiterIndex = source.IndexOfAny(new[] { ' ', '>' }, startIndex + 1);
    if (delimiterIndex < 0)
    {
        // Unterminated tag: everything to the end is the tag. The reported index stays 0,
        // matching the previous behaviour that callers were written against.
        return (0, source.Substring(startIndex), "");
    }

    if (source[delimiterIndex] == '>')
    {
        // "<tag>" without attributes.
        return (delimiterIndex, source.Substring(startIndex, delimiterIndex - startIndex + 1), "");
    }

    // "<tag attr...>": the attribute text runs from after the space to the next '>'.
    var tagName = source.Substring(startIndex, delimiterIndex - startIndex);
    var closeIndex = source.IndexOf('>', delimiterIndex + 1);
    if (closeIndex < 0)
    {
        return (delimiterIndex, tagName, source.Substring(delimiterIndex + 1));
    }
    return (delimiterIndex, tagName + ">", source.Substring(delimiterIndex + 1, closeIndex - delimiterIndex - 1));
}
/// <summary>
/// Extracts the raw attribute text of a tag: everything between the first space found after
/// <paramref name="startIndex"/> and the next <c>&gt;</c> (or the end of the text).
/// </summary>
/// <param name="source">Text containing the tag.</param>
/// <param name="startIndex">Index of the tag's first character; scanning starts one past it.</param>
/// <returns>The attribute text, or an empty string when no space follows the start index.</returns>
private string DetectAttribute(string source, int startIndex)
{
    var scanFrom = startIndex + 1;
    if (scanFrom >= source.Length)
    {
        return "";
    }

    var spaceIndex = source.IndexOf(' ', scanFrom);
    if (spaceIndex < 0)
    {
        return "";
    }

    var closeIndex = source.IndexOf('>', spaceIndex + 1);
    if (closeIndex < 0)
    {
        return source.Substring(spaceIndex + 1);
    }
    return source.Substring(spaceIndex + 1, closeIndex - spaceIndex - 1);
}
/// <summary>
/// Looks for the first closing tag <c>&lt;/tag&gt;</c> after <paramref name="startIndex"/>.
/// </summary>
/// <param name="source">Text being parsed.</param>
/// <param name="startIndex">Index to search after (the search starts at the next character).</param>
/// <param name="tag">Tag name without angle brackets, e.g. <c>strong</c>.</param>
/// <returns>
/// <c>length</c>: index of the last character before the closing tag, or of the last character
/// of <paramref name="source"/> when no closing tag exists; never less than
/// <paramref name="startIndex"/>, so an immediately closed tag yields an empty content range.
/// <c>tag</c>: the closing tag text, or an empty string when none was found.
/// <c>closed</c>: whether a matching closing tag was found.
/// </returns>
/// <remarks>
/// The first textual match wins, so nested elements with the same tag name are not balanced.
/// A trailing <c>&lt;</c> or a shorter, non-matching closing tag near the end of the text no
/// longer reads past the end of <paramref name="source"/>.
/// </remarks>
private (int length, string tag, bool closed) DetectEndTag(string source, int startIndex, string tag)
{
    var closingTag = "</" + tag + ">";
    var searchFrom = startIndex + 1;

    // Ordinal search: tag names are markup, not linguistic text.
    var closeIndex = searchFrom < source.Length
        ? source.IndexOf(closingTag, searchFrom, StringComparison.Ordinal)
        : -1;

    if (closeIndex >= 0)
    {
        return (closeIndex - 1, closingTag, true);
    }

    // No closing tag: the content runs to the end of the text.
    return (Math.Max(startIndex, source.Length - 1), "", false);
}
/// <summary>
/// Reads the plain-text run that starts at <paramref name="startIndex"/> and ends just before
/// the next <c>&lt;</c>. The character at <paramref name="startIndex"/> itself is always
/// included, even when it is a <c>&lt;</c>.
/// </summary>
/// <param name="source">Text being parsed.</param>
/// <param name="startIndex">Index of the first character of the run.</param>
/// <returns>
/// <c>length</c>: how far the caller should advance its index. This is one less than the
/// content length when the run stopped before a <c>&lt;</c> (the caller's loop increment then
/// lands on that <c>&lt;</c>), and the full content length when the run reached the end.
/// <c>content</c>: the text of the run.
/// </returns>
private (int length, string content) ParseTaglessContent(string source, int startIndex)
{
    if (startIndex >= source.Length)
    {
        return (0, "");
    }

    var nextTagIndex = source.IndexOf('<', startIndex + 1);
    if (nextTagIndex < 0)
    {
        // No further tag: the run covers the rest of the text.
        var rest = source.Substring(startIndex);
        return (rest.Length, rest);
    }

    var content = source.Substring(startIndex, nextTagIndex - startIndex);
    return (content.Length - 1, content);
}
/// <summary>
/// Splits raw attribute text such as <c>alt="a" src="b"</c> into key/value pairs.
/// </summary>
/// <param name="source">Attribute text of a tag, without the tag name or the closing <c>&gt;</c>.</param>
/// <param name="startIndex">Index at which scanning starts.</param>
/// <returns>
/// One <see cref="ContentAttribute"/> per <c>key="value"</c> pair, in source order. Each
/// <c>Value</c> includes its surrounding double quotes. Parsing stops at the first attribute
/// whose closing quote is missing; attributes found before it are still returned.
/// </returns>
private ContentAttribute[] ParseAttribute(string source, int startIndex)
{
    var attributes = new List<ContentAttribute>();
    var keyLength = 0;
    for (var j = startIndex; j < source.Length - ">".Length; j++)
    {
        // Still inside a key: not at '=' and not directly in front of an opening quote.
        if (source[j] != '=' && source[j + 1] != '"')
        {
            keyLength++;
            continue;
        }

        // The value starts after the two characters '=' and '"'.
        var valueStart = j + "=\"".Length;
        var closingQuote = source.IndexOf('"', valueStart);
        if (closingQuote < 0)
        {
            // Unterminated value, e.g. the attribute text was cut at a '>' inside the value.
            // Previously this left Value null and threw a NullReferenceException.
            break;
        }

        var attribute = new ContentAttribute();
        attribute.Key = source.Substring(j - keyLength, keyLength);
        attribute.Value = "\"" + source.Substring(valueStart, closingQuote - valueStart) + "\"";
        attribute.Source = source.Substring(j - attribute.Key.Length, attribute.Key.Length + attribute.Devide.Length + attribute.Value.Length);

        // Jump to the character after the closing quote; the loop increment then skips the
        // separator in front of the next key.
        j += attribute.Source.Length - attribute.Key.Length;
        keyLength = 0;
        attributes.Add(attribute);
    }
    return attributes.ToArray();
}
}
/// <summary>
/// Result of TeamsMessageParser.Parse: the message text split into the addressed part,
/// the remaining message in tagged and untagged form, and the individual tokens.
/// </summary>
public class ParsedMessageDetail
{
/// <summary>The text that was parsed to produce this result.</summary>
public string Source { get; set; }
/// <summary>Joined content of the tokens flagged as the "to" section (non-image <c>at</c> tags).</summary>
public string To { get; set; }
/// <summary>Joined <c>TaggedContent</c> of all tokens outside the "to" section.</summary>
public string EchoMessage { get; set; }
/// <summary>Joined plain <c>Content</c> of all tokens outside the "to" section.</summary>
public string TagglessMessage { get; set; }
/// <summary>Joined content of the image tokens outside the "to" section.</summary>
public string Images { get; set; }
/// <summary>All tokens in the order they were produced.</summary>
public MessageToken[] Tokens { get; set; }
}
/// <summary>
/// One piece of a parsed message: either a tagged element (begin tag, content, optional end
/// tag and attributes) or a run of plain text, for which only <see cref="Content"/> is set.
/// </summary>
public class MessageToken
{
    /// <summary>Sequence number assigned by the parser.</summary>
    public int Index { get; set; }

    /// <summary>Tag name: <see cref="BeginTag"/> with the angle brackets removed; null when there is no begin tag.</summary>
    public string Tag => BeginTag?.Replace("<", "").Replace(">", "");

    /// <summary>True for an <c>at</c> element that is not an image.</summary>
    public bool IsToSection => Tag == "at" && !IsImg;

    /// <summary>Opening tag without attributes, e.g. <c>&lt;img&gt;</c>.</summary>
    public string BeginTag { get; set; }

    /// <summary>
    /// The opening tag as echoed back: empty without a begin tag, the bare begin tag when there
    /// are no attributes or the element is closed, otherwise the begin tag with the attribute
    /// text inserted in front of its last character.
    /// </summary>
    public string FullQualifiedBeginTag
    {
        get
        {
            if (string.IsNullOrWhiteSpace(BeginTag))
            {
                return "";
            }
            if (Attributes == null || IsTagClosed)
            {
                return BeginTag;
            }
            var lastIndex = BeginTag.Length - 1;
            return BeginTag.Substring(0, lastIndex) + " " + AttributeSource + BeginTag.Substring(lastIndex, 1);
        }
    }

    /// <summary>Closing tag text, or empty when none was found.</summary>
    public string EndTag { get; set; }

    /// <summary>Whether a matching closing tag was found.</summary>
    public bool IsTagClosed { get; set; }

    /// <summary>Text of the token.</summary>
    public string Content { get; set; }

    /// <summary>True when the attribute type is <c>img</c> and at least one attribute was parsed.</summary>
    public bool IsImg => AttributeType == "img" && ContainsAttribute;

    /// <summary>The content as echoed back: images as-is, everything else wrapped in its tags.</summary>
    public string TaggedContent
    {
        get
        {
            if (IsImg)
            {
                return Content;
            }
            return FullQualifiedBeginTag + Content + EndTag;
        }
    }

    /// <summary>Raw attribute text of the begin tag.</summary>
    public string AttributeSource { get; set; }

    /// <summary>Name of the tag the attributes belong to.</summary>
    public string AttributeType { get; set; }

    /// <summary>True when at least one attribute was parsed.</summary>
    public bool ContainsAttribute => Attributes != null && Attributes.Length > 0;

    /// <summary>Parsed attributes; null when none were parsed.</summary>
    public ContentAttribute[] Attributes { get; set; }
}
/// <summary>
/// A single tag attribute as produced by TeamsMessageParser.ParseAttribute.
/// </summary>
public class ContentAttribute
{
/// <summary>The attribute's text as sliced from the attribute source: key, separator and quoted value.</summary>
public string Source { get; set; }
/// <summary>Separator between key and value; defaults to <c>=</c>.</summary>
public string Devide { get; set; } = "=";
/// <summary>Attribute name (the text in front of the separator).</summary>
public string Key { get; set; }
/// <summary>Attribute value including its surrounding double quotes.</summary>
public string Value { get; set; }
}
/// <summary>
/// Small sequence helpers used by the parser.
/// </summary>
public static class EnumerableExtensions
{
    /// <summary>
    /// Joins the string representations of the elements, placing <paramref name="separator"/>
    /// between them.
    /// </summary>
    /// <param name="source">Elements to join.</param>
    /// <param name="separator">Text placed between elements; empty by default.</param>
    /// <returns>The joined string.</returns>
    public static string ToJoinedString<T>(this IEnumerable<T> source, string separator = "")
    {
        return string.Join(separator, source);
    }

    /// <summary>
    /// Appends individual values to the end of a sequence. Arguments are validated when the
    /// method is called; the elements themselves are produced lazily.
    /// </summary>
    /// <param name="source">Sequence to extend.</param>
    /// <param name="values">Values yielded after the elements of <paramref name="source"/>.</param>
    /// <returns>The elements of <paramref name="source"/> followed by <paramref name="values"/>.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="values"/> is null.
    /// </exception>
    public static IEnumerable<TSource> Concat<TSource>(this IEnumerable<TSource> source, params TSource[] values)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }
        if (values == null)
        {
            // Fail here rather than with a NullReferenceException once enumeration starts.
            throw new ArgumentNullException(nameof(values));
        }
        return source.ConcatCore(values);
    }

    // Iterator part of Concat, kept separate so the argument checks above run eagerly.
    private static IEnumerable<TSource> ConcatCore<TSource>(this IEnumerable<TSource> source, params TSource[] values)
    {
        foreach (var item in source)
        {
            yield return item;
        }
        foreach (var value in values)
        {
            yield return value;
        }
    }
}
@guitarrapc
Copy link
Author

Result

image

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment