Skip to content

Instantly share code, notes, and snippets.

@thebitbrine
Created March 28, 2019 23:16
Show Gist options
  • Save thebitbrine/8b9d3557c71b3b4b7b46b3bdbe2a6d5f to your computer and use it in GitHub Desktop.
Save thebitbrine/8b9d3557c71b3b4b7b46b3bdbe2a6d5f to your computer and use it in GitHub Desktop.
Gets the text of every leaf (childless) descendant node, stripping newlines but without removing spaces.
/// <summary>
/// Collects the text of every childless descendant of <paramref name="Node"/>,
/// with newline characters removed and all other whitespace left intact.
/// </summary>
/// <param name="Node">The node whose descendants are scanned.</param>
/// <returns>
/// The texts in the order the descendants are enumerated. Entries that are null,
/// blank, or consist only of whitespace and asterisks are omitted.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="Node"/> is null.
/// </exception>
public string[] GetChildNodeTexts(HtmlNode Node)
{
    if (Node == null)
    {
        throw new System.ArgumentNullException(nameof(Node));
    }

    // NOTE(review): every childless descendant is considered, whatever its node
    // type - confirm whether non-text leaves (e.g. comments) should be included.
    return Node.Descendants()
        .Where(n => !n.HasChildNodes)
        // Read InnerText once per node instead of re-evaluating it in each predicate.
        .Select(n => n.InnerText)
        // A text that is blank once asterisks are removed is also blank (or "*")
        // before removal, so this single check covers the empty, "\n" and "*" cases.
        .Where(text => text != null && !string.IsNullOrWhiteSpace(text.Replace("*", "")))
        // Only newlines are stripped; spaces and other whitespace are preserved.
        .Select(text => text.Replace("\n", ""))
        .ToArray();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment