Created
September 12, 2018 16:29
-
-
Save felipebaltazar/3a84150f5c2a86418222bfcde0a9f860 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Extracts a short plain-text preview from an HTML fragment: the first two
/// non-blank text nodes found under <c>&lt;body&gt;</c>, one per line, HTML-decoded.
/// </summary>
/// <param name="sTexto">HTML markup to filter. May be null or empty.</param>
/// <returns>
/// The decoded preview text (each kept line terminated by a newline). The input is
/// returned unchanged when it is null/empty, when no text node exists under
/// <c>&lt;body&gt;</c>, or when processing throws (the exception is reported through
/// <c>TratarErro</c>).
/// </returns>
public static string FiltrarConteudoHTML(string sTexto) {
    // Upper bound on how many text lines make up the preview.
    const int maxLines = 2;

    // Nothing to parse; previously this path only produced the same result via a caught exception.
    if (string.IsNullOrEmpty(sTexto)) {
        return sTexto;
    }

    try {
        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(sTexto);

        // Snapshot the candidates first so removing nodes does not disturb the enumeration.
        // NOTE(review): HtmlAgilityPack appears to name comment nodes "#comment", so the
        // "comment" test below presumably never matches - confirm before relying on it.
        var candidates = htmlDoc.DocumentNode.Descendants()
            .Where(n => n.Name == "script" || n.Name == "comment" || n.Name == "div")
            .ToList();

        foreach (var node in candidates) {
            // Remove every non-div candidate (unless its text starts with DOCTYPE),
            // plus any candidate whose id is "header".
            var shouldRemove =
                (node.Name != "div" && !node.InnerText.StartsWith("DOCTYPE", StringComparison.Ordinal))
                || node.Attributes["id"]?.Value == "header";

            if (shouldRemove) {
                node.Remove();
            }
        }

        // SelectNodes yields null (not an empty collection) when nothing matches,
        // e.g. for plain text or markup without a <body>; keep the input as-is then.
        var textNodes = htmlDoc.DocumentNode.SelectNodes("//body//text()");
        if (textNodes == null) {
            return sTexto;
        }

        // Strip line breaks inside each text node, skip blank ones, and stop
        // enumerating as soon as enough lines have been collected.
        var previewLines = textNodes
            .Select(node => Regex.Replace(node.InnerText, @"\r\n?|\n", string.Empty))
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .Take(maxLines);

        var output = new StringBuilder();
        foreach (var line in previewLines) {
            output.AppendLine(line);
        }

        return HttpUtility.HtmlDecode(output.ToString());
    } catch (Exception ex) {
        // Best effort: report the failure and fall back to the unfiltered input.
        TratarErro(ex,$"{nameof(Utils)}.{nameof(FiltrarConteudoHTML)}");
        return sTexto;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment