Skip to content

Instantly share code, notes, and snippets.

@JamesSkemp
Last active February 10, 2016 19:43
Show Gist options
  • Save JamesSkemp/3245ca2e03655395ce9c to your computer and use it in GitHub Desktop.
Save JamesSkemp/3245ca2e03655395ce9c to your computer and use it in GitHub Desktop.
Search a WordPress XML export for text in pages/posts.
// For running in LINQPad.
// Loads a WordPress XML export and lists the pages/posts whose content contains searchTerm,
// newest first.

// Post types to skip; only referenced by the alternative (commented-out) filter further down.
var typesToExclude = new List<string>() { "nav_menu_item", "attachment" };
// Post types to search.
var typesToInclude = new List<string>() { "page", "post" };
// Text to look for inside each item's content.
var searchTerm = "example.com/wp-content/uploads";
// Location of the export file on disk.
var xmlPath = @"C:\Users\jskemp\Downloads\example.wordpress.2016-01-22.xml";

XNamespace wpNs = "http://wordpress.org/export/1.2/";
XNamespace contentNs = "http://purl.org/rss/1.0/modules/content/";

var xml = XDocument.Load(xmlPath);

// Fail with a clear message instead of a NullReferenceException when the file is not an RSS-style export.
var channel = xml.Root.Element("channel");
if (channel == null)
{
    throw new InvalidOperationException("No <channel> element found under the root of '" + xmlPath + "'.");
}

// Materialize once so the counts and the search below do not each re-walk the XML tree.
var allItems = channel.Elements("item").ToList();
("Items in XML: " + allItems.Count).Dump();

// The (string) conversion on XElement yields null for a missing element (where .Value would throw),
// so items without a wp:post_type are simply filtered out.
var itemsOfInterest = allItems
    //.Where(i => !typesToExclude.Contains((string)i.Element(wpNs + "post_type")))
    .Where(i => typesToInclude.Contains((string)i.Element(wpNs + "post_type")))
    .ToList();
itemsOfInterest.Count.Dump();

itemsOfInterest
    // Items with no content:encoded element are treated as having empty content.
    .Where(i => ((string)i.Element(contentNs + "encoded") ?? string.Empty).Contains(searchTerm))
    // post_date is compared as a string; a missing date sorts last.
    .OrderByDescending(i => (string)i.Element(wpNs + "post_date"))
    .Select(i => new
    {
        title = (string)i.Element("title"),
        guid = (string)i.Element("guid"),
        // Kept as the XElement (not its text) so the dump shows the element itself.
        content = i.Element(contentNs + "encoded"),
        date = (string)i.Element(wpNs + "post_date")
    })
    .Dump();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment