Last active
September 14, 2015 22:29
-
-
Save aeshirey/dddb96006bb4d2e8cfc4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Writes a set of <see cref="WikipediaArticle"/>s to disk in a simple binary format:
/// for each article, the title followed by the plaintext contents, each emitted with
/// <see cref="BinaryWriter.Write(string)"/> (a length-prefixed string).
/// </summary>
/// <remarks>
/// The output file is created with <see cref="File.Create(string)"/>, so an existing file
/// of the same name is overwritten. No article count or other header is written; a reader
/// is expected to consume title/plaintext pairs until the end of the stream.
/// <paramref name="articles"/> is enumerated exactly once, so a lazily produced sequence
/// is streamed to disk without being buffered by this method.
/// </remarks>
/// <param name="articles">A set of articles, probably from <see cref="ReadArticlesFromXmlDump"/></param>
/// <param name="outputFilename">The filename into which articles should be saved</param>
/// <returns>The number of articles written</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="articles"/> is <c>null</c>.
/// </exception>
public static int WriteToDisk(IEnumerable<WikipediaArticle> articles, string outputFilename)
{
    // Validate before touching the file system: File.Create truncates an existing file,
    // so failing later on a null sequence would destroy previous output for nothing.
    if (articles == null)
    {
        throw new System.ArgumentNullException("articles");
    }

    var numberOfArticles = 0;

    using (var stream = File.Create(outputFilename))
    using (var writer = new BinaryWriter(stream))
    {
        foreach (var article in articles)
        {
            // Order matters: a reader must consume the title first, then the plaintext.
            writer.Write(article.Title);
            writer.Write(article.Plaintext);
            ++numberOfArticles;
        }
    }

    return numberOfArticles;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment