Skip to content

Instantly share code, notes, and snippets.

@ayende
Created April 17, 2014 04:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ayende/10953417 to your computer and use it in GitHub Desktop.
Save ayende/10953417 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Threading.Tasks;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Version = Lucene.Net.Util.Version;
namespace ConsoleApplication1
{
/// <summary>
/// Benchmark harness: reads lines from a gzip-compressed text file and indexes each line
/// as a single-field ("Title") Lucene document, using one indexing worker per processor.
/// Prints the elapsed time of the read, index and commit phases to the console.
/// </summary>
internal class Program
{
    /// <summary>Input file used when no path is supplied on the command line.</summary>
    private const string DefaultTitlesPath = @"C:\Work\Corax\Tryouts\bin\Debug\titles.gz";

    /// <summary>
    /// Entry point. Starts the indexing workers, feeds them every line of the input file
    /// through a shared queue, waits for the queue to drain, then commits the index.
    /// </summary>
    /// <param name="args">
    /// Optional: <c>args[0]</c> is the path of the gzip-compressed input file.
    /// When omitted, <see cref="DefaultTitlesPath"/> is used.
    /// </param>
    private static void Main(string[] args)
    {
        var titlesPath = args != null && args.Length > 0 ? args[0] : DefaultTitlesPath;

        using (var fti = FSDirectory.Open("index"))
        using (var writer = new IndexWriter(fti, new StandardAnalyzer(Version.LUCENE_30), IndexWriter.MaxFieldLength.UNLIMITED))
        using (var queue = new BlockingCollection<string>())
        {
            var tasks = new List<Task>();
            for (int i = 0; i < Environment.ProcessorCount; i++)
            {
                tasks.Add(Task.Run(() => IndexLines(writer, queue)));
            }

            var sp = Stopwatch.StartNew();
            int count = 0;
            try
            {
                using (var reader = new StreamReader(new GZipStream(File.OpenRead(titlesPath), CompressionMode.Decompress)))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        queue.Add(line);
                        if (++count % 50000 == 0)
                        {
                            Console.WriteLine("{0,10:#,#}: {1}", count, line);
                        }
                    }
                }
            }
            finally
            {
                // Always signal end-of-input, even if reading failed, so the workers
                // finish draining the queue and exit instead of blocking forever.
                queue.CompleteAdding();
            }
            Console.WriteLine("Reading {0:#,#} in {1}", count, sp.Elapsed);

            sp.Restart();
            // Hoisted: the task list no longer changes, so build the array once.
            var pending = tasks.ToArray();
            // Poll once a second so the number of lines still queued can be reported.
            // WaitAll rethrows (as AggregateException) if any worker faulted.
            while (Task.WaitAll(pending, 1000) == false)
            {
                Console.Write("\r{0,10:#,#} ", queue.Count);
            }
            Console.WriteLine("\rIndexing " + sp.Elapsed);

            sp.Restart();
            writer.Commit();
            Console.WriteLine("\rCommitting " + sp.Elapsed);
        }
    }

    /// <summary>
    /// Worker loop: takes lines from <paramref name="queue"/> until it is marked complete
    /// and empty, adding each one to the index through the shared <paramref name="writer"/>.
    /// </summary>
    /// <param name="writer">Index writer shared by all workers.</param>
    /// <param name="queue">Queue of lines to index.</param>
    private static void IndexLines(IndexWriter writer, BlockingCollection<string> queue)
    {
        // One Document/Field pair per worker, reused for every line to avoid
        // allocating a new document per title.
        var doc = new Document();
        var field = new Field("Title", string.Empty, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS);
        doc.Add(field);

        // GetConsumingEnumerable ends cleanly once the queue is completed and drained.
        // The previous "check IsCompleted, then Take()" pattern raced between consumers:
        // Take() throws InvalidOperationException when another worker removes the last
        // item (or adding completes) between the check and the call.
        foreach (var line in queue.GetConsumingEnumerable())
        {
            field.SetValue(line);
            writer.AddDocument(doc);
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment