Skip to content

Instantly share code, notes, and snippets.

@sebgod
Created April 26, 2014 05:38
Show Gist options
  • Save sebgod/11312621 to your computer and use it in GitHub Desktop.
Save sebgod/11312621 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace System.Linq
{
public static class LuceneNetExtensions
{
/// <summary>
/// <para>
/// Replacement for list.OrderBy(?).Take(n)
/// </para>
/// <para>
/// 1- Low Memory usage (since this method requires only ~"2*n" elements be in memory)
/// </para>
/// 2- Speed up by an order of magnitude especially when the input is large.
/// </summary>
public static IEnumerable<T> TakeOrdered<T, TKey>(this IEnumerable<T> list, int n, Func<T, TKey> keySelector, bool ascending = true) where TKey : IComparable<TKey>
{
var pq = new MyPriorityQueue<T>(n,
ascending
?
(Func<T, T, bool>)((a, b) => keySelector(a).CompareTo(keySelector(b)) >= 0)
:
(Func<T, T, bool>)((a, b) => keySelector(a).CompareTo(keySelector(b)) < 0)
);
Stack<T> stack = new Stack<T>();
int count = 0;
foreach (T item in list)
{
pq.InsertWithOverflow(item);
count++;
}
int min = Math.Min(count, n);
for (int i = 0; i < min; i++)
stack.Push(pq.Pop());
for (int i = 0; i < min; i++)
yield return stack.Pop();
}
public static IEnumerable<T> TakeOrdered<T>(this IEnumerable<T> list, int n, bool ascending = true) where T : IComparable<T>
{
return TakeOrdered(list, n, x => x,ascending);
}
class MyPriorityQueue<T> : Lucene.Net.Util.PriorityQueue<T>
{
Func<T, T, bool> _LessThan;
internal MyPriorityQueue(int n, Func<T, T, bool> lessThan)
{
_LessThan = lessThan;
Initialize(n);
}
public override bool LessThan(T a, T b)
{
return _LessThan(a, b);
}
}
}
}
#region Lucene.Net
/// --------------------------------------------------------------------------------------
/// PriorityQueue from Lucene.Net
/// http://incubator.apache.org/lucene.net/
///
/// Remove this part if you already have a reference to Lucene.Net
/// --------------------------------------------------------------------------------------
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace Lucene.Net.Util
{
/// <summary>A PriorityQueue maintains a partial ordering of its elements such that the
/// least element can always be found in constant time. Put()'s and pop()'s
/// require log(size) time.
///
/// <p/><b>NOTE</b>: This class pre-allocates a full array of
/// length <code>maxSize+1</code>, in {@link #initialize}.
///
/// </summary>
public abstract class PriorityQueue<T>
{
private int size;
private int maxSize;
protected internal T[] heap;
/// <summary>Determines the ordering of objects in this priority queue. Subclasses
/// must define this one method.
/// </summary>
public abstract bool LessThan(T a, T b);
/// <summary> This method can be overridden by extending classes to return a sentinel
/// object which will be used by {@link #Initialize(int)} to fill the queue, so
/// that the code which uses that queue can always assume it's full and only
/// change the top without attempting to insert any new object.<br/>
///
/// Those sentinel values should always compare worse than any non-sentinel
/// value (i.e., {@link #LessThan(Object, Object)} should always favor the
/// non-sentinel values).<br/>
///
/// By default, this method returns false, which means the queue will not be
/// filled with sentinel values. Otherwise, the value returned will be used to
/// pre-populate the queue. Adds sentinel values to the queue.<br/>
///
/// If this method is extended to return a non-null value, then the following
/// usage pattern is recommended:
///
/// <pre>
/// // extends getSentinelObject() to return a non-null value.
/// PriorityQueue pq = new MyQueue(numHits);
/// // save the 'top' element, which is guaranteed to not be null.
/// MyObject pqTop = (MyObject) pq.top();
/// &lt;...&gt;
/// // now in order to add a new element, which is 'better' than top (after
/// // you've verified it is better), it is as simple as:
/// pqTop.change().
/// pqTop = pq.updateTop();
/// </pre>
///
/// <b>NOTE:</b> if this method returns a non-null value, it will be called by
/// {@link #Initialize(int)} {@link #Size()} times, relying on a new object to
/// be returned and will not check if it's null again. Therefore you should
/// ensure any call to this method creates a new instance and behaves
/// consistently, e.g., it cannot return null if it previously returned
/// non-null.
///
/// </summary>
/// <returns> the sentinel object to use to pre-populate the queue, or null if
/// sentinel objects are not supported.
/// </returns>
protected internal virtual T GetSentinelObject()
{
return default(T);
}
/// <summary>Subclass constructors must call this. </summary>
protected internal void Initialize(int maxSize)
{
size = 0;
int heapSize;
if (0 == maxSize)
// We allocate 1 extra to avoid if statement in top()
heapSize = 2;
else
{
if (maxSize == Int32.MaxValue)
{
// Don't wrap heapSize to -1, in this case, which
// causes a confusing NegativeArraySizeException.
// Note that very likely this will simply then hit
// an OOME, but at least that's more indicative to
// caller that this values is too big. We don't +1
// in this case, but it's very unlikely in practice
// one will actually insert this many objects into
// the PQ:
heapSize = Int32.MaxValue;
}
else
{
// NOTE: we add +1 because all access to heap is
// 1-based not 0-based. heap[0] is unused.
heapSize = maxSize + 1;
}
}
heap = new T[heapSize];
this.maxSize = maxSize;
// If sentinel objects are supported, populate the queue with them
T sentinel = GetSentinelObject();
if (sentinel != null && !sentinel.Equals(default(T)))
{
heap[1] = sentinel;
for (int i = 2; i < heap.Length; i++)
{
heap[i] = GetSentinelObject();
}
size = maxSize;
}
}
/// <summary> Adds an Object to a PriorityQueue in log(size) time. If one tries to add
/// more objects than maxSize from initialize a RuntimeException
/// (ArrayIndexOutOfBound) is thrown.
///
/// </summary>
/// <deprecated> use {@link #Add(Object)} which returns the new top object,
/// saving an additional call to {@link #Top()}.
/// </deprecated>
[Obsolete("use Add(Object) which returns the new top object, saving an additional call to Top().")]
public void Put(T element)
{
size++;
heap[size] = element;
UpHeap();
}
/// <summary> Adds an Object to a PriorityQueue in log(size) time. If one tries to add
/// more objects than maxSize from initialize an
/// {@link ArrayIndexOutOfBoundsException} is thrown.
///
/// </summary>
/// <returns> the new 'top' element in the queue.
/// </returns>
public T Add(T element)
{
size++;
heap[size] = element;
UpHeap();
return heap[1];
}
/// <summary> Adds element to the PriorityQueue in log(size) time if either the
/// PriorityQueue is not full, or not lessThan(element, top()).
///
/// </summary>
/// <param name="element">
/// </param>
/// <returns> true if element is added, false otherwise.
/// </returns>
/// <deprecated> use {@link #InsertWithOverflow(Object)} instead, which
/// encourages objects reuse.
/// </deprecated>
[Obsolete("use InsertWithOverflow(Object) instead, which encourages objects reuse.")]
public virtual bool Insert(T element)
{
return !element.Equals(InsertWithOverflow(element));
}
/// <summary> insertWithOverflow() is the same as insert() except its
/// return value: it returns the object (if any) that was
/// dropped off the heap because it was full. This can be
/// the given parameter (in case it is smaller than the
/// full heap's minimum, and couldn't be added), or another
/// object that was previously the smallest value in the
/// heap and now has been replaced by a larger one, or null
/// if the queue wasn't yet full with maxSize elements.
/// </summary>
public virtual T InsertWithOverflow(T element)
{
if (size < maxSize)
{
Put(element);
return default(T);
}
else if (size > 0 && !LessThan(element, heap[1]))
{
T ret = heap[1];
heap[1] = element;
AdjustTop();
return ret;
}
else
{
return element;
}
}
/// <summary>Returns the least element of the PriorityQueue in constant time. </summary>
public T Top()
{
// We don't need to check size here: if maxSize is 0,
// then heap is length 2 array with both entries null.
// If size is 0 then heap[1] is already null.
return heap[1];
}
/// <summary>Removes and returns the least element of the PriorityQueue in log(size)
/// time.
/// </summary>
public T Pop()
{
if (size > 0)
{
T result = heap[1]; // save first value
heap[1] = heap[size]; // move last to first
heap[size] = default(T); // permit GC of objects
size--;
DownHeap(); // adjust heap
return result;
}
else
return default(T);
}
/// <summary> Should be called when the Object at top changes values. Still log(n) worst
/// case, but it's at least twice as fast to
///
/// <pre>
/// pq.top().change();
/// pq.adjustTop();
/// </pre>
///
/// instead of
///
/// <pre>
/// o = pq.pop();
/// o.change();
/// pq.push(o);
/// </pre>
///
/// </summary>
/// <deprecated> use {@link #UpdateTop()} which returns the new top element and
/// saves an additional call to {@link #Top()}.
/// </deprecated>
[Obsolete("use UpdateTop() which returns the new top element and saves an additional call to Top()")]
public void AdjustTop()
{
DownHeap();
}
/// <summary> Should be called when the Object at top changes values. Still log(n) worst
/// case, but it's at least twice as fast to
///
/// <pre>
/// pq.top().change();
/// pq.updateTop();
/// </pre>
///
/// instead of
///
/// <pre>
/// o = pq.pop();
/// o.change();
/// pq.push(o);
/// </pre>
///
/// </summary>
/// <returns> the new 'top' element.
/// </returns>
public T UpdateTop()
{
DownHeap();
return heap[1];
}
/// <summary>Returns the number of elements currently stored in the PriorityQueue. </summary>
public int Size()
{
return size;
}
/// <summary>Removes all entries from the PriorityQueue. </summary>
public void Clear()
{
for (int i = 0; i <= size; i++)
{
heap[i] = default(T);
}
size = 0;
}
private void UpHeap()
{
int i = size;
T node = heap[i]; // save bottom node
int j = URShift(i, 1);
while (j > 0 && LessThan(node, heap[j]))
{
heap[i] = heap[j]; // shift parents down
i = j;
j = URShift(j, 1);
}
heap[i] = node; // install saved node
}
private void DownHeap()
{
int i = 1;
T node = heap[i]; // save top node
int j = i << 1; // find smaller child
int k = j + 1;
if (k <= size && LessThan(heap[k], heap[j]))
{
j = k;
}
while (j <= size && LessThan(heap[j], node))
{
heap[i] = heap[j]; // shift up child
i = j;
j = i << 1;
k = j + 1;
if (k <= size && LessThan(heap[k], heap[j]))
{
j = k;
}
}
heap[i] = node; // install saved node
}
public int URShift(int number, int bits)
{
return (int)(((uint)number) >> bits);
}
}
}
#endregion
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment