Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Replacement for EF Core .Contains, that avoids SQL Server plan cache pollution
public static class IQueryableExtensions
{
public static IQueryable<TQuery> In<TKey, TQuery>(
this IQueryable<TQuery> queryable,
IEnumerable<TKey> values,
Expression<Func<TQuery, TKey>> keySelector)
{
if (values == null)
{
throw new ArgumentNullException(nameof(values));
}
if (keySelector == null)
{
throw new ArgumentNullException(nameof(keySelector));
}
if (!values.Any())
{
return queryable.Take(0);
}
var distinctValues = Bucketize(values);
if (distinctValues.Length > 2048)
{
throw new ArgumentException("Too many parameters for SQL Server, reduce the number of parameters", nameof(keySelector));
}
var predicates = distinctValues
.Select(v =>
{
// Create an expression that captures the variable so EF can turn this into a parameterized SQL query
Expression<Func<TKey>> valueAsExpression = () => v;
return Expression.Equal(keySelector.Body, valueAsExpression.Body);
})
.ToList();
while (predicates.Count > 1)
{
predicates = PairWise(predicates).Select(p => Expression.OrElse(p.Item1, p.Item2)).ToList();
}
var body = predicates.Single();
var clause = Expression.Lambda<Func<TQuery, bool>>(body, keySelector.Parameters);
return queryable.Where(clause);
}
/// <summary>
/// Break a list of items tuples of pairs.
/// </summary>
private static IEnumerable<(T, T)> PairWise<T>(this IEnumerable<T> source)
{
var sourceEnumerator = source.GetEnumerator();
while (sourceEnumerator.MoveNext())
{
var a = sourceEnumerator.Current;
sourceEnumerator.MoveNext();
var b = sourceEnumerator.Current;
yield return (a, b);
}
}
private static TKey[] Bucketize<TKey>(IEnumerable<TKey> values)
{
var distinctValueList = values.Distinct().ToList();
// Calculate bucket size as 1,2,4,8,16,32,64,...
var bucket = 1;
while (distinctValueList.Count > bucket)
{
bucket *= 2;
}
// Fill all slots.
var lastValue = distinctValueList.Last();
for (var index = distinctValueList.Count; index < bucket; index++)
{
distinctValueList.Add(lastValue);
}
var distinctValues = distinctValueList.ToArray();
return distinctValues;
}
}
@moredatapls
Copy link

moredatapls commented Jun 2, 2020

Great work! Could you specify the license of this piece of code?

@ErikEJ
Copy link
Author

ErikEJ commented Jun 2, 2020

There is no license, and no license restrictions - use as you see fit!

@alb-xss
Copy link

alb-xss commented Dec 27, 2020

This is absolutely brilliant, the buckets idea is genius. Thank you for sharing your solution

@blogcraft
Copy link

blogcraft commented Feb 17, 2021

I can't see any difference. I'm using .Net 5.

@ErikEJ
Copy link
Author

ErikEJ commented Feb 17, 2021

@OculiViridi
Copy link

OculiViridi commented Jun 4, 2021

@ErikEJ Great piece of code! 😄

Is it possible to add this extension method in EF.Functions package? Here you can see also the Microsoft documentation.

I think it would be even more nice if it were possible to implement it as the EF.Functions.Like() method, to obtain ad usage like this:

var result = dbContext.MyEntities
    .Where(e => EF.Functions.In(values => e.Id))
    .ToList();

@h0wXD
Copy link

h0wXD commented Aug 13, 2021

I'm not sure what you're trying to achieve with the bucketize, but I think it can be much simpler, and only inlines once it exceeds the parameters:

// SQL Server max parameter count is 2100, keep space
private const int MaxSqlParameterCount = 2048;

// https://github.com/dotnet/efcore/issues/13617
public static IQueryable<TQuery> WhereIn<TKey, TQuery>(
    this IQueryable<TQuery> queryable,
    IEnumerable<TKey> values,
    Expression<Func<TQuery, TKey>> keySelector)
{
    if (values == null) throw new ArgumentNullException(nameof(values));
    if (keySelector == null) throw new ArgumentNullException(nameof(keySelector));
    if (!values.Any()) return queryable.Take(0);

    var distinctValues = values.Distinct().ToList();
    Expression expr = null;
    Expression<Func<TKey>> valueAsExpression = null;

    for (var i = 0; i < distinctValues.Count; i++)
    {
        var id = distinctValues[i];

        valueAsExpression = () => id;
        expr = expr == null
            ? Expression.Equal(keySelector.Body, valueAsExpression.Body)
            : i < MaxSqlParameterCount
                ? Expression.OrElse(expr, Expression.Equal(keySelector.Body, valueAsExpression.Body))
                : Expression.OrElse(expr, Expression.Equal(keySelector.Body, Expression.Constant(id)));
    }

    Bucketize(distinctValues.Count, () =>
        expr = Expression.OrElse(expr, Expression.Equal(keySelector.Body, valueAsExpression.Body)));

    var lambda = Expression.Lambda<Func<TQuery, bool>>(expr, keySelector.Parameters);

    return queryable.Where(lambda);
}

private static void Bucketize(int parameterCount, Action padExpression)
{
    if (parameterCount >= MaxSqlParameterCount)
    {
        return;
    }

    var bucket = 1;
    while (parameterCount > bucket)
    {
        bucket *= 2;
    }

    for (var i = parameterCount; i < bucket; i++)
    {
        padExpression();
    }
}

Updated to bucketize and limit the number of plans

@ErikEJ
Copy link
Author

ErikEJ commented Aug 13, 2021

@h0wXD Buckets are important in order to limit the number of plans, see my blog post here: https://erikej.github.io/efcore/sqlserver/2020/03/30/ef-core-cache-pollution.html

@RichardD2
Copy link

RichardD2 commented Aug 23, 2021

Couldn't you get rid of the PairWise function, and replace the loop:

while (predicates.Count > 1)
{
    predicates = PairWise(predicates).Select(p => Expression.OrElse(p.Item1, p.Item2)).ToList();
}

var body = predicates.Single();

with:

var body = predicates.Aggregate(Expression.OrElse);

@RichardD2
Copy link

RichardD2 commented Aug 24, 2021

Or, if you're worried about the deep nesting of conditions:

private readonly struct HalfList<T>
{
    private readonly IReadOnlyList<T> _list;
    private readonly int _startIndex;

    private HalfList(IReadOnlyList<T> list, int startIndex, int count)
    {
        _list = list ?? throw new ArgumentNullException(nameof(list));
        _startIndex = startIndex;
        Count = count;
    }

    public HalfList(IReadOnlyList<T> list) : this(list, 0, list.Count)
    {
    }

    public int Count { get; }
    
    public T Item => Count == 1 ? _list[_startIndex] : throw new InvalidOperationException();

    public (HalfList<T> left, HalfList<T> right) Split()
    {
        if (Count < 2) throw new InvalidOperationException();

        int pivot = Count >> 1;
        var left = new HalfList<T>(_list, _startIndex, pivot);
        var right = new HalfList<T>(_list, _startIndex + pivot, Count - pivot);
        return (left, right);
    }
}

private static Expression CombinePredicates(IReadOnlyList<Expression> parts, Func<Expression, Expression, Expression> fn)
{
    if (parts.Count == 0) throw new ArgumentException("At least one part is required.", nameof(parts));
    if (parts.Count == 1) return parts[0];
    
    var segment = new HalfList<Expression>(parts);
    return CombineCore(segment.Split(), fn);

    static Expression CombineCore((HalfList<Expression> left, HalfList<Expression> right) x, Func<Expression, Expression, Expression> fn)
    {
        var left = x.left.Count == 1 ? x.left.Item : CombineCore(x.left.Split(), fn);
        var right = x.right.Count == 1 ? x.right.Item : CombineCore(x.right.Split(), fn);
        return fn(left, right);
    }
}
var body = CombinePredicates(predicates, Expression.OrElse);

@joelmandell
Copy link

joelmandell commented Dec 3, 2021

I had to use the IQueryableExtensions.In(), to support older SQL Server versions.
And had an issue when using a library that parses the table name from generated query (I am using multiple Interceptors). The issue is when IEnumerable<TKey> values is empty.

It has the check if(!values.Any()) and returns queryable.Take(0).
When that call is used EF Core generates an subquery, and the library that I use (EFCoreSecondLevelCacheInterceptor) fails trying to fetch table name from query.

I resorted to return queryable.Where(x => true) instead. And that seems to work after that. Posting this in case someone else has this problem.

Updated code of that extension method, according to that change:

public static IQueryable<TQuery> In<TKey, TQuery>(
            this IQueryable<TQuery> queryable,
            IEnumerable<TKey> values,
            Expression<Func<TQuery, TKey>> keySelector)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            if (keySelector == null)
            {
                throw new ArgumentNullException(nameof(keySelector));
            }

            if (!values.Any())
            {
                //.Where instead of .Take(0), cause that seem to produce "funky" SQL.
                return queryable.Where(x => true);
            }

            var distinctValues = Bucketize(values);

            if (distinctValues.Length > 2048)
            {
                throw new ArgumentException("Too many parameters for SQL Server, reduce the number of parameters", nameof(keySelector));
            }

            var predicates = distinctValues
                .Select(v =>
                {
                    // Create an expression that captures the variable so EF can turn this into a parameterized SQL query
                    Expression<Func<TKey>> valueAsExpression = () => v;
                    return Expression.Equal(keySelector.Body, valueAsExpression.Body);
                })
                .ToList();

            while (predicates.Count > 1)
            {
                predicates = PairWise(predicates).Select(p => Expression.OrElse(p.Item1, p.Item2)).ToList();
            }

            var body = predicates.Single();

            var clause = Expression.Lambda<Func<TQuery, bool>>(body, keySelector.Parameters);

            return queryable.Where(clause);
        }

@yv989c
Copy link

yv989c commented Dec 29, 2021

@ErikEJ I believe you may find my project useful. It solves this problem in a flexible way. I have been using a similar strategy in my work with acceptable results, so I put some effort and made a generic version of it for everyone to use. Please, feel free to code review it if you have time. Ideas are welcome.

@yv989c
Copy link

yv989c commented Dec 29, 2021

Hey @joelmandell. Please take a look ☝️ too 🙂.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment