Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Replacement for EF Core .Contains, that avoids SQL Server plan cache pollution
public static class IQueryableExtensions
{
public static IQueryable<TQuery> In<TKey, TQuery>(
this IQueryable<TQuery> queryable,
IEnumerable<TKey> values,
Expression<Func<TQuery, TKey>> keySelector)
{
if (values == null)
{
throw new ArgumentNullException(nameof(values));
}
if (keySelector == null)
{
throw new ArgumentNullException(nameof(keySelector));
}
if (!values.Any())
{
return queryable.Take(0);
}
var distinctValues = Bucketize(values);
if (distinctValues.Length > 2048)
{
throw new ArgumentException("Too many parameters for SQL Server, reduce the number of parameters", nameof(keySelector));
}
var predicates = distinctValues
.Select(v =>
{
// Create an expression that captures the variable so EF can turn this into a parameterized SQL query
Expression<Func<TKey>> valueAsExpression = () => v;
return Expression.Equal(keySelector.Body, valueAsExpression.Body);
})
.ToList();
while (predicates.Count > 1)
{
predicates = PairWise(predicates).Select(p => Expression.OrElse(p.Item1, p.Item2)).ToList();
}
var body = predicates.Single();
var clause = Expression.Lambda<Func<TQuery, bool>>(body, keySelector.Parameters);
return queryable.Where(clause);
}
/// <summary>
/// Break a list of items tuples of pairs.
/// </summary>
private static IEnumerable<(T, T)> PairWise<T>(this IEnumerable<T> source)
{
var sourceEnumerator = source.GetEnumerator();
while (sourceEnumerator.MoveNext())
{
var a = sourceEnumerator.Current;
sourceEnumerator.MoveNext();
var b = sourceEnumerator.Current;
yield return (a, b);
}
}
private static TKey[] Bucketize<TKey>(IEnumerable<TKey> values)
{
var distinctValueList = values.Distinct().ToList();
// Calculate bucket size as 1,2,4,8,16,32,64,...
var bucket = 1;
while (distinctValueList.Count > bucket)
{
bucket *= 2;
}
// Fill all slots.
var lastValue = distinctValueList.Last();
for (var index = distinctValueList.Count; index < bucket; index++)
{
distinctValueList.Add(lastValue);
}
var distinctValues = distinctValueList.ToArray();
return distinctValues;
}
}
@RichardD2
Copy link

RichardD2 commented Aug 23, 2021

Couldn't you get rid of the PairWise function, and replace the loop:

while (predicates.Count > 1)
{
    predicates = PairWise(predicates).Select(p => Expression.OrElse(p.Item1, p.Item2)).ToList();
}

var body = predicates.Single();

with:

var body = predicates.Aggregate(Expression.OrElse);

@RichardD2
Copy link

RichardD2 commented Aug 24, 2021

Or, if you're worried about the deep nesting of conditions:

private readonly struct HalfList<T>
{
    private readonly IReadOnlyList<T> _list;
    private readonly int _startIndex;

    private HalfList(IReadOnlyList<T> list, int startIndex, int count)
    {
        _list = list ?? throw new ArgumentNullException(nameof(list));
        _startIndex = startIndex;
        Count = count;
    }

    public HalfList(IReadOnlyList<T> list) : this(list, 0, list.Count)
    {
    }

    public int Count { get; }
    
    public T Item => Count == 1 ? _list[_startIndex] : throw new InvalidOperationException();

    public (HalfList<T> left, HalfList<T> right) Split()
    {
        if (Count < 2) throw new InvalidOperationException();

        int pivot = Count >> 1;
        var left = new HalfList<T>(_list, _startIndex, pivot);
        var right = new HalfList<T>(_list, _startIndex + pivot, Count - pivot);
        return (left, right);
    }
}

private static Expression CombinePredicates(IReadOnlyList<Expression> parts, Func<Expression, Expression, Expression> fn)
{
    if (parts.Count == 0) throw new ArgumentException("At least one part is required.", nameof(parts));
    if (parts.Count == 1) return parts[0];
    
    var segment = new HalfList<Expression>(parts);
    return CombineCore(segment.Split(), fn);

    static Expression CombineCore((HalfList<Expression> left, HalfList<Expression> right) x, Func<Expression, Expression, Expression> fn)
    {
        var left = x.left.Count == 1 ? x.left.Item : CombineCore(x.left.Split(), fn);
        var right = x.right.Count == 1 ? x.right.Item : CombineCore(x.right.Split(), fn);
        return fn(left, right);
    }
}
var body = CombinePredicates(predicates, Expression.OrElse);

@joelmandell
Copy link

joelmandell commented Dec 3, 2021

I had to use the IQueryableExtensions.In(), to support older SQL Server versions.
And had an issue when using a library that parses the table name from generated query (I am using multiple Interceptors). The issue is when IEnumerable<TKey> values is empty.

It has the check if(!values.Any()) and returns queryable.Take(0).
When that call is used EF Core generates an subquery, and the library that I use (EFCoreSecondLevelCacheInterceptor) fails trying to fetch table name from query.

I resorted to return queryable.Where(x => true) instead. And that seems to work after that. Posting this in case someone else has this problem.

Updated code of that extension method, according to that change:

public static IQueryable<TQuery> In<TKey, TQuery>(
            this IQueryable<TQuery> queryable,
            IEnumerable<TKey> values,
            Expression<Func<TQuery, TKey>> keySelector)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            if (keySelector == null)
            {
                throw new ArgumentNullException(nameof(keySelector));
            }

            if (!values.Any())
            {
                //.Where instead of .Take(0), cause that seem to produce "funky" SQL.
                return queryable.Where(x => true);
            }

            var distinctValues = Bucketize(values);

            if (distinctValues.Length > 2048)
            {
                throw new ArgumentException("Too many parameters for SQL Server, reduce the number of parameters", nameof(keySelector));
            }

            var predicates = distinctValues
                .Select(v =>
                {
                    // Create an expression that captures the variable so EF can turn this into a parameterized SQL query
                    Expression<Func<TKey>> valueAsExpression = () => v;
                    return Expression.Equal(keySelector.Body, valueAsExpression.Body);
                })
                .ToList();

            while (predicates.Count > 1)
            {
                predicates = PairWise(predicates).Select(p => Expression.OrElse(p.Item1, p.Item2)).ToList();
            }

            var body = predicates.Single();

            var clause = Expression.Lambda<Func<TQuery, bool>>(body, keySelector.Parameters);

            return queryable.Where(clause);
        }

@yv989c
Copy link

yv989c commented Dec 29, 2021

@ErikEJ I believe you may find my project useful. It solves this problem in a flexible way. I have been using a similar strategy in my work with acceptable results, so I put some effort and made a generic version of it for everyone to use. Please, feel free to code review it if you have time. Ideas are welcome.

@yv989c
Copy link

yv989c commented Dec 29, 2021

Hey @joelmandell. Please take a look ☝️ too 🙂.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment