我认为其中大多数都存在问题,包括已接受的答案,因为由于服务器往返次数过多,数据返回过多或客户端执行过多,因此它们无法与Linq一起通过IQueryable很好地工作。
对于IEnumerable,我不喜欢Sehe的答案或类似的答案,因为它使用了过多的内存(一个简单的10000000两个列表测试在我的32GB机器上耗尽了Linqpad的内存)。
另外,大多数其他人实际上并没有实现适当的完全外部联接,因为他们使用的是带有右联接的联合,而不是带有右反半联接的Concat,这不仅从结果中消除了重复的内部联接行,而且左侧或右侧数据中最初存在的任何适当的重复项。
因此,以下是我的扩展程序,它们处理所有这些问题,生成SQL并在LINQ中直接实现到SQL的联接,在服务器上执行,并且比Enumerables上的其他程序更快,内存更少:
public static class Ext {
public static IEnumerable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
this IEnumerable<TLeft> leftItems,
IEnumerable<TRight> rightItems,
Func<TLeft, TKey> leftKeySelector,
Func<TRight, TKey> rightKeySelector,
Func<TLeft, TRight, TResult> resultSelector) {
return from left in leftItems
join right in rightItems on leftKeySelector(left) equals rightKeySelector(right) into temp
from right in temp.DefaultIfEmpty()
select resultSelector(left, right);
}
public static IEnumerable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
this IEnumerable<TLeft> leftItems,
IEnumerable<TRight> rightItems,
Func<TLeft, TKey> leftKeySelector,
Func<TRight, TKey> rightKeySelector,
Func<TLeft, TRight, TResult> resultSelector) {
return from right in rightItems
join left in leftItems on rightKeySelector(right) equals leftKeySelector(left) into temp
from left in temp.DefaultIfEmpty()
select resultSelector(left, right);
}
public static IEnumerable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
this IEnumerable<TLeft> leftItems,
IEnumerable<TRight> rightItems,
Func<TLeft, TKey> leftKeySelector,
Func<TRight, TKey> rightKeySelector,
Func<TLeft, TRight, TResult> resultSelector) {
return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Union(leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
}
public static IEnumerable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
this IEnumerable<TLeft> leftItems,
IEnumerable<TRight> rightItems,
Func<TLeft, TKey> leftKeySelector,
Func<TRight, TKey> rightKeySelector,
Func<TLeft, TRight, TResult> resultSelector) {
var hashLK = new HashSet<TKey>(from l in leftItems select leftKeySelector(l));
return rightItems.Where(r => !hashLK.Contains(rightKeySelector(r))).Select(r => resultSelector(default(TLeft),r));
}
public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
this IEnumerable<TLeft> leftItems,
IEnumerable<TRight> rightItems,
Func<TLeft, TKey> leftKeySelector,
Func<TRight, TKey> rightKeySelector,
Func<TLeft, TRight, TResult> resultSelector) where TLeft : class {
return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
}
private static Expression<Func<TP, TC, TResult>> CastSMBody<TP, TC, TResult>(LambdaExpression ex, TP unusedP, TC unusedC, TResult unusedRes) => (Expression<Func<TP, TC, TResult>>)ex;
public static IQueryable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
this IQueryable<TLeft> leftItems,
IQueryable<TRight> rightItems,
Expression<Func<TLeft, TKey>> leftKeySelector,
Expression<Func<TRight, TKey>> rightKeySelector,
Expression<Func<TLeft, TRight, TResult>> resultSelector) {
var sampleAnonLR = new { left = default(TLeft), rightg = default(IEnumerable<TRight>) };
var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
var parmC = Expression.Parameter(typeof(TRight), "c");
var argLeft = Expression.PropertyOrField(parmP, "left");
var newleftrs = CastSMBody(Expression.Lambda(Expression.Invoke(resultSelector, argLeft, parmC), parmP, parmC), sampleAnonLR, default(TRight), default(TResult));
return leftItems.AsQueryable().GroupJoin(rightItems, leftKeySelector, rightKeySelector, (left, rightg) => new { left, rightg }).SelectMany(r => r.rightg.DefaultIfEmpty(), newleftrs);
}
public static IQueryable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
this IQueryable<TLeft> leftItems,
IQueryable<TRight> rightItems,
Expression<Func<TLeft, TKey>> leftKeySelector,
Expression<Func<TRight, TKey>> rightKeySelector,
Expression<Func<TLeft, TRight, TResult>> resultSelector) {
var sampleAnonLR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
var parmC = Expression.Parameter(typeof(TLeft), "c");
var argRight = Expression.PropertyOrField(parmP, "right");
var newrightrs = CastSMBody(Expression.Lambda(Expression.Invoke(resultSelector, parmC, argRight), parmP, parmC), sampleAnonLR, default(TLeft), default(TResult));
return rightItems.GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right }).SelectMany(l => l.leftg.DefaultIfEmpty(), newrightrs);
}
public static IQueryable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
this IQueryable<TLeft> leftItems,
IQueryable<TRight> rightItems,
Expression<Func<TLeft, TKey>> leftKeySelector,
Expression<Func<TRight, TKey>> rightKeySelector,
Expression<Func<TLeft, TRight, TResult>> resultSelector) {
return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Union(leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
}
private static Expression<Func<TP, TResult>> CastSBody<TP, TResult>(LambdaExpression ex, TP unusedP, TResult unusedRes) => (Expression<Func<TP, TResult>>)ex;
public static IQueryable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
this IQueryable<TLeft> leftItems,
IQueryable<TRight> rightItems,
Expression<Func<TLeft, TKey>> leftKeySelector,
Expression<Func<TRight, TKey>> rightKeySelector,
Expression<Func<TLeft, TRight, TResult>> resultSelector) {
var sampleAnonLgR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
var parmLgR = Expression.Parameter(sampleAnonLgR.GetType(), "lgr");
var argLeft = Expression.Constant(default(TLeft), typeof(TLeft));
var argRight = Expression.PropertyOrField(parmLgR, "right");
var newrightrs = CastSBody(Expression.Lambda(Expression.Invoke(resultSelector, argLeft, argRight), parmLgR), sampleAnonLgR, default(TResult));
return rightItems.GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right }).Where(lgr => !lgr.leftg.Any()).Select(newrightrs);
}
public static IQueryable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
this IQueryable<TLeft> leftItems,
IQueryable<TRight> rightItems,
Expression<Func<TLeft, TKey>> leftKeySelector,
Expression<Func<TRight, TKey>> rightKeySelector,
Expression<Func<TLeft, TRight, TResult>> resultSelector) {
return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
}
}
正确的反半联接之间的差异主要是通过Linq to Objects或源代码来解决,但在最终回答中在服务器(SQL)方面有所不同,从而消除了不必要的问题 JOIN
。
的手编码 Expression
Expression<Func<>>
使用LinqKit可以改进用于将a 合并为lambda,但是如果语言/编译器为此添加了一些帮助,那就很好了。该FullOuterJoinDistinct
和RightOuterJoin
功能出于完整性考虑,但我没有重新实现FullOuterGroupJoin
呢。
我写了另一个版本的完全外部联接IEnumerable
键是可排序的情况,至少在较小的集合上,它比左外部联接与右反半联接相结合的速度快约50%。它只排序一次,然后遍历每个集合。
我还为与EF兼容的版本添加了另一个答案,方法是Invoke
使用自定义扩展替换。