; Gist: x264 vectorization (Enna1/b2d4245feec090ff793e371d00016afb)
; Last active: January 21, 2024 07:30
; Save Enna1/b2d4245feec090ff793e371d00016afb to your computer and use it in GitHub Desktop.
; Note: this file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
; Learn more about bidirectional Unicode characters.
; *** IR Dump After LoopVectorizePass on get_ref ***
; Function Attrs: nofree norecurse nosync nounwind memory(readwrite, inaccessiblemem: none) uwtable
define internal ptr @get_ref(ptr noundef %dst, ptr nocapture noundef %i_dst_stride, ptr nocapture noundef readonly %src, i32 noundef %i_src_stride, i32 noundef %mvx, i32 noundef %mvy, i32 noundef %i_width, i32 noundef %i_height, ptr nocapture noundef readonly %weight) #0 { | |
entry: | |
%and = and i32 %mvy, 3 | |
%shl = shl nuw nsw i32 %and, 2 | |
%and1 = and i32 %mvx, 3 | |
%add = or disjoint i32 %shl, %and1 | |
%shr = ashr i32 %mvy, 2 | |
%mul = mul i32 %shr, %i_src_stride | |
%shr2 = ashr i32 %mvx, 2 | |
%add3 = add i32 %mul, %shr2 | |
%idxprom = zext nneg i32 %add to i64 | |
%arrayidx = getelementptr inbounds [16 x i8], ptr @hpel_ref0, i64 0, i64 %idxprom | |
%0 = load i8, ptr %arrayidx, align 1, !tbaa !15 | |
%idxprom4 = zext i8 %0 to i64 | |
%arrayidx5 = getelementptr inbounds ptr, ptr %src, i64 %idxprom4 | |
%1 = load ptr, ptr %arrayidx5, align 8, !tbaa !9 | |
%idx.ext = sext i32 %add3 to i64 | |
%add.ptr = getelementptr i8, ptr %1, i64 %idx.ext | |
%cmp = icmp eq i32 %and, 3 | |
%mul7 = select i1 %cmp, i32 %i_src_stride, i32 0 | |
%idx.ext8 = sext i32 %mul7 to i64 | |
%add.ptr9 = getelementptr i8, ptr %add.ptr, i64 %idx.ext8 | |
%and10 = and i32 %add, 5 | |
%tobool.not = icmp eq i32 %and10, 0 | |
br i1 %tobool.not, label %if.else, label %if.then | |
if.then: ; preds = %entry | |
%arrayidx12 = getelementptr inbounds [16 x i8], ptr @hpel_ref1, i64 0, i64 %idxprom | |
%2 = load i8, ptr %arrayidx12, align 1, !tbaa !15 | |
%idxprom13 = zext i8 %2 to i64 | |
%arrayidx14 = getelementptr inbounds ptr, ptr %src, i64 %idxprom13 | |
%3 = load ptr, ptr %arrayidx14, align 8, !tbaa !9 | |
%add.ptr16 = getelementptr i8, ptr %3, i64 %idx.ext | |
%cmp18 = icmp eq i32 %and1, 3 | |
%idx.ext20 = zext i1 %cmp18 to i64 | |
%add.ptr21 = getelementptr i8, ptr %add.ptr16, i64 %idx.ext20 | |
%cmp29.i = icmp sgt i32 %i_height, 0 | |
br i1 %cmp29.i, label %for.cond1.preheader.lr.ph.i, label %pixel_avg.exit | |
for.cond1.preheader.lr.ph.i: ; preds = %if.then | |
%4 = load i32, ptr %i_dst_stride, align 4, !tbaa !13 | |
%cmp227.i = icmp sgt i32 %i_width, 0 | |
%idx.ext.i = sext i32 %4 to i64 | |
%idx.ext12.i = sext i32 %i_src_stride to i64 | |
br i1 %cmp227.i, label %for.cond1.preheader.us.preheader.i, label %pixel_avg.exit | |
for.cond1.preheader.us.preheader.i: ; preds = %for.cond1.preheader.lr.ph.i | |
%wide.trip.count.i = zext nneg i32 %i_width to i64 | |
%5 = add i32 %i_height, -1 | |
%6 = zext i32 %5 to i64 | |
%7 = mul i64 %idx.ext.i, %6 | |
%8 = add i64 %7, %wide.trip.count.i | |
%scevgep = getelementptr i8, ptr %dst, i64 %8 | |
%9 = mul i64 %idx.ext12.i, %6 | |
%10 = add i64 %9, %idx.ext8 | |
%11 = add i64 %10, %idx.ext | |
%12 = add i64 %11, %wide.trip.count.i | |
%scevgep154 = getelementptr i8, ptr %1, i64 %12 | |
%13 = add i64 %9, %idx.ext | |
%14 = add i64 %13, %idx.ext20 | |
%15 = add i64 %14, %wide.trip.count.i | |
%scevgep155 = getelementptr i8, ptr %3, i64 %15 | |
br label %iter.check | |
iter.check: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us.i, %for.cond1.preheader.us.preheader.i | |
%y.033.us.i = phi i32 [ %inc17.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ 0, %for.cond1.preheader.us.preheader.i ] | |
%dst.addr.032.us.i = phi ptr [ %add.ptr.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %dst, %for.cond1.preheader.us.preheader.i ] | |
%src1.addr.031.us.i = phi ptr [ %add.ptr13.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %add.ptr9, %for.cond1.preheader.us.preheader.i ] | |
%src2.addr.030.us.i = phi ptr [ %add.ptr15.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %add.ptr21, %for.cond1.preheader.us.preheader.i ] | |
%min.iters.check = icmp ult i64 %wide.trip.count.i, 8 | |
br i1 %min.iters.check, label %vec.epilog.scalar.ph, label %vector.memcheck | |
vector.memcheck: ; preds = %iter.check | |
%bound0 = icmp ult ptr %dst, %scevgep154 | |
%bound1 = icmp ult ptr %add.ptr9, %scevgep | |
%found.conflict = and i1 %bound0, %bound1 | |
%stride.check = icmp slt i64 %idx.ext.i, 0 | |
%16 = or i1 %found.conflict, %stride.check | |
%stride.check156 = icmp slt i64 %idx.ext12.i, 0 | |
%17 = or i1 %16, %stride.check156 | |
%bound0157 = icmp ult ptr %dst, %scevgep155 | |
%bound1158 = icmp ult ptr %add.ptr21, %scevgep | |
%found.conflict159 = and i1 %bound0157, %bound1158 | |
%stride.check160 = icmp slt i64 %idx.ext.i, 0 | |
%18 = or i1 %found.conflict159, %stride.check160 | |
%stride.check161 = icmp slt i64 %idx.ext12.i, 0 | |
%19 = or i1 %18, %stride.check161 | |
%conflict.rdx = or i1 %17, %19 | |
br i1 %conflict.rdx, label %vec.epilog.scalar.ph, label %vector.main.loop.iter.check | |
vector.main.loop.iter.check: ; preds = %vector.memcheck | |
%min.iters.check162 = icmp ult i64 %wide.trip.count.i, 32 | |
br i1 %min.iters.check162, label %vec.epilog.ph, label %vector.ph | |
vector.ph: ; preds = %vector.main.loop.iter.check | |
%n.mod.vf = urem i64 %wide.trip.count.i, 32 | |
%n.vec = sub i64 %wide.trip.count.i, %n.mod.vf | |
br label %vector.body | |
vector.body: ; preds = %vector.body, %vector.ph | |
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | |
%20 = add i64 %index, 0 | |
%21 = add i64 %index, 16 | |
%22 = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %20 | |
%23 = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %21 | |
%24 = getelementptr inbounds i8, ptr %22, i32 0 | |
%25 = getelementptr inbounds i8, ptr %22, i32 16 | |
%wide.load = load <16 x i8>, ptr %24, align 1, !tbaa !15, !alias.scope !144 | |
%wide.load163 = load <16 x i8>, ptr %25, align 1, !tbaa !15, !alias.scope !144 | |
%26 = zext <16 x i8> %wide.load to <16 x i16> | |
%27 = zext <16 x i8> %wide.load163 to <16 x i16> | |
%28 = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %20 | |
%29 = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %21 | |
%30 = getelementptr inbounds i8, ptr %28, i32 0 | |
%31 = getelementptr inbounds i8, ptr %28, i32 16 | |
%wide.load164 = load <16 x i8>, ptr %30, align 1, !tbaa !15, !alias.scope !147 | |
%wide.load165 = load <16 x i8>, ptr %31, align 1, !tbaa !15, !alias.scope !147 | |
%32 = zext <16 x i8> %wide.load164 to <16 x i16> | |
%33 = zext <16 x i8> %wide.load165 to <16 x i16> | |
%34 = add nuw nsw <16 x i16> %26, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> | |
%35 = add nuw nsw <16 x i16> %27, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> | |
%36 = add nuw nsw <16 x i16> %34, %32 | |
%37 = add nuw nsw <16 x i16> %35, %33 | |
%38 = lshr <16 x i16> %36, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> | |
%39 = lshr <16 x i16> %37, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> | |
%40 = trunc <16 x i16> %38 to <16 x i8> | |
%41 = trunc <16 x i16> %39 to <16 x i8> | |
%42 = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %20 | |
%43 = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %21 | |
%44 = getelementptr inbounds i8, ptr %42, i32 0 | |
%45 = getelementptr inbounds i8, ptr %42, i32 16 | |
store <16 x i8> %40, ptr %44, align 1, !tbaa !15, !alias.scope !149, !noalias !151 | |
store <16 x i8> %41, ptr %45, align 1, !tbaa !15, !alias.scope !149, !noalias !151 | |
%index.next = add nuw i64 %index, 32 | |
%46 = icmp eq i64 %index.next, %n.vec | |
br i1 %46, label %middle.block, label %vector.body, !llvm.loop !152 | |
middle.block: ; preds = %vector.body | |
%cmp.n = icmp eq i64 %wide.trip.count.i, %n.vec | |
br i1 %cmp.n, label %for.cond1.for.cond.cleanup3_crit_edge.us.i, label %vec.epilog.iter.check | |
vec.epilog.iter.check: ; preds = %middle.block | |
%n.vec.remaining = sub i64 %wide.trip.count.i, %n.vec | |
%min.epilog.iters.check = icmp ult i64 %n.vec.remaining, 8 | |
br i1 %min.epilog.iters.check, label %vec.epilog.scalar.ph, label %vec.epilog.ph | |
vec.epilog.ph: ; preds = %vector.main.loop.iter.check, %vec.epilog.iter.check | |
%vec.epilog.resume.val = phi i64 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ] | |
%n.mod.vf166 = urem i64 %wide.trip.count.i, 8 | |
%n.vec167 = sub i64 %wide.trip.count.i, %n.mod.vf166 | |
br label %vec.epilog.vector.body | |
vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph | |
%index169 = phi i64 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next172, %vec.epilog.vector.body ] | |
%47 = add i64 %index169, 0 | |
%48 = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %47 | |
%49 = getelementptr inbounds i8, ptr %48, i32 0 | |
%wide.load170 = load <8 x i8>, ptr %49, align 1, !tbaa !15, !alias.scope !153 | |
%50 = zext <8 x i8> %wide.load170 to <8 x i16> | |
%51 = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %47 | |
%52 = getelementptr inbounds i8, ptr %51, i32 0 | |
%wide.load171 = load <8 x i8>, ptr %52, align 1, !tbaa !15, !alias.scope !156 | |
%53 = zext <8 x i8> %wide.load171 to <8 x i16> | |
%54 = add nuw nsw <8 x i16> %50, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> | |
%55 = add nuw nsw <8 x i16> %54, %53 | |
%56 = lshr <8 x i16> %55, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> | |
%57 = trunc <8 x i16> %56 to <8 x i8> | |
%58 = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %47 | |
%59 = getelementptr inbounds i8, ptr %58, i32 0 | |
store <8 x i8> %57, ptr %59, align 1, !tbaa !15, !alias.scope !158, !noalias !160 | |
%index.next172 = add nuw i64 %index169, 8 | |
%60 = icmp eq i64 %index.next172, %n.vec167 | |
br i1 %60, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !llvm.loop !161 | |
vec.epilog.middle.block: ; preds = %vec.epilog.vector.body | |
%cmp.n168 = icmp eq i64 %wide.trip.count.i, %n.vec167 | |
br i1 %cmp.n168, label %for.cond1.for.cond.cleanup3_crit_edge.us.i, label %vec.epilog.scalar.ph | |
vec.epilog.scalar.ph: ; preds = %vector.memcheck, %iter.check, %vec.epilog.iter.check, %vec.epilog.middle.block | |
%bc.resume.val = phi i64 [ %n.vec167, %vec.epilog.middle.block ], [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.memcheck ], [ 0, %iter.check ] | |
br label %for.body4.us.i | |
for.body4.us.i: ; preds = %for.body4.us.i, %vec.epilog.scalar.ph | |
%indvars.iv.i = phi i64 [ %bc.resume.val, %vec.epilog.scalar.ph ], [ %indvars.iv.next.i, %for.body4.us.i ] | |
%arrayidx.us.i = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %indvars.iv.i | |
%61 = load i8, ptr %arrayidx.us.i, align 1, !tbaa !15 | |
%conv.us.i = zext i8 %61 to i16 | |
%arrayidx6.us.i = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %indvars.iv.i | |
%62 = load i8, ptr %arrayidx6.us.i, align 1, !tbaa !15 | |
%conv7.us.i = zext i8 %62 to i16 | |
%add.us.i = add nuw nsw i16 %conv.us.i, 1 | |
%add8.us.i = add nuw nsw i16 %add.us.i, %conv7.us.i | |
%shr.us.i = lshr i16 %add8.us.i, 1 | |
%conv9.us.i = trunc i16 %shr.us.i to i8 | |
%arrayidx11.us.i = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %indvars.iv.i | |
store i8 %conv9.us.i, ptr %arrayidx11.us.i, align 1, !tbaa !15 | |
%indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1 | |
%exitcond.not.i = icmp eq i64 %indvars.iv.next.i, %wide.trip.count.i | |
br i1 %exitcond.not.i, label %for.cond1.for.cond.cleanup3_crit_edge.us.i, label %for.body4.us.i, !llvm.loop !162 | |
for.cond1.for.cond.cleanup3_crit_edge.us.i: ; preds = %vec.epilog.middle.block, %middle.block, %for.body4.us.i | |
%add.ptr.us.i = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %idx.ext.i | |
%add.ptr13.us.i = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %idx.ext12.i | |
%add.ptr15.us.i = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %idx.ext12.i | |
%inc17.us.i = add nuw nsw i32 %y.033.us.i, 1 | |
%exitcond36.not.i = icmp eq i32 %inc17.us.i, %i_height | |
br i1 %exitcond36.not.i, label %pixel_avg.exit.loopexit, label %iter.check, !llvm.loop !100 | |
pixel_avg.exit.loopexit: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us.i | |
br label %pixel_avg.exit | |
pixel_avg.exit: ; preds = %pixel_avg.exit.loopexit, %if.then, %for.cond1.preheader.lr.ph.i | |
%weightfn = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 5 | |
%63 = load ptr, ptr %weightfn, align 16, !tbaa !101 | |
%tobool22.not = icmp eq ptr %63, null | |
br i1 %tobool22.not, label %cleanup, label %if.then23 | |
if.then23: ; preds = %pixel_avg.exit | |
%64 = load i32, ptr %i_dst_stride, align 4, !tbaa !13 | |
%i_denom.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 2 | |
%65 = load i32, ptr %i_denom.i, align 16, !tbaa !103 | |
%cmp.i = icmp sgt i32 %65, 0 | |
br i1 %cmp.i, label %for.cond.preheader.i, label %for.cond17.preheader.i | |
for.cond17.preheader.i: ; preds = %if.then23 | |
br i1 %cmp29.i, label %for.cond23.preheader.lr.ph.i, label %cleanup | |
for.cond23.preheader.lr.ph.i: ; preds = %for.cond17.preheader.i | |
%cmp2477.i = icmp sgt i32 %i_width, 0 | |
%i_scale31.i = getelementptr %struct.x264_weight_t, ptr %weight, i64 0, i32 3 | |
%i_offset33.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4 | |
%idx.ext43.i = sext i32 %64 to i64 | |
br i1 %cmp2477.i, label %for.cond23.preheader.us.preheader.i, label %cleanup | |
for.cond23.preheader.us.preheader.i: ; preds = %for.cond23.preheader.lr.ph.i | |
%wide.trip.count.i61 = zext nneg i32 %i_width to i64 | |
%66 = add i32 %i_height, -1 | |
%67 = zext i32 %66 to i64 | |
%68 = mul i64 %idx.ext43.i, %67 | |
%69 = add i64 %68, %wide.trip.count.i61 | |
%scevgep174 = getelementptr i8, ptr %dst, i64 %69 | |
%scevgep175 = getelementptr i8, ptr %weight, i64 44 | |
br label %for.cond23.preheader.us.i | |
for.cond23.preheader.us.i: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i, %for.cond23.preheader.us.preheader.i | |
%y16.082.us.i = phi i32 [ %inc42.us.i, %for.cond23.for.cond.cleanup26_crit_edge.us.i ], [ 0, %for.cond23.preheader.us.preheader.i ] | |
%dst.addr.181.us.i = phi ptr [ %add.ptr44.us.i, %for.cond23.for.cond.cleanup26_crit_edge.us.i ], [ %dst, %for.cond23.preheader.us.preheader.i ] | |
%min.iters.check181 = icmp ult i64 %wide.trip.count.i61, 8 | |
br i1 %min.iters.check181, label %scalar.ph, label %vector.memcheck173 | |
vector.memcheck173: ; preds = %for.cond23.preheader.us.i | |
%bound0176 = icmp ult ptr %dst, %scevgep175 | |
%bound1177 = icmp ult ptr %i_scale31.i, %scevgep174 | |
%found.conflict178 = and i1 %bound0176, %bound1177 | |
%stride.check179 = icmp slt i64 %idx.ext43.i, 0 | |
%70 = or i1 %found.conflict178, %stride.check179 | |
br i1 %70, label %scalar.ph, label %vector.ph182 | |
vector.ph182: ; preds = %vector.memcheck173 | |
%n.mod.vf183 = urem i64 %wide.trip.count.i61, 4 | |
%n.vec184 = sub i64 %wide.trip.count.i61, %n.mod.vf183 | |
br label %vector.body187 | |
vector.body187: ; preds = %vector.body187, %vector.ph182 | |
%index188 = phi i64 [ 0, %vector.ph182 ], [ %index.next192, %vector.body187 ] | |
%71 = add i64 %index188, 0 | |
%72 = getelementptr inbounds i8, ptr %dst.addr.181.us.i, i64 %71 | |
%73 = getelementptr inbounds i8, ptr %72, i32 0 | |
%wide.load189 = load <4 x i8>, ptr %73, align 1, !tbaa !15, !alias.scope !163, !noalias !166 | |
%74 = zext <4 x i8> %wide.load189 to <4 x i32> | |
%75 = load i32, ptr %i_scale31.i, align 4, !tbaa !104, !alias.scope !166 | |
%broadcast.splatinsert = insertelement <4 x i32> poison, i32 %75, i64 0 | |
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer | |
%76 = mul nsw <4 x i32> %broadcast.splat, %74 | |
%77 = load i32, ptr %i_offset33.i, align 8, !tbaa !108, !alias.scope !166 | |
%broadcast.splatinsert190 = insertelement <4 x i32> poison, i32 %77, i64 0 | |
%broadcast.splat191 = shufflevector <4 x i32> %broadcast.splatinsert190, <4 x i32> poison, <4 x i32> zeroinitializer | |
%78 = add nsw <4 x i32> %76, %broadcast.splat191 | |
%79 = icmp ult <4 x i32> %78, <i32 256, i32 256, i32 256, i32 256> | |
%80 = icmp sgt <4 x i32> %78, zeroinitializer | |
%81 = sext <4 x i1> %80 to <4 x i32> | |
%82 = select <4 x i1> %79, <4 x i32> %78, <4 x i32> %81 | |
%83 = trunc <4 x i32> %82 to <4 x i8> | |
store <4 x i8> %83, ptr %73, align 1, !tbaa !15, !alias.scope !163, !noalias !166 | |
%index.next192 = add nuw i64 %index188, 4 | |
%84 = icmp eq i64 %index.next192, %n.vec184 | |
br i1 %84, label %middle.block180, label %vector.body187, !llvm.loop !168 | |
middle.block180: ; preds = %vector.body187 | |
%cmp.n186 = icmp eq i64 %wide.trip.count.i61, %n.vec184 | |
br i1 %cmp.n186, label %for.cond23.for.cond.cleanup26_crit_edge.us.i, label %scalar.ph | |
scalar.ph: ; preds = %vector.memcheck173, %for.cond23.preheader.us.i, %middle.block180 | |
%bc.resume.val185 = phi i64 [ %n.vec184, %middle.block180 ], [ 0, %for.cond23.preheader.us.i ], [ 0, %vector.memcheck173 ] | |
br label %for.body27.us.i | |
for.body27.us.i: ; preds = %for.body27.us.i, %scalar.ph | |
%indvars.iv.i62 = phi i64 [ %bc.resume.val185, %scalar.ph ], [ %indvars.iv.next.i63, %for.body27.us.i ] | |
%arrayidx29.us.i = getelementptr inbounds i8, ptr %dst.addr.181.us.i, i64 %indvars.iv.i62 | |
%85 = load i8, ptr %arrayidx29.us.i, align 1, !tbaa !15 | |
%conv30.us.i = zext i8 %85 to i32 | |
%86 = load i32, ptr %i_scale31.i, align 4, !tbaa !104 | |
%mul32.us.i = mul nsw i32 %86, %conv30.us.i | |
%87 = load i32, ptr %i_offset33.i, align 8, !tbaa !108 | |
%add34.us.i = add nsw i32 %mul32.us.i, %87 | |
%tobool.not.i72.us.i = icmp ult i32 %add34.us.i, 256 | |
%88 = icmp sgt i32 %add34.us.i, 0 | |
%shr.i73.us.i = sext i1 %88 to i32 | |
%cond.i74.us.i = select i1 %tobool.not.i72.us.i, i32 %add34.us.i, i32 %shr.i73.us.i | |
%conv.i75.us.i = trunc i32 %cond.i74.us.i to i8 | |
store i8 %conv.i75.us.i, ptr %arrayidx29.us.i, align 1, !tbaa !15 | |
%indvars.iv.next.i63 = add nuw nsw i64 %indvars.iv.i62, 1 | |
%exitcond.not.i64 = icmp eq i64 %indvars.iv.next.i63, %wide.trip.count.i61 | |
br i1 %exitcond.not.i64, label %for.cond23.for.cond.cleanup26_crit_edge.us.i, label %for.body27.us.i, !llvm.loop !169 | |
for.cond23.for.cond.cleanup26_crit_edge.us.i: ; preds = %middle.block180, %for.body27.us.i | |
%inc42.us.i = add nuw nsw i32 %y16.082.us.i, 1 | |
%add.ptr44.us.i = getelementptr i8, ptr %dst.addr.181.us.i, i64 %idx.ext43.i | |
%exitcond93.not.i = icmp eq i32 %inc42.us.i, %i_height | |
br i1 %exitcond93.not.i, label %cleanup.loopexit153, label %for.cond23.preheader.us.i, !llvm.loop !113 | |
for.cond.preheader.i: ; preds = %if.then23 | |
br i1 %cmp29.i, label %for.cond2.preheader.lr.ph.i, label %cleanup | |
for.cond2.preheader.lr.ph.i: ; preds = %for.cond.preheader.i | |
%cmp383.i = icmp sgt i32 %i_width, 0 | |
%i_scale.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3 | |
%i_offset.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4 | |
%idx.ext.i65 = sext i32 %64 to i64 | |
br i1 %cmp383.i, label %for.cond2.preheader.us.preheader.i, label %cleanup | |
for.cond2.preheader.us.preheader.i: ; preds = %for.cond2.preheader.lr.ph.i | |
%wide.trip.count97.i = zext nneg i32 %i_width to i64 | |
%89 = add i32 %i_height, -1 | |
%90 = zext i32 %89 to i64 | |
%91 = mul i64 %idx.ext.i65, %90 | |
%92 = add i64 %91, %wide.trip.count97.i | |
%scevgep194 = getelementptr i8, ptr %dst, i64 %92 | |
%scevgep195 = getelementptr i8, ptr %weight, i64 44 | |
br label %for.cond2.preheader.us.i | |
for.cond2.preheader.us.i: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i, %for.cond2.preheader.us.preheader.i | |
%y.088.us.i = phi i32 [ %inc12.us.i, %for.cond2.for.cond.cleanup4_crit_edge.us.i ], [ 0, %for.cond2.preheader.us.preheader.i ] | |
%dst.addr.087.us.i = phi ptr [ %add.ptr.us.i71, %for.cond2.for.cond.cleanup4_crit_edge.us.i ], [ %dst, %for.cond2.preheader.us.preheader.i ] | |
%min.iters.check202 = icmp ult i64 %wide.trip.count97.i, 4 | |
br i1 %min.iters.check202, label %scalar.ph201, label %vector.memcheck193 | |
vector.memcheck193: ; preds = %for.cond2.preheader.us.i | |
%bound0196 = icmp ult ptr %dst, %scevgep195 | |
%bound1197 = icmp ult ptr %i_denom.i, %scevgep194 | |
%found.conflict198 = and i1 %bound0196, %bound1197 | |
%stride.check199 = icmp slt i64 %idx.ext.i65, 0 | |
%93 = or i1 %found.conflict198, %stride.check199 | |
br i1 %93, label %scalar.ph201, label %vector.ph203 | |
vector.ph203: ; preds = %vector.memcheck193 | |
%n.mod.vf204 = urem i64 %wide.trip.count97.i, 4 | |
%n.vec205 = sub i64 %wide.trip.count97.i, %n.mod.vf204 | |
br label %vector.body208 | |
vector.body208: ; preds = %vector.body208, %vector.ph203 | |
%index209 = phi i64 [ 0, %vector.ph203 ], [ %index.next217, %vector.body208 ] | |
%94 = add i64 %index209, 0 | |
%95 = getelementptr inbounds i8, ptr %dst.addr.087.us.i, i64 %94 | |
%96 = getelementptr inbounds i8, ptr %95, i32 0 | |
%wide.load210 = load <4 x i8>, ptr %96, align 1, !tbaa !15, !alias.scope !170, !noalias !173 | |
%97 = zext <4 x i8> %wide.load210 to <4 x i32> | |
%98 = load i32, ptr %i_scale.i, align 4, !tbaa !104, !alias.scope !173 | |
%broadcast.splatinsert211 = insertelement <4 x i32> poison, i32 %98, i64 0 | |
%broadcast.splat212 = shufflevector <4 x i32> %broadcast.splatinsert211, <4 x i32> poison, <4 x i32> zeroinitializer | |
%99 = mul nsw <4 x i32> %broadcast.splat212, %97 | |
%100 = load i32, ptr %i_denom.i, align 16, !tbaa !103, !alias.scope !173 | |
%broadcast.splatinsert213 = insertelement <4 x i32> poison, i32 %100, i64 0 | |
%broadcast.splat214 = shufflevector <4 x i32> %broadcast.splatinsert213, <4 x i32> poison, <4 x i32> zeroinitializer | |
%101 = add nsw <4 x i32> %broadcast.splat214, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%102 = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %101 | |
%103 = add nsw <4 x i32> %102, %99 | |
%104 = ashr <4 x i32> %103, %broadcast.splat214 | |
%105 = load i32, ptr %i_offset.i, align 8, !tbaa !108, !alias.scope !173 | |
%broadcast.splatinsert215 = insertelement <4 x i32> poison, i32 %105, i64 0 | |
%broadcast.splat216 = shufflevector <4 x i32> %broadcast.splatinsert215, <4 x i32> poison, <4 x i32> zeroinitializer | |
%106 = add nsw <4 x i32> %104, %broadcast.splat216 | |
%107 = icmp ult <4 x i32> %106, <i32 256, i32 256, i32 256, i32 256> | |
%108 = icmp sgt <4 x i32> %106, zeroinitializer | |
%109 = sext <4 x i1> %108 to <4 x i32> | |
%110 = select <4 x i1> %107, <4 x i32> %106, <4 x i32> %109 | |
%111 = trunc <4 x i32> %110 to <4 x i8> | |
store <4 x i8> %111, ptr %96, align 1, !tbaa !15, !alias.scope !170, !noalias !173 | |
%index.next217 = add nuw i64 %index209, 4 | |
%112 = icmp eq i64 %index.next217, %n.vec205 | |
br i1 %112, label %middle.block200, label %vector.body208, !llvm.loop !175 | |
middle.block200: ; preds = %vector.body208 | |
%cmp.n207 = icmp eq i64 %wide.trip.count97.i, %n.vec205 | |
br i1 %cmp.n207, label %for.cond2.for.cond.cleanup4_crit_edge.us.i, label %scalar.ph201 | |
scalar.ph201: ; preds = %vector.memcheck193, %for.cond2.preheader.us.i, %middle.block200 | |
%bc.resume.val206 = phi i64 [ %n.vec205, %middle.block200 ], [ 0, %for.cond2.preheader.us.i ], [ 0, %vector.memcheck193 ] | |
br label %for.body5.us.i | |
for.body5.us.i: ; preds = %for.body5.us.i, %scalar.ph201 | |
%indvars.iv94.i = phi i64 [ %bc.resume.val206, %scalar.ph201 ], [ %indvars.iv.next95.i, %for.body5.us.i ] | |
%arrayidx.us.i66 = getelementptr inbounds i8, ptr %dst.addr.087.us.i, i64 %indvars.iv94.i | |
%113 = load i8, ptr %arrayidx.us.i66, align 1, !tbaa !15 | |
%conv.us.i67 = zext i8 %113 to i32 | |
%114 = load i32, ptr %i_scale.i, align 4, !tbaa !104 | |
%mul.us.i = mul nsw i32 %114, %conv.us.i67 | |
%115 = load i32, ptr %i_denom.i, align 16, !tbaa !103 | |
%sub.us.i = add nsw i32 %115, -1 | |
%shl.us.i = shl nuw i32 1, %sub.us.i | |
%add.us.i68 = add nsw i32 %shl.us.i, %mul.us.i | |
%shr.us.i69 = ashr i32 %add.us.i68, %115 | |
%116 = load i32, ptr %i_offset.i, align 8, !tbaa !108 | |
%add8.us.i70 = add nsw i32 %shr.us.i69, %116 | |
%tobool.not.i.us.i = icmp ult i32 %add8.us.i70, 256 | |
%117 = icmp sgt i32 %add8.us.i70, 0 | |
%shr.i.us.i = sext i1 %117 to i32 | |
%cond.i.us.i = select i1 %tobool.not.i.us.i, i32 %add8.us.i70, i32 %shr.i.us.i | |
%conv.i.us.i = trunc i32 %cond.i.us.i to i8 | |
store i8 %conv.i.us.i, ptr %arrayidx.us.i66, align 1, !tbaa !15 | |
%indvars.iv.next95.i = add nuw nsw i64 %indvars.iv94.i, 1 | |
%exitcond98.not.i = icmp eq i64 %indvars.iv.next95.i, %wide.trip.count97.i | |
br i1 %exitcond98.not.i, label %for.cond2.for.cond.cleanup4_crit_edge.us.i, label %for.body5.us.i, !llvm.loop !176 | |
for.cond2.for.cond.cleanup4_crit_edge.us.i: ; preds = %middle.block200, %for.body5.us.i | |
%inc12.us.i = add nuw nsw i32 %y.088.us.i, 1 | |
%add.ptr.us.i71 = getelementptr i8, ptr %dst.addr.087.us.i, i64 %idx.ext.i65 | |
%exitcond99.not.i = icmp eq i32 %inc12.us.i, %i_height | |
br i1 %exitcond99.not.i, label %cleanup.loopexit152, label %for.cond2.preheader.us.i, !llvm.loop !121 | |
if.else: ; preds = %entry | |
%weightfn24 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 5 | |
%118 = load ptr, ptr %weightfn24, align 16, !tbaa !101 | |
%tobool25.not = icmp eq ptr %118, null | |
br i1 %tobool25.not, label %if.else27, label %if.then26 | |
if.then26: ; preds = %if.else | |
%119 = load i32, ptr %i_dst_stride, align 4, !tbaa !13 | |
%i_denom.i72 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 2 | |
%120 = load i32, ptr %i_denom.i72, align 16, !tbaa !103 | |
%cmp.i73 = icmp sgt i32 %120, 0 | |
%cmp185.i74 = icmp sgt i32 %i_height, 0 | |
br i1 %cmp.i73, label %for.cond.preheader.i106, label %for.cond17.preheader.i75 | |
for.cond17.preheader.i75: ; preds = %if.then26 | |
br i1 %cmp185.i74, label %for.cond23.preheader.lr.ph.i76, label %cleanup | |
for.cond23.preheader.lr.ph.i76: ; preds = %for.cond17.preheader.i75 | |
%cmp2477.i77 = icmp sgt i32 %i_width, 0 | |
%i_scale31.i78 = getelementptr %struct.x264_weight_t, ptr %weight, i64 0, i32 3 | |
%i_offset33.i79 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4 | |
%idx.ext43.i80 = sext i32 %119 to i64 | |
%idx.ext45.i81 = sext i32 %i_src_stride to i64 | |
br i1 %cmp2477.i77, label %for.cond23.preheader.us.preheader.i82, label %cleanup | |
for.cond23.preheader.us.preheader.i82: ; preds = %for.cond23.preheader.lr.ph.i76 | |
%wide.trip.count.i83 = zext nneg i32 %i_width to i64 | |
%121 = add i32 %i_height, -1 | |
%122 = zext i32 %121 to i64 | |
%123 = mul i64 %idx.ext43.i80, %122 | |
%124 = add i64 %123, %wide.trip.count.i83 | |
%scevgep219 = getelementptr i8, ptr %dst, i64 %124 | |
%125 = mul i64 %idx.ext45.i81, %122 | |
%126 = add i64 %125, %idx.ext8 | |
%127 = add i64 %126, %idx.ext | |
%128 = add i64 %127, %wide.trip.count.i83 | |
%scevgep220 = getelementptr i8, ptr %1, i64 %128 | |
%scevgep221 = getelementptr i8, ptr %weight, i64 44 | |
br label %for.cond23.preheader.us.i84 | |
for.cond23.preheader.us.i84: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i101, %for.cond23.preheader.us.preheader.i82 | |
%y16.082.us.i85 = phi i32 [ %inc42.us.i102, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ 0, %for.cond23.preheader.us.preheader.i82 ] | |
%dst.addr.181.us.i86 = phi ptr [ %add.ptr44.us.i103, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ %dst, %for.cond23.preheader.us.preheader.i82 ] | |
%src.addr.180.us.i87 = phi ptr [ %add.ptr46.us.i104, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ %add.ptr9, %for.cond23.preheader.us.preheader.i82 ] | |
%min.iters.check234 = icmp ult i64 %wide.trip.count.i83, 12 | |
br i1 %min.iters.check234, label %scalar.ph233, label %vector.memcheck218 | |
vector.memcheck218: ; preds = %for.cond23.preheader.us.i84 | |
%bound0222 = icmp ult ptr %dst, %scevgep220 | |
%bound1223 = icmp ult ptr %add.ptr9, %scevgep219 | |
%found.conflict224 = and i1 %bound0222, %bound1223 | |
%stride.check225 = icmp slt i64 %idx.ext43.i80, 0 | |
%129 = or i1 %found.conflict224, %stride.check225 | |
%stride.check226 = icmp slt i64 %idx.ext45.i81, 0 | |
%130 = or i1 %129, %stride.check226 | |
%bound0227 = icmp ult ptr %dst, %scevgep221 | |
%bound1228 = icmp ult ptr %i_scale31.i78, %scevgep219 | |
%found.conflict229 = and i1 %bound0227, %bound1228 | |
%stride.check230 = icmp slt i64 %idx.ext43.i80, 0 | |
%131 = or i1 %found.conflict229, %stride.check230 | |
%conflict.rdx231 = or i1 %130, %131 | |
br i1 %conflict.rdx231, label %scalar.ph233, label %vector.ph235 | |
vector.ph235: ; preds = %vector.memcheck218 | |
%n.mod.vf236 = urem i64 %wide.trip.count.i83, 4 | |
%n.vec237 = sub i64 %wide.trip.count.i83, %n.mod.vf236 | |
br label %vector.body240 | |
vector.body240: ; preds = %vector.body240, %vector.ph235 | |
%index241 = phi i64 [ 0, %vector.ph235 ], [ %index.next247, %vector.body240 ] | |
%132 = add i64 %index241, 0 | |
%133 = getelementptr inbounds i8, ptr %src.addr.180.us.i87, i64 %132 | |
%134 = getelementptr inbounds i8, ptr %133, i32 0 | |
%wide.load242 = load <4 x i8>, ptr %134, align 1, !tbaa !15, !alias.scope !177 | |
%135 = zext <4 x i8> %wide.load242 to <4 x i32> | |
%136 = load i32, ptr %i_scale31.i78, align 4, !tbaa !104, !alias.scope !180 | |
%broadcast.splatinsert243 = insertelement <4 x i32> poison, i32 %136, i64 0 | |
%broadcast.splat244 = shufflevector <4 x i32> %broadcast.splatinsert243, <4 x i32> poison, <4 x i32> zeroinitializer | |
%137 = mul nsw <4 x i32> %broadcast.splat244, %135 | |
%138 = load i32, ptr %i_offset33.i79, align 8, !tbaa !108, !alias.scope !180 | |
%broadcast.splatinsert245 = insertelement <4 x i32> poison, i32 %138, i64 0 | |
%broadcast.splat246 = shufflevector <4 x i32> %broadcast.splatinsert245, <4 x i32> poison, <4 x i32> zeroinitializer | |
%139 = add nsw <4 x i32> %137, %broadcast.splat246 | |
%140 = icmp ult <4 x i32> %139, <i32 256, i32 256, i32 256, i32 256> | |
%141 = icmp sgt <4 x i32> %139, zeroinitializer | |
%142 = sext <4 x i1> %141 to <4 x i32> | |
%143 = select <4 x i1> %140, <4 x i32> %139, <4 x i32> %142 | |
%144 = trunc <4 x i32> %143 to <4 x i8> | |
%145 = getelementptr inbounds i8, ptr %dst.addr.181.us.i86, i64 %132 | |
%146 = getelementptr inbounds i8, ptr %145, i32 0 | |
store <4 x i8> %144, ptr %146, align 1, !tbaa !15, !alias.scope !182, !noalias !184 | |
%index.next247 = add nuw i64 %index241, 4 | |
%147 = icmp eq i64 %index.next247, %n.vec237 | |
br i1 %147, label %middle.block232, label %vector.body240, !llvm.loop !185 | |
middle.block232: ; preds = %vector.body240 | |
%cmp.n239 = icmp eq i64 %wide.trip.count.i83, %n.vec237 | |
br i1 %cmp.n239, label %for.cond23.for.cond.cleanup26_crit_edge.us.i101, label %scalar.ph233 | |
scalar.ph233: ; preds = %vector.memcheck218, %for.cond23.preheader.us.i84, %middle.block232 | |
%bc.resume.val238 = phi i64 [ %n.vec237, %middle.block232 ], [ 0, %for.cond23.preheader.us.i84 ], [ 0, %vector.memcheck218 ] | |
br label %for.body27.us.i88 | |
for.body27.us.i88: ; preds = %for.body27.us.i88, %scalar.ph233 | |
%indvars.iv.i89 = phi i64 [ %bc.resume.val238, %scalar.ph233 ], [ %indvars.iv.next.i99, %for.body27.us.i88 ] | |
%arrayidx29.us.i90 = getelementptr inbounds i8, ptr %src.addr.180.us.i87, i64 %indvars.iv.i89 | |
%148 = load i8, ptr %arrayidx29.us.i90, align 1, !tbaa !15 | |
%conv30.us.i91 = zext i8 %148 to i32 | |
%149 = load i32, ptr %i_scale31.i78, align 4, !tbaa !104 | |
%mul32.us.i92 = mul nsw i32 %149, %conv30.us.i91 | |
%150 = load i32, ptr %i_offset33.i79, align 8, !tbaa !108 | |
%add34.us.i93 = add nsw i32 %mul32.us.i92, %150 | |
%tobool.not.i72.us.i94 = icmp ult i32 %add34.us.i93, 256 | |
%151 = icmp sgt i32 %add34.us.i93, 0 | |
%shr.i73.us.i95 = sext i1 %151 to i32 | |
%cond.i74.us.i96 = select i1 %tobool.not.i72.us.i94, i32 %add34.us.i93, i32 %shr.i73.us.i95 | |
%conv.i75.us.i97 = trunc i32 %cond.i74.us.i96 to i8 | |
%arrayidx37.us.i98 = getelementptr inbounds i8, ptr %dst.addr.181.us.i86, i64 %indvars.iv.i89 | |
store i8 %conv.i75.us.i97, ptr %arrayidx37.us.i98, align 1, !tbaa !15 | |
%indvars.iv.next.i99 = add nuw nsw i64 %indvars.iv.i89, 1 | |
%exitcond.not.i100 = icmp eq i64 %indvars.iv.next.i99, %wide.trip.count.i83 | |
br i1 %exitcond.not.i100, label %for.cond23.for.cond.cleanup26_crit_edge.us.i101, label %for.body27.us.i88, !llvm.loop !186 | |
for.cond23.for.cond.cleanup26_crit_edge.us.i101: ; preds = %middle.block232, %for.body27.us.i88 | |
%inc42.us.i102 = add nuw nsw i32 %y16.082.us.i85, 1 | |
%add.ptr44.us.i103 = getelementptr inbounds i8, ptr %dst.addr.181.us.i86, i64 %idx.ext43.i80 | |
%add.ptr46.us.i104 = getelementptr inbounds i8, ptr %src.addr.180.us.i87, i64 %idx.ext45.i81 | |
%exitcond93.not.i105 = icmp eq i32 %inc42.us.i102, %i_height | |
br i1 %exitcond93.not.i105, label %cleanup.loopexit151, label %for.cond23.preheader.us.i84, !llvm.loop !113 | |
for.cond.preheader.i106: ; preds = %if.then26 | |
br i1 %cmp185.i74, label %for.cond2.preheader.lr.ph.i107, label %cleanup | |
for.cond2.preheader.lr.ph.i107: ; preds = %for.cond.preheader.i106 | |
%cmp383.i108 = icmp sgt i32 %i_width, 0 | |
%i_scale.i109 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3 | |
%i_offset.i110 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4 | |
%idx.ext.i111 = sext i32 %119 to i64 | |
%idx.ext13.i112 = sext i32 %i_src_stride to i64 | |
br i1 %cmp383.i108, label %for.cond2.preheader.us.preheader.i113, label %cleanup | |
for.cond2.preheader.us.preheader.i113: ; preds = %for.cond2.preheader.lr.ph.i107 | |
%wide.trip.count97.i114 = zext nneg i32 %i_width to i64 | |
%152 = add i32 %i_height, -1 | |
%153 = zext i32 %152 to i64 | |
%154 = mul i64 %idx.ext.i111, %153 | |
%155 = add i64 %154, %wide.trip.count97.i114 | |
%scevgep249 = getelementptr i8, ptr %dst, i64 %155 | |
%156 = mul i64 %idx.ext13.i112, %153 | |
%157 = add i64 %156, %idx.ext8 | |
%158 = add i64 %157, %idx.ext | |
%159 = add i64 %158, %wide.trip.count97.i114 | |
%scevgep250 = getelementptr i8, ptr %1, i64 %159 | |
%scevgep251 = getelementptr i8, ptr %weight, i64 44 | |
br label %for.cond2.preheader.us.i115 | |
for.cond2.preheader.us.i115: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i136, %for.cond2.preheader.us.preheader.i113 | |
%y.088.us.i116 = phi i32 [ %inc12.us.i137, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ 0, %for.cond2.preheader.us.preheader.i113 ] | |
%dst.addr.087.us.i117 = phi ptr [ %add.ptr.us.i138, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ %dst, %for.cond2.preheader.us.preheader.i113 ] | |
%src.addr.086.us.i118 = phi ptr [ %add.ptr14.us.i139, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ %add.ptr9, %for.cond2.preheader.us.preheader.i113 ] | |
%min.iters.check264 = icmp ult i64 %wide.trip.count97.i114, 8 | |
br i1 %min.iters.check264, label %scalar.ph263, label %vector.memcheck248 | |
vector.memcheck248: ; preds = %for.cond2.preheader.us.i115 | |
%bound0252 = icmp ult ptr %dst, %scevgep250 | |
%bound1253 = icmp ult ptr %add.ptr9, %scevgep249 | |
%found.conflict254 = and i1 %bound0252, %bound1253 | |
%stride.check255 = icmp slt i64 %idx.ext.i111, 0 | |
%160 = or i1 %found.conflict254, %stride.check255 | |
%stride.check256 = icmp slt i64 %idx.ext13.i112, 0 | |
%161 = or i1 %160, %stride.check256 | |
%bound0257 = icmp ult ptr %dst, %scevgep251 | |
%bound1258 = icmp ult ptr %i_denom.i72, %scevgep249 | |
%found.conflict259 = and i1 %bound0257, %bound1258 | |
%stride.check260 = icmp slt i64 %idx.ext.i111, 0 | |
%162 = or i1 %found.conflict259, %stride.check260 | |
%conflict.rdx261 = or i1 %161, %162 | |
br i1 %conflict.rdx261, label %scalar.ph263, label %vector.ph265 | |
vector.ph265: ; preds = %vector.memcheck248 | |
%n.mod.vf266 = urem i64 %wide.trip.count97.i114, 4 | |
%n.vec267 = sub i64 %wide.trip.count97.i114, %n.mod.vf266 | |
br label %vector.body270 | |
vector.body270: ; preds = %vector.body270, %vector.ph265 | |
%index271 = phi i64 [ 0, %vector.ph265 ], [ %index.next279, %vector.body270 ] | |
%163 = add i64 %index271, 0 | |
%164 = getelementptr inbounds i8, ptr %src.addr.086.us.i118, i64 %163 | |
%165 = getelementptr inbounds i8, ptr %164, i32 0 | |
%wide.load272 = load <4 x i8>, ptr %165, align 1, !tbaa !15, !alias.scope !187 | |
%166 = zext <4 x i8> %wide.load272 to <4 x i32> | |
%167 = load i32, ptr %i_scale.i109, align 4, !tbaa !104, !alias.scope !190 | |
%broadcast.splatinsert273 = insertelement <4 x i32> poison, i32 %167, i64 0 | |
%broadcast.splat274 = shufflevector <4 x i32> %broadcast.splatinsert273, <4 x i32> poison, <4 x i32> zeroinitializer | |
%168 = mul nsw <4 x i32> %broadcast.splat274, %166 | |
%169 = load i32, ptr %i_denom.i72, align 16, !tbaa !103, !alias.scope !190 | |
%broadcast.splatinsert275 = insertelement <4 x i32> poison, i32 %169, i64 0 | |
%broadcast.splat276 = shufflevector <4 x i32> %broadcast.splatinsert275, <4 x i32> poison, <4 x i32> zeroinitializer | |
%170 = add nsw <4 x i32> %broadcast.splat276, <i32 -1, i32 -1, i32 -1, i32 -1> | |
%171 = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %170 | |
%172 = add nsw <4 x i32> %171, %168 | |
%173 = ashr <4 x i32> %172, %broadcast.splat276 | |
%174 = load i32, ptr %i_offset.i110, align 8, !tbaa !108, !alias.scope !190 | |
%broadcast.splatinsert277 = insertelement <4 x i32> poison, i32 %174, i64 0 | |
%broadcast.splat278 = shufflevector <4 x i32> %broadcast.splatinsert277, <4 x i32> poison, <4 x i32> zeroinitializer | |
%175 = add nsw <4 x i32> %173, %broadcast.splat278 | |
%176 = icmp ult <4 x i32> %175, <i32 256, i32 256, i32 256, i32 256> | |
%177 = icmp sgt <4 x i32> %175, zeroinitializer | |
%178 = sext <4 x i1> %177 to <4 x i32> | |
%179 = select <4 x i1> %176, <4 x i32> %175, <4 x i32> %178 | |
%180 = trunc <4 x i32> %179 to <4 x i8> | |
%181 = getelementptr inbounds i8, ptr %dst.addr.087.us.i117, i64 %163 | |
%182 = getelementptr inbounds i8, ptr %181, i32 0 | |
store <4 x i8> %180, ptr %182, align 1, !tbaa !15, !alias.scope !192, !noalias !194 | |
%index.next279 = add nuw i64 %index271, 4 | |
%183 = icmp eq i64 %index.next279, %n.vec267 | |
br i1 %183, label %middle.block262, label %vector.body270, !llvm.loop !195 | |
middle.block262: ; preds = %vector.body270 | |
%cmp.n269 = icmp eq i64 %wide.trip.count97.i114, %n.vec267 | |
br i1 %cmp.n269, label %for.cond2.for.cond.cleanup4_crit_edge.us.i136, label %scalar.ph263 | |
scalar.ph263: ; preds = %vector.memcheck248, %for.cond2.preheader.us.i115, %middle.block262 | |
%bc.resume.val268 = phi i64 [ %n.vec267, %middle.block262 ], [ 0, %for.cond2.preheader.us.i115 ], [ 0, %vector.memcheck248 ] | |
br label %for.body5.us.i119 | |
for.body5.us.i119: ; preds = %for.body5.us.i119, %scalar.ph263 | |
%indvars.iv94.i120 = phi i64 [ %bc.resume.val268, %scalar.ph263 ], [ %indvars.iv.next95.i134, %for.body5.us.i119 ] | |
%arrayidx.us.i121 = getelementptr inbounds i8, ptr %src.addr.086.us.i118, i64 %indvars.iv94.i120 | |
%184 = load i8, ptr %arrayidx.us.i121, align 1, !tbaa !15 | |
%conv.us.i122 = zext i8 %184 to i32 | |
%185 = load i32, ptr %i_scale.i109, align 4, !tbaa !104 | |
%mul.us.i123 = mul nsw i32 %185, %conv.us.i122 | |
%186 = load i32, ptr %i_denom.i72, align 16, !tbaa !103 | |
%sub.us.i124 = add nsw i32 %186, -1 | |
%shl.us.i125 = shl nuw i32 1, %sub.us.i124 | |
%add.us.i126 = add nsw i32 %shl.us.i125, %mul.us.i123 | |
%shr.us.i127 = ashr i32 %add.us.i126, %186 | |
%187 = load i32, ptr %i_offset.i110, align 8, !tbaa !108 | |
%add8.us.i128 = add nsw i32 %shr.us.i127, %187 | |
%tobool.not.i.us.i129 = icmp ult i32 %add8.us.i128, 256 | |
%188 = icmp sgt i32 %add8.us.i128, 0 | |
%shr.i.us.i130 = sext i1 %188 to i32 | |
%cond.i.us.i131 = select i1 %tobool.not.i.us.i129, i32 %add8.us.i128, i32 %shr.i.us.i130 | |
%conv.i.us.i132 = trunc i32 %cond.i.us.i131 to i8 | |
%arrayidx10.us.i133 = getelementptr inbounds i8, ptr %dst.addr.087.us.i117, i64 %indvars.iv94.i120 | |
store i8 %conv.i.us.i132, ptr %arrayidx10.us.i133, align 1, !tbaa !15 | |
%indvars.iv.next95.i134 = add nuw nsw i64 %indvars.iv94.i120, 1 | |
%exitcond98.not.i135 = icmp eq i64 %indvars.iv.next95.i134, %wide.trip.count97.i114 | |
br i1 %exitcond98.not.i135, label %for.cond2.for.cond.cleanup4_crit_edge.us.i136, label %for.body5.us.i119, !llvm.loop !196 | |
for.cond2.for.cond.cleanup4_crit_edge.us.i136: ; preds = %middle.block262, %for.body5.us.i119 | |
%inc12.us.i137 = add nuw nsw i32 %y.088.us.i116, 1 | |
%add.ptr.us.i138 = getelementptr inbounds i8, ptr %dst.addr.087.us.i117, i64 %idx.ext.i111 | |
%add.ptr14.us.i139 = getelementptr inbounds i8, ptr %src.addr.086.us.i118, i64 %idx.ext13.i112 | |
%exitcond99.not.i140 = icmp eq i32 %inc12.us.i137, %i_height | |
br i1 %exitcond99.not.i140, label %cleanup.loopexit, label %for.cond2.preheader.us.i115, !llvm.loop !121 | |
if.else27: ; preds = %if.else | |
store i32 %i_src_stride, ptr %i_dst_stride, align 4, !tbaa !13 | |
br label %cleanup | |
cleanup.loopexit: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i136 | |
br label %cleanup | |
cleanup.loopexit151: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i101 | |
br label %cleanup | |
cleanup.loopexit152: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i | |
br label %cleanup | |
cleanup.loopexit153: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i | |
br label %cleanup | |
cleanup: ; preds = %cleanup.loopexit153, %cleanup.loopexit152, %cleanup.loopexit151, %cleanup.loopexit, %for.cond2.preheader.lr.ph.i107, %for.cond.preheader.i106, %for.cond23.preheader.lr.ph.i76, %for.cond17.preheader.i75, %for.cond2.preheader.lr.ph.i, %for.cond.preheader.i, %for.cond23.preheader.lr.ph.i, %for.cond17.preheader.i, %pixel_avg.exit, %if.else27 | |
%retval.0 = phi ptr [ %add.ptr9, %if.else27 ], [ %dst, %pixel_avg.exit ], [ %dst, %for.cond17.preheader.i ], [ %dst, %for.cond23.preheader.lr.ph.i ], [ %dst, %for.cond.preheader.i ], [ %dst, %for.cond2.preheader.lr.ph.i ], [ %dst, %for.cond17.preheader.i75 ], [ %dst, %for.cond23.preheader.lr.ph.i76 ], [ %dst, %for.cond.preheader.i106 ], [ %dst, %for.cond2.preheader.lr.ph.i107 ], [ %dst, %cleanup.loopexit ], [ %dst, %cleanup.loopexit151 ], [ %dst, %cleanup.loopexit152 ], [ %dst, %cleanup.loopexit153 ] | |
ret ptr %retval.0 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; *** IR Dump Before LoopVectorizePass on get_ref *** | |
; Function Attrs: nofree norecurse nosync nounwind memory(readwrite, inaccessiblemem: none) uwtable | |
; get_ref as it looks immediately before the loop vectorizer runs.
;
; Overview of what the IR below does:
;   * entry derives a 4-bit index from the low two bits of %mvy/%mvx, uses it
;     to pick a plane pointer out of the %src pointer array, and offsets that
;     pointer by the integer parts of the motion vector.
;   * if.then     : averages two source planes into %dst, then optionally
;                   rewrites %dst in place with a scale/offset transform.
;   * if.else     : single source; either runs the scale/offset transform
;                   from the source into %dst (if.then26) or touches no pixels
;                   and returns the source pointer itself (if.else27).
;   * cleanup     : returns %add.ptr9 only for the if.else27 edge, %dst for
;                   every other edge.
; All inner loops here are still scalar (one byte per iteration).
define internal ptr @get_ref(ptr noundef %dst, ptr nocapture noundef %i_dst_stride, ptr nocapture noundef readonly %src, i32 noundef %i_src_stride, i32 noundef %mvx, i32 noundef %mvy, i32 noundef %i_width, i32 noundef %i_height, ptr nocapture noundef readonly %weight) #0 { | |
entry: | |
; %add = ((mvy & 3) << 2) | (mvx & 3): index into the 16-entry @hpel_ref0 / @hpel_ref1 tables.
%and = and i32 %mvy, 3 | |
%shl = shl nuw nsw i32 %and, 2 | |
%and1 = and i32 %mvx, 3 | |
%add = or disjoint i32 %shl, %and1 | |
; %add3 = (mvy >> 2) * i_src_stride + (mvx >> 2): byte offset applied to the selected plane.
%shr = ashr i32 %mvy, 2 | |
%mul = mul nsw i32 %shr, %i_src_stride | |
%shr2 = ashr i32 %mvx, 2 | |
%add3 = add nsw i32 %mul, %shr2 | |
%idxprom = zext nneg i32 %add to i64 | |
; %1 = src[hpel_ref0[%add]]: the table byte selects which pointer of the %src array to use.
%arrayidx = getelementptr inbounds [16 x i8], ptr @hpel_ref0, i64 0, i64 %idxprom | |
%0 = load i8, ptr %arrayidx, align 1, !tbaa !15 | |
%idxprom4 = zext i8 %0 to i64 | |
%arrayidx5 = getelementptr inbounds ptr, ptr %src, i64 %idxprom4 | |
%1 = load ptr, ptr %arrayidx5, align 8, !tbaa !9 | |
%idx.ext = sext i32 %add3 to i64 | |
%add.ptr = getelementptr inbounds i8, ptr %1, i64 %idx.ext | |
; One extra row (i_src_stride bytes) is added when (mvy & 3) == 3; %add.ptr9 is the first source pointer.
%cmp = icmp eq i32 %and, 3 | |
%mul7 = select i1 %cmp, i32 %i_src_stride, i32 0 | |
%idx.ext8 = sext i32 %mul7 to i64 | |
%add.ptr9 = getelementptr inbounds i8, ptr %add.ptr, i64 %idx.ext8 | |
; (index & 5) != 0 -> if.then (two-source average); == 0 -> if.else (single source).
%and10 = and i32 %add, 5 | |
%tobool.not = icmp eq i32 %and10, 0 | |
br i1 %tobool.not, label %if.else, label %if.then | |
if.then: ; preds = %entry | |
; Second source pointer: src[hpel_ref1[%add]] + %add3, plus one byte when (mvx & 3) == 3.
%arrayidx12 = getelementptr inbounds [16 x i8], ptr @hpel_ref1, i64 0, i64 %idxprom | |
%2 = load i8, ptr %arrayidx12, align 1, !tbaa !15 | |
%idxprom13 = zext i8 %2 to i64 | |
%arrayidx14 = getelementptr inbounds ptr, ptr %src, i64 %idxprom13 | |
%3 = load ptr, ptr %arrayidx14, align 8, !tbaa !9 | |
%add.ptr16 = getelementptr inbounds i8, ptr %3, i64 %idx.ext | |
%cmp18 = icmp eq i32 %and1, 3 | |
%idx.ext20 = zext i1 %cmp18 to i64 | |
%add.ptr21 = getelementptr inbounds i8, ptr %add.ptr16, i64 %idx.ext20 | |
; %cmp29.i (i_height > 0) is reused further down to guard the in-place transform loops as well.
%cmp29.i = icmp sgt i32 %i_height, 0 | |
br i1 %cmp29.i, label %for.cond1.preheader.lr.ph.i, label %pixel_avg.exit | |
for.cond1.preheader.lr.ph.i: ; preds = %if.then | |
; The destination stride is read once from *%i_dst_stride before the averaging loop nest.
%4 = load i32, ptr %i_dst_stride, align 4, !tbaa !13 | |
%cmp227.i = icmp sgt i32 %i_width, 0 | |
%idx.ext.i = sext i32 %4 to i64 | |
%idx.ext12.i = sext i32 %i_src_stride to i64 | |
br i1 %cmp227.i, label %for.cond1.preheader.us.preheader.i, label %pixel_avg.exit | |
for.cond1.preheader.us.preheader.i: ; preds = %for.cond1.preheader.lr.ph.i | |
%wide.trip.count.i = zext nneg i32 %i_width to i64 | |
br label %for.cond1.preheader.us.i | |
for.cond1.preheader.us.i: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us.i, %for.cond1.preheader.us.preheader.i | |
; Row loop header: row counter plus the three per-row base pointers (dst, source 1, source 2).
%y.033.us.i = phi i32 [ %inc17.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ 0, %for.cond1.preheader.us.preheader.i ] | |
%dst.addr.032.us.i = phi ptr [ %add.ptr.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %dst, %for.cond1.preheader.us.preheader.i ] | |
%src1.addr.031.us.i = phi ptr [ %add.ptr13.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %add.ptr9, %for.cond1.preheader.us.preheader.i ] | |
%src2.addr.030.us.i = phi ptr [ %add.ptr15.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %add.ptr21, %for.cond1.preheader.us.preheader.i ] | |
br label %for.body4.us.i | |
for.body4.us.i: ; preds = %for.body4.us.i, %for.cond1.preheader.us.i | |
; Column loop: dst[x] = (src1[x] + src2[x] + 1) >> 1, computed in i16 and truncated back to i8.
%indvars.iv.i = phi i64 [ 0, %for.cond1.preheader.us.i ], [ %indvars.iv.next.i, %for.body4.us.i ] | |
%arrayidx.us.i = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %indvars.iv.i | |
%5 = load i8, ptr %arrayidx.us.i, align 1, !tbaa !15 | |
%conv.us.i = zext i8 %5 to i16 | |
%arrayidx6.us.i = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %indvars.iv.i | |
%6 = load i8, ptr %arrayidx6.us.i, align 1, !tbaa !15 | |
%conv7.us.i = zext i8 %6 to i16 | |
%add.us.i = add nuw nsw i16 %conv.us.i, 1 | |
%add8.us.i = add nuw nsw i16 %add.us.i, %conv7.us.i | |
%shr.us.i = lshr i16 %add8.us.i, 1 | |
%conv9.us.i = trunc i16 %shr.us.i to i8 | |
%arrayidx11.us.i = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %indvars.iv.i | |
store i8 %conv9.us.i, ptr %arrayidx11.us.i, align 1, !tbaa !15 | |
%indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1 | |
%exitcond.not.i = icmp eq i64 %indvars.iv.next.i, %wide.trip.count.i | |
br i1 %exitcond.not.i, label %for.cond1.for.cond.cleanup3_crit_edge.us.i, label %for.body4.us.i, !llvm.loop !144 | |
for.cond1.for.cond.cleanup3_crit_edge.us.i: ; preds = %for.body4.us.i | |
; Next row: dst advances by the destination stride, both sources by i_src_stride.
%add.ptr.us.i = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %idx.ext.i | |
%add.ptr13.us.i = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %idx.ext12.i | |
%add.ptr15.us.i = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %idx.ext12.i | |
%inc17.us.i = add nuw nsw i32 %y.033.us.i, 1 | |
%exitcond36.not.i = icmp eq i32 %inc17.us.i, %i_height | |
br i1 %exitcond36.not.i, label %pixel_avg.exit.loopexit, label %for.cond1.preheader.us.i, !llvm.loop !100 | |
pixel_avg.exit.loopexit: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us.i | |
br label %pixel_avg.exit | |
pixel_avg.exit: ; preds = %pixel_avg.exit.loopexit, %if.then, %for.cond1.preheader.lr.ph.i | |
; Field 5 of %struct.x264_weight_t is loaded as a pointer; when it is null the
; in-place transform below is skipped and %dst is returned as is.
%weightfn = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 5 | |
%7 = load ptr, ptr %weightfn, align 16, !tbaa !101 | |
%tobool22.not = icmp eq ptr %7, null | |
br i1 %tobool22.not, label %cleanup, label %if.then23 | |
if.then23: ; preds = %pixel_avg.exit | |
; *%i_dst_stride is loaded again here (%8). Field 2 (%i_denom.i) > 0 selects the
; rounding-shift loop nest (for.cond.preheader.i); otherwise the scale+offset-only nest.
%8 = load i32, ptr %i_dst_stride, align 4, !tbaa !13 | |
%i_denom.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 2 | |
%9 = load i32, ptr %i_denom.i, align 16, !tbaa !103 | |
%cmp.i = icmp sgt i32 %9, 0 | |
br i1 %cmp.i, label %for.cond.preheader.i, label %for.cond17.preheader.i | |
for.cond17.preheader.i: ; preds = %if.then23 | |
br i1 %cmp29.i, label %for.cond23.preheader.lr.ph.i, label %cleanup | |
for.cond23.preheader.lr.ph.i: ; preds = %for.cond17.preheader.i | |
%cmp2477.i = icmp sgt i32 %i_width, 0 | |
%i_scale31.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3 | |
%i_offset33.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4 | |
%idx.ext43.i = sext i32 %8 to i64 | |
br i1 %cmp2477.i, label %for.cond23.preheader.us.preheader.i, label %cleanup | |
for.cond23.preheader.us.preheader.i: ; preds = %for.cond23.preheader.lr.ph.i | |
%wide.trip.count.i61 = zext nneg i32 %i_width to i64 | |
br label %for.cond23.preheader.us.i | |
for.cond23.preheader.us.i: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i, %for.cond23.preheader.us.preheader.i | |
%y16.082.us.i = phi i32 [ %inc42.us.i, %for.cond23.for.cond.cleanup26_crit_edge.us.i ], [ 0, %for.cond23.preheader.us.preheader.i ] | |
%dst.addr.181.us.i = phi ptr [ %add.ptr44.us.i, %for.cond23.for.cond.cleanup26_crit_edge.us.i ], [ %dst, %for.cond23.preheader.us.preheader.i ] | |
br label %for.body27.us.i | |
for.body27.us.i: ; preds = %for.body27.us.i, %for.cond23.preheader.us.i | |
; In place on dst (load and store share %arrayidx29.us.i):
;   v = field3 * dst[x] + field4
;   dst[x] = v if 0 <= v <= 255 (single unsigned compare against 256),
;            else 255 when v > 0, else 0 (sext of the i1, truncated to i8).
; The field 3 / field 4 loads are inside the loop body, i.e. repeated every iteration.
%indvars.iv.i62 = phi i64 [ 0, %for.cond23.preheader.us.i ], [ %indvars.iv.next.i63, %for.body27.us.i ] | |
%arrayidx29.us.i = getelementptr inbounds i8, ptr %dst.addr.181.us.i, i64 %indvars.iv.i62 | |
%10 = load i8, ptr %arrayidx29.us.i, align 1, !tbaa !15 | |
%conv30.us.i = zext i8 %10 to i32 | |
%11 = load i32, ptr %i_scale31.i, align 4, !tbaa !104 | |
%mul32.us.i = mul nsw i32 %11, %conv30.us.i | |
%12 = load i32, ptr %i_offset33.i, align 8, !tbaa !108 | |
%add34.us.i = add nsw i32 %mul32.us.i, %12 | |
%tobool.not.i72.us.i = icmp ult i32 %add34.us.i, 256 | |
%13 = icmp sgt i32 %add34.us.i, 0 | |
%shr.i73.us.i = sext i1 %13 to i32 | |
%cond.i74.us.i = select i1 %tobool.not.i72.us.i, i32 %add34.us.i, i32 %shr.i73.us.i | |
%conv.i75.us.i = trunc i32 %cond.i74.us.i to i8 | |
store i8 %conv.i75.us.i, ptr %arrayidx29.us.i, align 1, !tbaa !15 | |
%indvars.iv.next.i63 = add nuw nsw i64 %indvars.iv.i62, 1 | |
%exitcond.not.i64 = icmp eq i64 %indvars.iv.next.i63, %wide.trip.count.i61 | |
br i1 %exitcond.not.i64, label %for.cond23.for.cond.cleanup26_crit_edge.us.i, label %for.body27.us.i, !llvm.loop !145 | |
for.cond23.for.cond.cleanup26_crit_edge.us.i: ; preds = %for.body27.us.i | |
%inc42.us.i = add nuw nsw i32 %y16.082.us.i, 1 | |
%add.ptr44.us.i = getelementptr i8, ptr %dst.addr.181.us.i, i64 %idx.ext43.i | |
%exitcond93.not.i = icmp eq i32 %inc42.us.i, %i_height | |
br i1 %exitcond93.not.i, label %cleanup.loopexit153, label %for.cond23.preheader.us.i, !llvm.loop !113 | |
for.cond.preheader.i: ; preds = %if.then23 | |
br i1 %cmp29.i, label %for.cond2.preheader.lr.ph.i, label %cleanup | |
for.cond2.preheader.lr.ph.i: ; preds = %for.cond.preheader.i | |
%cmp383.i = icmp sgt i32 %i_width, 0 | |
%i_scale.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3 | |
%i_offset.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4 | |
%idx.ext.i65 = sext i32 %8 to i64 | |
br i1 %cmp383.i, label %for.cond2.preheader.us.preheader.i, label %cleanup | |
for.cond2.preheader.us.preheader.i: ; preds = %for.cond2.preheader.lr.ph.i | |
%wide.trip.count97.i = zext nneg i32 %i_width to i64 | |
br label %for.cond2.preheader.us.i | |
for.cond2.preheader.us.i: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i, %for.cond2.preheader.us.preheader.i | |
%y.088.us.i = phi i32 [ %inc12.us.i, %for.cond2.for.cond.cleanup4_crit_edge.us.i ], [ 0, %for.cond2.preheader.us.preheader.i ] | |
%dst.addr.087.us.i = phi ptr [ %add.ptr.us.i71, %for.cond2.for.cond.cleanup4_crit_edge.us.i ], [ %dst, %for.cond2.preheader.us.preheader.i ] | |
br label %for.body5.us.i | |
for.body5.us.i: ; preds = %for.body5.us.i, %for.cond2.preheader.us.i | |
; In place on dst, rounding-shift variant:
;   v = ((field3 * dst[x] + (1 << (field2 - 1))) >> field2) + field4
; followed by the same 0..255 clamp as the loop above. Fields 2, 3 and 4 are
; all loaded inside the loop body.
%indvars.iv94.i = phi i64 [ 0, %for.cond2.preheader.us.i ], [ %indvars.iv.next95.i, %for.body5.us.i ] | |
%arrayidx.us.i66 = getelementptr inbounds i8, ptr %dst.addr.087.us.i, i64 %indvars.iv94.i | |
%14 = load i8, ptr %arrayidx.us.i66, align 1, !tbaa !15 | |
%conv.us.i67 = zext i8 %14 to i32 | |
%15 = load i32, ptr %i_scale.i, align 4, !tbaa !104 | |
%mul.us.i = mul nsw i32 %15, %conv.us.i67 | |
%16 = load i32, ptr %i_denom.i, align 16, !tbaa !103 | |
%sub.us.i = add nsw i32 %16, -1 | |
%shl.us.i = shl nuw i32 1, %sub.us.i | |
%add.us.i68 = add nsw i32 %shl.us.i, %mul.us.i | |
%shr.us.i69 = ashr i32 %add.us.i68, %16 | |
%17 = load i32, ptr %i_offset.i, align 8, !tbaa !108 | |
%add8.us.i70 = add nsw i32 %shr.us.i69, %17 | |
%tobool.not.i.us.i = icmp ult i32 %add8.us.i70, 256 | |
%18 = icmp sgt i32 %add8.us.i70, 0 | |
%shr.i.us.i = sext i1 %18 to i32 | |
%cond.i.us.i = select i1 %tobool.not.i.us.i, i32 %add8.us.i70, i32 %shr.i.us.i | |
%conv.i.us.i = trunc i32 %cond.i.us.i to i8 | |
store i8 %conv.i.us.i, ptr %arrayidx.us.i66, align 1, !tbaa !15 | |
%indvars.iv.next95.i = add nuw nsw i64 %indvars.iv94.i, 1 | |
%exitcond98.not.i = icmp eq i64 %indvars.iv.next95.i, %wide.trip.count97.i | |
br i1 %exitcond98.not.i, label %for.cond2.for.cond.cleanup4_crit_edge.us.i, label %for.body5.us.i, !llvm.loop !146 | |
for.cond2.for.cond.cleanup4_crit_edge.us.i: ; preds = %for.body5.us.i | |
%inc12.us.i = add nuw nsw i32 %y.088.us.i, 1 | |
%add.ptr.us.i71 = getelementptr i8, ptr %dst.addr.087.us.i, i64 %idx.ext.i65 | |
%exitcond99.not.i = icmp eq i32 %inc12.us.i, %i_height | |
br i1 %exitcond99.not.i, label %cleanup.loopexit152, label %for.cond2.preheader.us.i, !llvm.loop !121 | |
if.else: ; preds = %entry | |
; Single-source path: the only input is %add.ptr9. A null field-5 pointer
; means no pixel work at all (if.else27); otherwise transform source -> dst.
%weightfn24 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 5 | |
%19 = load ptr, ptr %weightfn24, align 16, !tbaa !101 | |
%tobool25.not = icmp eq ptr %19, null | |
br i1 %tobool25.not, label %if.else27, label %if.then26 | |
if.then26: ; preds = %if.else | |
; Same field-2 > 0 dispatch as if.then23, but the loops below read from the
; source plane and write to %dst through separate pointers.
%20 = load i32, ptr %i_dst_stride, align 4, !tbaa !13 | |
%i_denom.i72 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 2 | |
%21 = load i32, ptr %i_denom.i72, align 16, !tbaa !103 | |
%cmp.i73 = icmp sgt i32 %21, 0 | |
%cmp185.i74 = icmp sgt i32 %i_height, 0 | |
br i1 %cmp.i73, label %for.cond.preheader.i106, label %for.cond17.preheader.i75 | |
for.cond17.preheader.i75: ; preds = %if.then26 | |
br i1 %cmp185.i74, label %for.cond23.preheader.lr.ph.i76, label %cleanup | |
for.cond23.preheader.lr.ph.i76: ; preds = %for.cond17.preheader.i75 | |
%cmp2477.i77 = icmp sgt i32 %i_width, 0 | |
%i_scale31.i78 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3 | |
%i_offset33.i79 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4 | |
%idx.ext43.i80 = sext i32 %20 to i64 | |
%idx.ext45.i81 = sext i32 %i_src_stride to i64 | |
br i1 %cmp2477.i77, label %for.cond23.preheader.us.preheader.i82, label %cleanup | |
for.cond23.preheader.us.preheader.i82: ; preds = %for.cond23.preheader.lr.ph.i76 | |
%wide.trip.count.i83 = zext nneg i32 %i_width to i64 | |
br label %for.cond23.preheader.us.i84 | |
for.cond23.preheader.us.i84: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i101, %for.cond23.preheader.us.preheader.i82 | |
%y16.082.us.i85 = phi i32 [ %inc42.us.i102, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ 0, %for.cond23.preheader.us.preheader.i82 ] | |
%dst.addr.181.us.i86 = phi ptr [ %add.ptr44.us.i103, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ %dst, %for.cond23.preheader.us.preheader.i82 ] | |
%src.addr.180.us.i87 = phi ptr [ %add.ptr46.us.i104, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ %add.ptr9, %for.cond23.preheader.us.preheader.i82 ] | |
br label %for.body27.us.i88 | |
for.body27.us.i88: ; preds = %for.body27.us.i88, %for.cond23.preheader.us.i84 | |
; dst[x] = clamp_0_255(field3 * src[x] + field4); source and destination are distinct pointers here.
%indvars.iv.i89 = phi i64 [ 0, %for.cond23.preheader.us.i84 ], [ %indvars.iv.next.i99, %for.body27.us.i88 ] | |
%arrayidx29.us.i90 = getelementptr inbounds i8, ptr %src.addr.180.us.i87, i64 %indvars.iv.i89 | |
%22 = load i8, ptr %arrayidx29.us.i90, align 1, !tbaa !15 | |
%conv30.us.i91 = zext i8 %22 to i32 | |
%23 = load i32, ptr %i_scale31.i78, align 4, !tbaa !104 | |
%mul32.us.i92 = mul nsw i32 %23, %conv30.us.i91 | |
%24 = load i32, ptr %i_offset33.i79, align 8, !tbaa !108 | |
%add34.us.i93 = add nsw i32 %mul32.us.i92, %24 | |
%tobool.not.i72.us.i94 = icmp ult i32 %add34.us.i93, 256 | |
%25 = icmp sgt i32 %add34.us.i93, 0 | |
%shr.i73.us.i95 = sext i1 %25 to i32 | |
%cond.i74.us.i96 = select i1 %tobool.not.i72.us.i94, i32 %add34.us.i93, i32 %shr.i73.us.i95 | |
%conv.i75.us.i97 = trunc i32 %cond.i74.us.i96 to i8 | |
%arrayidx37.us.i98 = getelementptr inbounds i8, ptr %dst.addr.181.us.i86, i64 %indvars.iv.i89 | |
store i8 %conv.i75.us.i97, ptr %arrayidx37.us.i98, align 1, !tbaa !15 | |
%indvars.iv.next.i99 = add nuw nsw i64 %indvars.iv.i89, 1 | |
%exitcond.not.i100 = icmp eq i64 %indvars.iv.next.i99, %wide.trip.count.i83 | |
br i1 %exitcond.not.i100, label %for.cond23.for.cond.cleanup26_crit_edge.us.i101, label %for.body27.us.i88, !llvm.loop !145 | |
for.cond23.for.cond.cleanup26_crit_edge.us.i101: ; preds = %for.body27.us.i88 | |
; Next row: dst advances by the destination stride (%20), src by i_src_stride.
%inc42.us.i102 = add nuw nsw i32 %y16.082.us.i85, 1 | |
%add.ptr44.us.i103 = getelementptr inbounds i8, ptr %dst.addr.181.us.i86, i64 %idx.ext43.i80 | |
%add.ptr46.us.i104 = getelementptr inbounds i8, ptr %src.addr.180.us.i87, i64 %idx.ext45.i81 | |
%exitcond93.not.i105 = icmp eq i32 %inc42.us.i102, %i_height | |
br i1 %exitcond93.not.i105, label %cleanup.loopexit151, label %for.cond23.preheader.us.i84, !llvm.loop !113 | |
for.cond.preheader.i106: ; preds = %if.then26 | |
br i1 %cmp185.i74, label %for.cond2.preheader.lr.ph.i107, label %cleanup | |
for.cond2.preheader.lr.ph.i107: ; preds = %for.cond.preheader.i106 | |
%cmp383.i108 = icmp sgt i32 %i_width, 0 | |
%i_scale.i109 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3 | |
%i_offset.i110 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4 | |
%idx.ext.i111 = sext i32 %20 to i64 | |
%idx.ext13.i112 = sext i32 %i_src_stride to i64 | |
br i1 %cmp383.i108, label %for.cond2.preheader.us.preheader.i113, label %cleanup | |
for.cond2.preheader.us.preheader.i113: ; preds = %for.cond2.preheader.lr.ph.i107 | |
%wide.trip.count97.i114 = zext nneg i32 %i_width to i64 | |
br label %for.cond2.preheader.us.i115 | |
for.cond2.preheader.us.i115: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i136, %for.cond2.preheader.us.preheader.i113 | |
%y.088.us.i116 = phi i32 [ %inc12.us.i137, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ 0, %for.cond2.preheader.us.preheader.i113 ] | |
%dst.addr.087.us.i117 = phi ptr [ %add.ptr.us.i138, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ %dst, %for.cond2.preheader.us.preheader.i113 ] | |
%src.addr.086.us.i118 = phi ptr [ %add.ptr14.us.i139, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ %add.ptr9, %for.cond2.preheader.us.preheader.i113 ] | |
br label %for.body5.us.i119 | |
for.body5.us.i119: ; preds = %for.body5.us.i119, %for.cond2.preheader.us.i115 | |
; dst[x] = clamp_0_255(((field3 * src[x] + (1 << (field2 - 1))) >> field2) + field4),
; reading from the source row and writing to the destination row.
%indvars.iv94.i120 = phi i64 [ 0, %for.cond2.preheader.us.i115 ], [ %indvars.iv.next95.i134, %for.body5.us.i119 ] | |
%arrayidx.us.i121 = getelementptr inbounds i8, ptr %src.addr.086.us.i118, i64 %indvars.iv94.i120 | |
%26 = load i8, ptr %arrayidx.us.i121, align 1, !tbaa !15 | |
%conv.us.i122 = zext i8 %26 to i32 | |
%27 = load i32, ptr %i_scale.i109, align 4, !tbaa !104 | |
%mul.us.i123 = mul nsw i32 %27, %conv.us.i122 | |
%28 = load i32, ptr %i_denom.i72, align 16, !tbaa !103 | |
%sub.us.i124 = add nsw i32 %28, -1 | |
%shl.us.i125 = shl nuw i32 1, %sub.us.i124 | |
%add.us.i126 = add nsw i32 %shl.us.i125, %mul.us.i123 | |
%shr.us.i127 = ashr i32 %add.us.i126, %28 | |
%29 = load i32, ptr %i_offset.i110, align 8, !tbaa !108 | |
%add8.us.i128 = add nsw i32 %shr.us.i127, %29 | |
%tobool.not.i.us.i129 = icmp ult i32 %add8.us.i128, 256 | |
%30 = icmp sgt i32 %add8.us.i128, 0 | |
%shr.i.us.i130 = sext i1 %30 to i32 | |
%cond.i.us.i131 = select i1 %tobool.not.i.us.i129, i32 %add8.us.i128, i32 %shr.i.us.i130 | |
%conv.i.us.i132 = trunc i32 %cond.i.us.i131 to i8 | |
%arrayidx10.us.i133 = getelementptr inbounds i8, ptr %dst.addr.087.us.i117, i64 %indvars.iv94.i120 | |
store i8 %conv.i.us.i132, ptr %arrayidx10.us.i133, align 1, !tbaa !15 | |
%indvars.iv.next95.i134 = add nuw nsw i64 %indvars.iv94.i120, 1 | |
%exitcond98.not.i135 = icmp eq i64 %indvars.iv.next95.i134, %wide.trip.count97.i114 | |
br i1 %exitcond98.not.i135, label %for.cond2.for.cond.cleanup4_crit_edge.us.i136, label %for.body5.us.i119, !llvm.loop !146 | |
for.cond2.for.cond.cleanup4_crit_edge.us.i136: ; preds = %for.body5.us.i119 | |
%inc12.us.i137 = add nuw nsw i32 %y.088.us.i116, 1 | |
%add.ptr.us.i138 = getelementptr inbounds i8, ptr %dst.addr.087.us.i117, i64 %idx.ext.i111 | |
%add.ptr14.us.i139 = getelementptr inbounds i8, ptr %src.addr.086.us.i118, i64 %idx.ext13.i112 | |
%exitcond99.not.i140 = icmp eq i32 %inc12.us.i137, %i_height | |
br i1 %exitcond99.not.i140, label %cleanup.loopexit, label %for.cond2.preheader.us.i115, !llvm.loop !121 | |
if.else27: ; preds = %if.else | |
; No pixels are written on this path: *%i_dst_stride is overwritten with
; %i_src_stride, and cleanup returns %add.ptr9 for this edge.
store i32 %i_src_stride, ptr %i_dst_stride, align 4, !tbaa !13 | |
br label %cleanup | |
cleanup.loopexit: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i136 | |
br label %cleanup | |
cleanup.loopexit151: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i101 | |
br label %cleanup | |
cleanup.loopexit152: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i | |
br label %cleanup | |
cleanup.loopexit153: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i | |
br label %cleanup | |
cleanup: ; preds = %cleanup.loopexit153, %cleanup.loopexit152, %cleanup.loopexit151, %cleanup.loopexit, %for.cond2.preheader.lr.ph.i107, %for.cond.preheader.i106, %for.cond23.preheader.lr.ph.i76, %for.cond17.preheader.i75, %for.cond2.preheader.lr.ph.i, %for.cond.preheader.i, %for.cond23.preheader.lr.ph.i, %for.cond17.preheader.i, %pixel_avg.exit, %if.else27 | |
; Single return point: %add.ptr9 for the if.else27 edge, %dst for all thirteen other edges.
%retval.0 = phi ptr [ %add.ptr9, %if.else27 ], [ %dst, %pixel_avg.exit ], [ %dst, %for.cond17.preheader.i ], [ %dst, %for.cond23.preheader.lr.ph.i ], [ %dst, %for.cond.preheader.i ], [ %dst, %for.cond2.preheader.lr.ph.i ], [ %dst, %for.cond17.preheader.i75 ], [ %dst, %for.cond23.preheader.lr.ph.i76 ], [ %dst, %for.cond.preheader.i106 ], [ %dst, %for.cond2.preheader.lr.ph.i107 ], [ %dst, %cleanup.loopexit ], [ %dst, %cleanup.loopexit151 ], [ %dst, %cleanup.loopexit152 ], [ %dst, %cleanup.loopexit153 ] | |
ret ptr %retval.0 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; *** IR Dump After SLPVectorizerPass on x264_pixel_sad_16x16 *** | |
; Function Attrs: nofree norecurse nosync nounwind memory(read, inaccessiblemem: none) uwtable | |
; Sum of absolute differences between two byte blocks, 16 bytes per row for
; 16 rows. In this dump the per-row work is a single <16 x i8> load from each
; input followed by a vector reduction; the only remaining loop is over rows.
; Returns the accumulated i32 sum.
define internal i32 @x264_pixel_sad_16x16(ptr nocapture noundef readonly %pix1, i32 noundef %i_stride_pix1, ptr nocapture noundef readonly %pix2, i32 noundef %i_stride_pix2) #4 { | |
entry: | |
; Row strides are sign-extended once, outside the loop.
%idx.ext = sext i32 %i_stride_pix1 to i64 | |
%idx.ext8 = sext i32 %i_stride_pix2 to i64 | |
br label %for.cond1.preheader | |
for.cond1.preheader: ; preds = %entry, %for.cond1.preheader | |
; Loop-carried state: row counter, running sum, and the two row base pointers.
%y.025 = phi i32 [ 0, %entry ], [ %inc11, %for.cond1.preheader ] | |
%i_sum.024 = phi i32 [ 0, %entry ], [ %op.rdx, %for.cond1.preheader ] | |
%pix1.addr.023 = phi ptr [ %pix1, %entry ], [ %add.ptr, %for.cond1.preheader ] | |
%pix2.addr.022 = phi ptr [ %pix2, %entry ], [ %add.ptr9, %for.cond1.preheader ] | |
; One row: widen both 16-byte vectors to i32 lanes, take |a - b| per lane
; (llvm.abs with the int-min-is-poison flag set), then add the 16 lanes together.
%0 = load <16 x i8>, ptr %pix1.addr.023, align 1, !tbaa !14 | |
%1 = zext <16 x i8> %0 to <16 x i32> | |
%2 = load <16 x i8>, ptr %pix2.addr.022, align 1, !tbaa !14 | |
%3 = zext <16 x i8> %2 to <16 x i32> | |
%4 = sub nsw <16 x i32> %1, %3 | |
%5 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %4, i1 true) | |
%6 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %5) | |
; Fold this row's total into the running sum.
%op.rdx = add i32 %6, %i_sum.024 | |
; Step both inputs to the next row by their respective strides.
%add.ptr = getelementptr inbounds i8, ptr %pix1.addr.023, i64 %idx.ext | |
%add.ptr9 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 %idx.ext8 | |
%inc11 = add nuw nsw i32 %y.025, 1 | |
; Exit after the 16th row.
%exitcond.not = icmp eq i32 %inc11, 16 | |
br i1 %exitcond.not, label %for.cond.cleanup, label %for.cond1.preheader, !llvm.loop !30 | |
for.cond.cleanup: ; preds = %for.cond1.preheader | |
ret i32 %op.rdx | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; *** IR Dump Before SLPVectorizerPass on x264_pixel_sad_16x16 *** | |
; Sum of absolute differences (SAD) between two 16x16 blocks of bytes, in the
; scalar form: the loop body is straight-line code that handles byte offsets
; 0..15 of one row, chaining each absolute difference into the running sum.
;   %pix1, %pix2                    - pointers to the first row of each block (only read)
;   %i_stride_pix1, %i_stride_pix2  - signed byte offset from one row to the next
;   returns                         - i32 total of |pix1[x] - pix2[x]| for x = 0..15 over 16 rows
; Function Attrs: nofree norecurse nosync nounwind memory(read, inaccessiblemem: none) uwtable | |
define internal i32 @x264_pixel_sad_16x16(ptr nocapture noundef readonly %pix1, i32 noundef %i_stride_pix1, ptr nocapture noundef readonly %pix2, i32 noundef %i_stride_pix2) #4 { | |
entry: | |
; Widen both i32 strides to i64 once, so they can be used as pointer offsets in the loop.
%idx.ext = sext i32 %i_stride_pix1 to i64 | |
%idx.ext8 = sext i32 %i_stride_pix2 to i64 | |
br label %for.cond1.preheader | |
; Row loop: a single block that branches back to itself.
for.cond1.preheader: ; preds = %entry, %for.cond1.preheader | |
; Loop-carried state: row counter, running SAD total, and the current row pointer of each block.
%y.025 = phi i32 [ 0, %entry ], [ %inc11, %for.cond1.preheader ] | |
%i_sum.024 = phi i32 [ 0, %entry ], [ %add.15, %for.cond1.preheader ] | |
%pix1.addr.023 = phi ptr [ %pix1, %entry ], [ %add.ptr, %for.cond1.preheader ] | |
%pix2.addr.022 = phi ptr [ %pix2, %entry ], [ %add.ptr9, %for.cond1.preheader ] | |
; Byte offset 0: load one byte from each row, zero-extend to i32 (difference in
; [-255, 255]), take the absolute value and add it to the total carried in from
; the previous row. The `i1 true` flag on llvm.abs makes an INT_MIN input poison,
; which that range never produces.
%0 = load i8, ptr %pix1.addr.023, align 1, !tbaa !14 | |
%conv = zext i8 %0 to i32 | |
%1 = load i8, ptr %pix2.addr.022, align 1, !tbaa !14 | |
%conv7 = zext i8 %1 to i32 | |
%sub = sub nsw i32 %conv, %conv7 | |
%2 = tail call i32 @llvm.abs.i32(i32 %sub, i1 true) | |
%add = add nsw i32 %2, %i_sum.024 | |
; Byte offsets 1..15 repeat the same load / zext / sub / abs / add sequence; each
; `%add.N` extends the sum chain from `%add.(N-1)`.
%arrayidx.1 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 1 | |
%3 = load i8, ptr %arrayidx.1, align 1, !tbaa !14 | |
%conv.1 = zext i8 %3 to i32 | |
%arrayidx6.1 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 1 | |
%4 = load i8, ptr %arrayidx6.1, align 1, !tbaa !14 | |
%conv7.1 = zext i8 %4 to i32 | |
%sub.1 = sub nsw i32 %conv.1, %conv7.1 | |
%5 = tail call i32 @llvm.abs.i32(i32 %sub.1, i1 true) | |
%add.1 = add nsw i32 %5, %add | |
%arrayidx.2 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 2 | |
%6 = load i8, ptr %arrayidx.2, align 1, !tbaa !14 | |
%conv.2 = zext i8 %6 to i32 | |
%arrayidx6.2 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 2 | |
%7 = load i8, ptr %arrayidx6.2, align 1, !tbaa !14 | |
%conv7.2 = zext i8 %7 to i32 | |
%sub.2 = sub nsw i32 %conv.2, %conv7.2 | |
%8 = tail call i32 @llvm.abs.i32(i32 %sub.2, i1 true) | |
%add.2 = add nsw i32 %8, %add.1 | |
%arrayidx.3 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 3 | |
%9 = load i8, ptr %arrayidx.3, align 1, !tbaa !14 | |
%conv.3 = zext i8 %9 to i32 | |
%arrayidx6.3 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 3 | |
%10 = load i8, ptr %arrayidx6.3, align 1, !tbaa !14 | |
%conv7.3 = zext i8 %10 to i32 | |
%sub.3 = sub nsw i32 %conv.3, %conv7.3 | |
%11 = tail call i32 @llvm.abs.i32(i32 %sub.3, i1 true) | |
%add.3 = add nsw i32 %11, %add.2 | |
%arrayidx.4 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 4 | |
%12 = load i8, ptr %arrayidx.4, align 1, !tbaa !14 | |
%conv.4 = zext i8 %12 to i32 | |
%arrayidx6.4 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 4 | |
%13 = load i8, ptr %arrayidx6.4, align 1, !tbaa !14 | |
%conv7.4 = zext i8 %13 to i32 | |
%sub.4 = sub nsw i32 %conv.4, %conv7.4 | |
%14 = tail call i32 @llvm.abs.i32(i32 %sub.4, i1 true) | |
%add.4 = add nsw i32 %14, %add.3 | |
%arrayidx.5 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 5 | |
%15 = load i8, ptr %arrayidx.5, align 1, !tbaa !14 | |
%conv.5 = zext i8 %15 to i32 | |
%arrayidx6.5 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 5 | |
%16 = load i8, ptr %arrayidx6.5, align 1, !tbaa !14 | |
%conv7.5 = zext i8 %16 to i32 | |
%sub.5 = sub nsw i32 %conv.5, %conv7.5 | |
%17 = tail call i32 @llvm.abs.i32(i32 %sub.5, i1 true) | |
%add.5 = add nsw i32 %17, %add.4 | |
%arrayidx.6 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 6 | |
%18 = load i8, ptr %arrayidx.6, align 1, !tbaa !14 | |
%conv.6 = zext i8 %18 to i32 | |
%arrayidx6.6 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 6 | |
%19 = load i8, ptr %arrayidx6.6, align 1, !tbaa !14 | |
%conv7.6 = zext i8 %19 to i32 | |
%sub.6 = sub nsw i32 %conv.6, %conv7.6 | |
%20 = tail call i32 @llvm.abs.i32(i32 %sub.6, i1 true) | |
%add.6 = add nsw i32 %20, %add.5 | |
%arrayidx.7 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 7 | |
%21 = load i8, ptr %arrayidx.7, align 1, !tbaa !14 | |
%conv.7 = zext i8 %21 to i32 | |
%arrayidx6.7 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 7 | |
%22 = load i8, ptr %arrayidx6.7, align 1, !tbaa !14 | |
%conv7.7 = zext i8 %22 to i32 | |
%sub.7 = sub nsw i32 %conv.7, %conv7.7 | |
%23 = tail call i32 @llvm.abs.i32(i32 %sub.7, i1 true) | |
%add.7 = add nsw i32 %23, %add.6 | |
%arrayidx.8 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 8 | |
%24 = load i8, ptr %arrayidx.8, align 1, !tbaa !14 | |
%conv.8 = zext i8 %24 to i32 | |
%arrayidx6.8 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 8 | |
%25 = load i8, ptr %arrayidx6.8, align 1, !tbaa !14 | |
%conv7.8 = zext i8 %25 to i32 | |
%sub.8 = sub nsw i32 %conv.8, %conv7.8 | |
%26 = tail call i32 @llvm.abs.i32(i32 %sub.8, i1 true) | |
%add.8 = add nsw i32 %26, %add.7 | |
%arrayidx.9 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 9 | |
%27 = load i8, ptr %arrayidx.9, align 1, !tbaa !14 | |
%conv.9 = zext i8 %27 to i32 | |
%arrayidx6.9 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 9 | |
%28 = load i8, ptr %arrayidx6.9, align 1, !tbaa !14 | |
%conv7.9 = zext i8 %28 to i32 | |
%sub.9 = sub nsw i32 %conv.9, %conv7.9 | |
%29 = tail call i32 @llvm.abs.i32(i32 %sub.9, i1 true) | |
%add.9 = add nsw i32 %29, %add.8 | |
%arrayidx.10 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 10 | |
%30 = load i8, ptr %arrayidx.10, align 1, !tbaa !14 | |
%conv.10 = zext i8 %30 to i32 | |
%arrayidx6.10 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 10 | |
%31 = load i8, ptr %arrayidx6.10, align 1, !tbaa !14 | |
%conv7.10 = zext i8 %31 to i32 | |
%sub.10 = sub nsw i32 %conv.10, %conv7.10 | |
%32 = tail call i32 @llvm.abs.i32(i32 %sub.10, i1 true) | |
%add.10 = add nsw i32 %32, %add.9 | |
%arrayidx.11 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 11 | |
%33 = load i8, ptr %arrayidx.11, align 1, !tbaa !14 | |
%conv.11 = zext i8 %33 to i32 | |
%arrayidx6.11 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 11 | |
%34 = load i8, ptr %arrayidx6.11, align 1, !tbaa !14 | |
%conv7.11 = zext i8 %34 to i32 | |
%sub.11 = sub nsw i32 %conv.11, %conv7.11 | |
%35 = tail call i32 @llvm.abs.i32(i32 %sub.11, i1 true) | |
%add.11 = add nsw i32 %35, %add.10 | |
%arrayidx.12 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 12 | |
%36 = load i8, ptr %arrayidx.12, align 1, !tbaa !14 | |
%conv.12 = zext i8 %36 to i32 | |
%arrayidx6.12 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 12 | |
%37 = load i8, ptr %arrayidx6.12, align 1, !tbaa !14 | |
%conv7.12 = zext i8 %37 to i32 | |
%sub.12 = sub nsw i32 %conv.12, %conv7.12 | |
%38 = tail call i32 @llvm.abs.i32(i32 %sub.12, i1 true) | |
%add.12 = add nsw i32 %38, %add.11 | |
%arrayidx.13 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 13 | |
%39 = load i8, ptr %arrayidx.13, align 1, !tbaa !14 | |
%conv.13 = zext i8 %39 to i32 | |
%arrayidx6.13 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 13 | |
%40 = load i8, ptr %arrayidx6.13, align 1, !tbaa !14 | |
%conv7.13 = zext i8 %40 to i32 | |
%sub.13 = sub nsw i32 %conv.13, %conv7.13 | |
%41 = tail call i32 @llvm.abs.i32(i32 %sub.13, i1 true) | |
%add.13 = add nsw i32 %41, %add.12 | |
%arrayidx.14 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 14 | |
%42 = load i8, ptr %arrayidx.14, align 1, !tbaa !14 | |
%conv.14 = zext i8 %42 to i32 | |
%arrayidx6.14 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 14 | |
%43 = load i8, ptr %arrayidx6.14, align 1, !tbaa !14 | |
%conv7.14 = zext i8 %43 to i32 | |
%sub.14 = sub nsw i32 %conv.14, %conv7.14 | |
%44 = tail call i32 @llvm.abs.i32(i32 %sub.14, i1 true) | |
%add.14 = add nsw i32 %44, %add.13 | |
%arrayidx.15 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 15 | |
%45 = load i8, ptr %arrayidx.15, align 1, !tbaa !14 | |
%conv.15 = zext i8 %45 to i32 | |
%arrayidx6.15 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 15 | |
%46 = load i8, ptr %arrayidx6.15, align 1, !tbaa !14 | |
%conv7.15 = zext i8 %46 to i32 | |
%sub.15 = sub nsw i32 %conv.15, %conv7.15 | |
%47 = tail call i32 @llvm.abs.i32(i32 %sub.15, i1 true) | |
; %add.15 is the running total after this row; it feeds the %i_sum.024 phi and the final return.
%add.15 = add nsw i32 %47, %add.14 | |
; Step both row pointers forward by their respective strides.
%add.ptr = getelementptr inbounds i8, ptr %pix1.addr.023, i64 %idx.ext | |
%add.ptr9 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 %idx.ext8 | |
; The counter starts at 0 and the loop exits once the incremented value reaches 16, i.e. 16 rows.
%inc11 = add nuw nsw i32 %y.025, 1 | |
%exitcond.not = icmp eq i32 %inc11, 16 | |
br i1 %exitcond.not, label %for.cond.cleanup, label %for.cond1.preheader, !llvm.loop !30 | |
for.cond.cleanup: ; preds = %for.cond1.preheader | |
; Return the total accumulated over all rows.
ret i32 %add.15 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment