Skip to content

Instantly share code, notes, and snippets.

@Enna1
Last active January 21, 2024 07:30
Show Gist options
  • Save Enna1/b2d4245feec090ff793e371d00016afb to your computer and use it in GitHub Desktop.
x264 vectorization
; *** IR Dump After LoopVectorizePass on get_ref ***
; Function Attrs: nofree norecurse nosync nounwind memory(readwrite, inaccessiblemem: none) uwtable
define internal ptr @get_ref(ptr noundef %dst, ptr nocapture noundef %i_dst_stride, ptr nocapture noundef readonly %src, i32 noundef %i_src_stride, i32 noundef %mvx, i32 noundef %mvy, i32 noundef %i_width, i32 noundef %i_height, ptr nocapture noundef readonly %weight) #0 {
entry:
%and = and i32 %mvy, 3
%shl = shl nuw nsw i32 %and, 2
%and1 = and i32 %mvx, 3
%add = or disjoint i32 %shl, %and1
%shr = ashr i32 %mvy, 2
%mul = mul i32 %shr, %i_src_stride
%shr2 = ashr i32 %mvx, 2
%add3 = add i32 %mul, %shr2
%idxprom = zext nneg i32 %add to i64
%arrayidx = getelementptr inbounds [16 x i8], ptr @hpel_ref0, i64 0, i64 %idxprom
%0 = load i8, ptr %arrayidx, align 1, !tbaa !15
%idxprom4 = zext i8 %0 to i64
%arrayidx5 = getelementptr inbounds ptr, ptr %src, i64 %idxprom4
%1 = load ptr, ptr %arrayidx5, align 8, !tbaa !9
%idx.ext = sext i32 %add3 to i64
%add.ptr = getelementptr i8, ptr %1, i64 %idx.ext
%cmp = icmp eq i32 %and, 3
%mul7 = select i1 %cmp, i32 %i_src_stride, i32 0
%idx.ext8 = sext i32 %mul7 to i64
%add.ptr9 = getelementptr i8, ptr %add.ptr, i64 %idx.ext8
%and10 = and i32 %add, 5
%tobool.not = icmp eq i32 %and10, 0
br i1 %tobool.not, label %if.else, label %if.then
if.then: ; preds = %entry
%arrayidx12 = getelementptr inbounds [16 x i8], ptr @hpel_ref1, i64 0, i64 %idxprom
%2 = load i8, ptr %arrayidx12, align 1, !tbaa !15
%idxprom13 = zext i8 %2 to i64
%arrayidx14 = getelementptr inbounds ptr, ptr %src, i64 %idxprom13
%3 = load ptr, ptr %arrayidx14, align 8, !tbaa !9
%add.ptr16 = getelementptr i8, ptr %3, i64 %idx.ext
%cmp18 = icmp eq i32 %and1, 3
%idx.ext20 = zext i1 %cmp18 to i64
%add.ptr21 = getelementptr i8, ptr %add.ptr16, i64 %idx.ext20
%cmp29.i = icmp sgt i32 %i_height, 0
br i1 %cmp29.i, label %for.cond1.preheader.lr.ph.i, label %pixel_avg.exit
for.cond1.preheader.lr.ph.i: ; preds = %if.then
%4 = load i32, ptr %i_dst_stride, align 4, !tbaa !13
%cmp227.i = icmp sgt i32 %i_width, 0
%idx.ext.i = sext i32 %4 to i64
%idx.ext12.i = sext i32 %i_src_stride to i64
br i1 %cmp227.i, label %for.cond1.preheader.us.preheader.i, label %pixel_avg.exit
for.cond1.preheader.us.preheader.i: ; preds = %for.cond1.preheader.lr.ph.i
%wide.trip.count.i = zext nneg i32 %i_width to i64
%5 = add i32 %i_height, -1
%6 = zext i32 %5 to i64
%7 = mul i64 %idx.ext.i, %6
%8 = add i64 %7, %wide.trip.count.i
%scevgep = getelementptr i8, ptr %dst, i64 %8
%9 = mul i64 %idx.ext12.i, %6
%10 = add i64 %9, %idx.ext8
%11 = add i64 %10, %idx.ext
%12 = add i64 %11, %wide.trip.count.i
%scevgep154 = getelementptr i8, ptr %1, i64 %12
%13 = add i64 %9, %idx.ext
%14 = add i64 %13, %idx.ext20
%15 = add i64 %14, %wide.trip.count.i
%scevgep155 = getelementptr i8, ptr %3, i64 %15
br label %iter.check
iter.check: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us.i, %for.cond1.preheader.us.preheader.i
%y.033.us.i = phi i32 [ %inc17.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ 0, %for.cond1.preheader.us.preheader.i ]
%dst.addr.032.us.i = phi ptr [ %add.ptr.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %dst, %for.cond1.preheader.us.preheader.i ]
%src1.addr.031.us.i = phi ptr [ %add.ptr13.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %add.ptr9, %for.cond1.preheader.us.preheader.i ]
%src2.addr.030.us.i = phi ptr [ %add.ptr15.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %add.ptr21, %for.cond1.preheader.us.preheader.i ]
%min.iters.check = icmp ult i64 %wide.trip.count.i, 8
br i1 %min.iters.check, label %vec.epilog.scalar.ph, label %vector.memcheck
vector.memcheck: ; preds = %iter.check
%bound0 = icmp ult ptr %dst, %scevgep154
%bound1 = icmp ult ptr %add.ptr9, %scevgep
%found.conflict = and i1 %bound0, %bound1
%stride.check = icmp slt i64 %idx.ext.i, 0
%16 = or i1 %found.conflict, %stride.check
%stride.check156 = icmp slt i64 %idx.ext12.i, 0
%17 = or i1 %16, %stride.check156
%bound0157 = icmp ult ptr %dst, %scevgep155
%bound1158 = icmp ult ptr %add.ptr21, %scevgep
%found.conflict159 = and i1 %bound0157, %bound1158
%stride.check160 = icmp slt i64 %idx.ext.i, 0
%18 = or i1 %found.conflict159, %stride.check160
%stride.check161 = icmp slt i64 %idx.ext12.i, 0
%19 = or i1 %18, %stride.check161
%conflict.rdx = or i1 %17, %19
br i1 %conflict.rdx, label %vec.epilog.scalar.ph, label %vector.main.loop.iter.check
vector.main.loop.iter.check: ; preds = %vector.memcheck
%min.iters.check162 = icmp ult i64 %wide.trip.count.i, 32
br i1 %min.iters.check162, label %vec.epilog.ph, label %vector.ph
vector.ph: ; preds = %vector.main.loop.iter.check
%n.mod.vf = urem i64 %wide.trip.count.i, 32
%n.vec = sub i64 %wide.trip.count.i, %n.mod.vf
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%20 = add i64 %index, 0
%21 = add i64 %index, 16
%22 = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %20
%23 = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %21
%24 = getelementptr inbounds i8, ptr %22, i32 0
%25 = getelementptr inbounds i8, ptr %22, i32 16
%wide.load = load <16 x i8>, ptr %24, align 1, !tbaa !15, !alias.scope !144
%wide.load163 = load <16 x i8>, ptr %25, align 1, !tbaa !15, !alias.scope !144
%26 = zext <16 x i8> %wide.load to <16 x i16>
%27 = zext <16 x i8> %wide.load163 to <16 x i16>
%28 = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %20
%29 = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %21
%30 = getelementptr inbounds i8, ptr %28, i32 0
%31 = getelementptr inbounds i8, ptr %28, i32 16
%wide.load164 = load <16 x i8>, ptr %30, align 1, !tbaa !15, !alias.scope !147
%wide.load165 = load <16 x i8>, ptr %31, align 1, !tbaa !15, !alias.scope !147
%32 = zext <16 x i8> %wide.load164 to <16 x i16>
%33 = zext <16 x i8> %wide.load165 to <16 x i16>
%34 = add nuw nsw <16 x i16> %26, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%35 = add nuw nsw <16 x i16> %27, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%36 = add nuw nsw <16 x i16> %34, %32
%37 = add nuw nsw <16 x i16> %35, %33
%38 = lshr <16 x i16> %36, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%39 = lshr <16 x i16> %37, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%40 = trunc <16 x i16> %38 to <16 x i8>
%41 = trunc <16 x i16> %39 to <16 x i8>
%42 = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %20
%43 = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %21
%44 = getelementptr inbounds i8, ptr %42, i32 0
%45 = getelementptr inbounds i8, ptr %42, i32 16
store <16 x i8> %40, ptr %44, align 1, !tbaa !15, !alias.scope !149, !noalias !151
store <16 x i8> %41, ptr %45, align 1, !tbaa !15, !alias.scope !149, !noalias !151
%index.next = add nuw i64 %index, 32
%46 = icmp eq i64 %index.next, %n.vec
br i1 %46, label %middle.block, label %vector.body, !llvm.loop !152
middle.block: ; preds = %vector.body
%cmp.n = icmp eq i64 %wide.trip.count.i, %n.vec
br i1 %cmp.n, label %for.cond1.for.cond.cleanup3_crit_edge.us.i, label %vec.epilog.iter.check
vec.epilog.iter.check: ; preds = %middle.block
%n.vec.remaining = sub i64 %wide.trip.count.i, %n.vec
%min.epilog.iters.check = icmp ult i64 %n.vec.remaining, 8
br i1 %min.epilog.iters.check, label %vec.epilog.scalar.ph, label %vec.epilog.ph
vec.epilog.ph: ; preds = %vector.main.loop.iter.check, %vec.epilog.iter.check
%vec.epilog.resume.val = phi i64 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ]
%n.mod.vf166 = urem i64 %wide.trip.count.i, 8
%n.vec167 = sub i64 %wide.trip.count.i, %n.mod.vf166
br label %vec.epilog.vector.body
vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph
%index169 = phi i64 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next172, %vec.epilog.vector.body ]
%47 = add i64 %index169, 0
%48 = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %47
%49 = getelementptr inbounds i8, ptr %48, i32 0
%wide.load170 = load <8 x i8>, ptr %49, align 1, !tbaa !15, !alias.scope !153
%50 = zext <8 x i8> %wide.load170 to <8 x i16>
%51 = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %47
%52 = getelementptr inbounds i8, ptr %51, i32 0
%wide.load171 = load <8 x i8>, ptr %52, align 1, !tbaa !15, !alias.scope !156
%53 = zext <8 x i8> %wide.load171 to <8 x i16>
%54 = add nuw nsw <8 x i16> %50, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%55 = add nuw nsw <8 x i16> %54, %53
%56 = lshr <8 x i16> %55, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%57 = trunc <8 x i16> %56 to <8 x i8>
%58 = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %47
%59 = getelementptr inbounds i8, ptr %58, i32 0
store <8 x i8> %57, ptr %59, align 1, !tbaa !15, !alias.scope !158, !noalias !160
%index.next172 = add nuw i64 %index169, 8
%60 = icmp eq i64 %index.next172, %n.vec167
br i1 %60, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !llvm.loop !161
vec.epilog.middle.block: ; preds = %vec.epilog.vector.body
%cmp.n168 = icmp eq i64 %wide.trip.count.i, %n.vec167
br i1 %cmp.n168, label %for.cond1.for.cond.cleanup3_crit_edge.us.i, label %vec.epilog.scalar.ph
vec.epilog.scalar.ph: ; preds = %vector.memcheck, %iter.check, %vec.epilog.iter.check, %vec.epilog.middle.block
%bc.resume.val = phi i64 [ %n.vec167, %vec.epilog.middle.block ], [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.memcheck ], [ 0, %iter.check ]
br label %for.body4.us.i
for.body4.us.i: ; preds = %for.body4.us.i, %vec.epilog.scalar.ph
%indvars.iv.i = phi i64 [ %bc.resume.val, %vec.epilog.scalar.ph ], [ %indvars.iv.next.i, %for.body4.us.i ]
%arrayidx.us.i = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %indvars.iv.i
%61 = load i8, ptr %arrayidx.us.i, align 1, !tbaa !15
%conv.us.i = zext i8 %61 to i16
%arrayidx6.us.i = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %indvars.iv.i
%62 = load i8, ptr %arrayidx6.us.i, align 1, !tbaa !15
%conv7.us.i = zext i8 %62 to i16
%add.us.i = add nuw nsw i16 %conv.us.i, 1
%add8.us.i = add nuw nsw i16 %add.us.i, %conv7.us.i
%shr.us.i = lshr i16 %add8.us.i, 1
%conv9.us.i = trunc i16 %shr.us.i to i8
%arrayidx11.us.i = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %indvars.iv.i
store i8 %conv9.us.i, ptr %arrayidx11.us.i, align 1, !tbaa !15
%indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1
%exitcond.not.i = icmp eq i64 %indvars.iv.next.i, %wide.trip.count.i
br i1 %exitcond.not.i, label %for.cond1.for.cond.cleanup3_crit_edge.us.i, label %for.body4.us.i, !llvm.loop !162
for.cond1.for.cond.cleanup3_crit_edge.us.i: ; preds = %vec.epilog.middle.block, %middle.block, %for.body4.us.i
%add.ptr.us.i = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %idx.ext.i
%add.ptr13.us.i = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %idx.ext12.i
%add.ptr15.us.i = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %idx.ext12.i
%inc17.us.i = add nuw nsw i32 %y.033.us.i, 1
%exitcond36.not.i = icmp eq i32 %inc17.us.i, %i_height
br i1 %exitcond36.not.i, label %pixel_avg.exit.loopexit, label %iter.check, !llvm.loop !100
pixel_avg.exit.loopexit: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us.i
br label %pixel_avg.exit
pixel_avg.exit: ; preds = %pixel_avg.exit.loopexit, %if.then, %for.cond1.preheader.lr.ph.i
%weightfn = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 5
%63 = load ptr, ptr %weightfn, align 16, !tbaa !101
%tobool22.not = icmp eq ptr %63, null
br i1 %tobool22.not, label %cleanup, label %if.then23
if.then23: ; preds = %pixel_avg.exit
%64 = load i32, ptr %i_dst_stride, align 4, !tbaa !13
%i_denom.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 2
%65 = load i32, ptr %i_denom.i, align 16, !tbaa !103
%cmp.i = icmp sgt i32 %65, 0
br i1 %cmp.i, label %for.cond.preheader.i, label %for.cond17.preheader.i
for.cond17.preheader.i: ; preds = %if.then23
br i1 %cmp29.i, label %for.cond23.preheader.lr.ph.i, label %cleanup
for.cond23.preheader.lr.ph.i: ; preds = %for.cond17.preheader.i
%cmp2477.i = icmp sgt i32 %i_width, 0
%i_scale31.i = getelementptr %struct.x264_weight_t, ptr %weight, i64 0, i32 3
%i_offset33.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4
%idx.ext43.i = sext i32 %64 to i64
br i1 %cmp2477.i, label %for.cond23.preheader.us.preheader.i, label %cleanup
for.cond23.preheader.us.preheader.i: ; preds = %for.cond23.preheader.lr.ph.i
%wide.trip.count.i61 = zext nneg i32 %i_width to i64
%66 = add i32 %i_height, -1
%67 = zext i32 %66 to i64
%68 = mul i64 %idx.ext43.i, %67
%69 = add i64 %68, %wide.trip.count.i61
%scevgep174 = getelementptr i8, ptr %dst, i64 %69
%scevgep175 = getelementptr i8, ptr %weight, i64 44
br label %for.cond23.preheader.us.i
for.cond23.preheader.us.i: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i, %for.cond23.preheader.us.preheader.i
%y16.082.us.i = phi i32 [ %inc42.us.i, %for.cond23.for.cond.cleanup26_crit_edge.us.i ], [ 0, %for.cond23.preheader.us.preheader.i ]
%dst.addr.181.us.i = phi ptr [ %add.ptr44.us.i, %for.cond23.for.cond.cleanup26_crit_edge.us.i ], [ %dst, %for.cond23.preheader.us.preheader.i ]
%min.iters.check181 = icmp ult i64 %wide.trip.count.i61, 8
br i1 %min.iters.check181, label %scalar.ph, label %vector.memcheck173
vector.memcheck173: ; preds = %for.cond23.preheader.us.i
%bound0176 = icmp ult ptr %dst, %scevgep175
%bound1177 = icmp ult ptr %i_scale31.i, %scevgep174
%found.conflict178 = and i1 %bound0176, %bound1177
%stride.check179 = icmp slt i64 %idx.ext43.i, 0
%70 = or i1 %found.conflict178, %stride.check179
br i1 %70, label %scalar.ph, label %vector.ph182
vector.ph182: ; preds = %vector.memcheck173
%n.mod.vf183 = urem i64 %wide.trip.count.i61, 4
%n.vec184 = sub i64 %wide.trip.count.i61, %n.mod.vf183
br label %vector.body187
vector.body187: ; preds = %vector.body187, %vector.ph182
%index188 = phi i64 [ 0, %vector.ph182 ], [ %index.next192, %vector.body187 ]
%71 = add i64 %index188, 0
%72 = getelementptr inbounds i8, ptr %dst.addr.181.us.i, i64 %71
%73 = getelementptr inbounds i8, ptr %72, i32 0
%wide.load189 = load <4 x i8>, ptr %73, align 1, !tbaa !15, !alias.scope !163, !noalias !166
%74 = zext <4 x i8> %wide.load189 to <4 x i32>
%75 = load i32, ptr %i_scale31.i, align 4, !tbaa !104, !alias.scope !166
%broadcast.splatinsert = insertelement <4 x i32> poison, i32 %75, i64 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
%76 = mul nsw <4 x i32> %broadcast.splat, %74
%77 = load i32, ptr %i_offset33.i, align 8, !tbaa !108, !alias.scope !166
%broadcast.splatinsert190 = insertelement <4 x i32> poison, i32 %77, i64 0
%broadcast.splat191 = shufflevector <4 x i32> %broadcast.splatinsert190, <4 x i32> poison, <4 x i32> zeroinitializer
%78 = add nsw <4 x i32> %76, %broadcast.splat191
%79 = icmp ult <4 x i32> %78, <i32 256, i32 256, i32 256, i32 256>
%80 = icmp sgt <4 x i32> %78, zeroinitializer
%81 = sext <4 x i1> %80 to <4 x i32>
%82 = select <4 x i1> %79, <4 x i32> %78, <4 x i32> %81
%83 = trunc <4 x i32> %82 to <4 x i8>
store <4 x i8> %83, ptr %73, align 1, !tbaa !15, !alias.scope !163, !noalias !166
%index.next192 = add nuw i64 %index188, 4
%84 = icmp eq i64 %index.next192, %n.vec184
br i1 %84, label %middle.block180, label %vector.body187, !llvm.loop !168
middle.block180: ; preds = %vector.body187
%cmp.n186 = icmp eq i64 %wide.trip.count.i61, %n.vec184
br i1 %cmp.n186, label %for.cond23.for.cond.cleanup26_crit_edge.us.i, label %scalar.ph
scalar.ph: ; preds = %vector.memcheck173, %for.cond23.preheader.us.i, %middle.block180
%bc.resume.val185 = phi i64 [ %n.vec184, %middle.block180 ], [ 0, %for.cond23.preheader.us.i ], [ 0, %vector.memcheck173 ]
br label %for.body27.us.i
for.body27.us.i: ; preds = %for.body27.us.i, %scalar.ph
%indvars.iv.i62 = phi i64 [ %bc.resume.val185, %scalar.ph ], [ %indvars.iv.next.i63, %for.body27.us.i ]
%arrayidx29.us.i = getelementptr inbounds i8, ptr %dst.addr.181.us.i, i64 %indvars.iv.i62
%85 = load i8, ptr %arrayidx29.us.i, align 1, !tbaa !15
%conv30.us.i = zext i8 %85 to i32
%86 = load i32, ptr %i_scale31.i, align 4, !tbaa !104
%mul32.us.i = mul nsw i32 %86, %conv30.us.i
%87 = load i32, ptr %i_offset33.i, align 8, !tbaa !108
%add34.us.i = add nsw i32 %mul32.us.i, %87
%tobool.not.i72.us.i = icmp ult i32 %add34.us.i, 256
%88 = icmp sgt i32 %add34.us.i, 0
%shr.i73.us.i = sext i1 %88 to i32
%cond.i74.us.i = select i1 %tobool.not.i72.us.i, i32 %add34.us.i, i32 %shr.i73.us.i
%conv.i75.us.i = trunc i32 %cond.i74.us.i to i8
store i8 %conv.i75.us.i, ptr %arrayidx29.us.i, align 1, !tbaa !15
%indvars.iv.next.i63 = add nuw nsw i64 %indvars.iv.i62, 1
%exitcond.not.i64 = icmp eq i64 %indvars.iv.next.i63, %wide.trip.count.i61
br i1 %exitcond.not.i64, label %for.cond23.for.cond.cleanup26_crit_edge.us.i, label %for.body27.us.i, !llvm.loop !169
for.cond23.for.cond.cleanup26_crit_edge.us.i: ; preds = %middle.block180, %for.body27.us.i
%inc42.us.i = add nuw nsw i32 %y16.082.us.i, 1
%add.ptr44.us.i = getelementptr i8, ptr %dst.addr.181.us.i, i64 %idx.ext43.i
%exitcond93.not.i = icmp eq i32 %inc42.us.i, %i_height
br i1 %exitcond93.not.i, label %cleanup.loopexit153, label %for.cond23.preheader.us.i, !llvm.loop !113
for.cond.preheader.i: ; preds = %if.then23
br i1 %cmp29.i, label %for.cond2.preheader.lr.ph.i, label %cleanup
for.cond2.preheader.lr.ph.i: ; preds = %for.cond.preheader.i
%cmp383.i = icmp sgt i32 %i_width, 0
%i_scale.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3
%i_offset.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4
%idx.ext.i65 = sext i32 %64 to i64
br i1 %cmp383.i, label %for.cond2.preheader.us.preheader.i, label %cleanup
for.cond2.preheader.us.preheader.i: ; preds = %for.cond2.preheader.lr.ph.i
%wide.trip.count97.i = zext nneg i32 %i_width to i64
%89 = add i32 %i_height, -1
%90 = zext i32 %89 to i64
%91 = mul i64 %idx.ext.i65, %90
%92 = add i64 %91, %wide.trip.count97.i
%scevgep194 = getelementptr i8, ptr %dst, i64 %92
%scevgep195 = getelementptr i8, ptr %weight, i64 44
br label %for.cond2.preheader.us.i
for.cond2.preheader.us.i: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i, %for.cond2.preheader.us.preheader.i
%y.088.us.i = phi i32 [ %inc12.us.i, %for.cond2.for.cond.cleanup4_crit_edge.us.i ], [ 0, %for.cond2.preheader.us.preheader.i ]
%dst.addr.087.us.i = phi ptr [ %add.ptr.us.i71, %for.cond2.for.cond.cleanup4_crit_edge.us.i ], [ %dst, %for.cond2.preheader.us.preheader.i ]
%min.iters.check202 = icmp ult i64 %wide.trip.count97.i, 4
br i1 %min.iters.check202, label %scalar.ph201, label %vector.memcheck193
vector.memcheck193: ; preds = %for.cond2.preheader.us.i
%bound0196 = icmp ult ptr %dst, %scevgep195
%bound1197 = icmp ult ptr %i_denom.i, %scevgep194
%found.conflict198 = and i1 %bound0196, %bound1197
%stride.check199 = icmp slt i64 %idx.ext.i65, 0
%93 = or i1 %found.conflict198, %stride.check199
br i1 %93, label %scalar.ph201, label %vector.ph203
vector.ph203: ; preds = %vector.memcheck193
%n.mod.vf204 = urem i64 %wide.trip.count97.i, 4
%n.vec205 = sub i64 %wide.trip.count97.i, %n.mod.vf204
br label %vector.body208
vector.body208: ; preds = %vector.body208, %vector.ph203
%index209 = phi i64 [ 0, %vector.ph203 ], [ %index.next217, %vector.body208 ]
%94 = add i64 %index209, 0
%95 = getelementptr inbounds i8, ptr %dst.addr.087.us.i, i64 %94
%96 = getelementptr inbounds i8, ptr %95, i32 0
%wide.load210 = load <4 x i8>, ptr %96, align 1, !tbaa !15, !alias.scope !170, !noalias !173
%97 = zext <4 x i8> %wide.load210 to <4 x i32>
%98 = load i32, ptr %i_scale.i, align 4, !tbaa !104, !alias.scope !173
%broadcast.splatinsert211 = insertelement <4 x i32> poison, i32 %98, i64 0
%broadcast.splat212 = shufflevector <4 x i32> %broadcast.splatinsert211, <4 x i32> poison, <4 x i32> zeroinitializer
%99 = mul nsw <4 x i32> %broadcast.splat212, %97
%100 = load i32, ptr %i_denom.i, align 16, !tbaa !103, !alias.scope !173
%broadcast.splatinsert213 = insertelement <4 x i32> poison, i32 %100, i64 0
%broadcast.splat214 = shufflevector <4 x i32> %broadcast.splatinsert213, <4 x i32> poison, <4 x i32> zeroinitializer
%101 = add nsw <4 x i32> %broadcast.splat214, <i32 -1, i32 -1, i32 -1, i32 -1>
%102 = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %101
%103 = add nsw <4 x i32> %102, %99
%104 = ashr <4 x i32> %103, %broadcast.splat214
%105 = load i32, ptr %i_offset.i, align 8, !tbaa !108, !alias.scope !173
%broadcast.splatinsert215 = insertelement <4 x i32> poison, i32 %105, i64 0
%broadcast.splat216 = shufflevector <4 x i32> %broadcast.splatinsert215, <4 x i32> poison, <4 x i32> zeroinitializer
%106 = add nsw <4 x i32> %104, %broadcast.splat216
%107 = icmp ult <4 x i32> %106, <i32 256, i32 256, i32 256, i32 256>
%108 = icmp sgt <4 x i32> %106, zeroinitializer
%109 = sext <4 x i1> %108 to <4 x i32>
%110 = select <4 x i1> %107, <4 x i32> %106, <4 x i32> %109
%111 = trunc <4 x i32> %110 to <4 x i8>
store <4 x i8> %111, ptr %96, align 1, !tbaa !15, !alias.scope !170, !noalias !173
%index.next217 = add nuw i64 %index209, 4
%112 = icmp eq i64 %index.next217, %n.vec205
br i1 %112, label %middle.block200, label %vector.body208, !llvm.loop !175
middle.block200: ; preds = %vector.body208
%cmp.n207 = icmp eq i64 %wide.trip.count97.i, %n.vec205
br i1 %cmp.n207, label %for.cond2.for.cond.cleanup4_crit_edge.us.i, label %scalar.ph201
scalar.ph201: ; preds = %vector.memcheck193, %for.cond2.preheader.us.i, %middle.block200
%bc.resume.val206 = phi i64 [ %n.vec205, %middle.block200 ], [ 0, %for.cond2.preheader.us.i ], [ 0, %vector.memcheck193 ]
br label %for.body5.us.i
for.body5.us.i: ; preds = %for.body5.us.i, %scalar.ph201
%indvars.iv94.i = phi i64 [ %bc.resume.val206, %scalar.ph201 ], [ %indvars.iv.next95.i, %for.body5.us.i ]
%arrayidx.us.i66 = getelementptr inbounds i8, ptr %dst.addr.087.us.i, i64 %indvars.iv94.i
%113 = load i8, ptr %arrayidx.us.i66, align 1, !tbaa !15
%conv.us.i67 = zext i8 %113 to i32
%114 = load i32, ptr %i_scale.i, align 4, !tbaa !104
%mul.us.i = mul nsw i32 %114, %conv.us.i67
%115 = load i32, ptr %i_denom.i, align 16, !tbaa !103
%sub.us.i = add nsw i32 %115, -1
%shl.us.i = shl nuw i32 1, %sub.us.i
%add.us.i68 = add nsw i32 %shl.us.i, %mul.us.i
%shr.us.i69 = ashr i32 %add.us.i68, %115
%116 = load i32, ptr %i_offset.i, align 8, !tbaa !108
%add8.us.i70 = add nsw i32 %shr.us.i69, %116
%tobool.not.i.us.i = icmp ult i32 %add8.us.i70, 256
%117 = icmp sgt i32 %add8.us.i70, 0
%shr.i.us.i = sext i1 %117 to i32
%cond.i.us.i = select i1 %tobool.not.i.us.i, i32 %add8.us.i70, i32 %shr.i.us.i
%conv.i.us.i = trunc i32 %cond.i.us.i to i8
store i8 %conv.i.us.i, ptr %arrayidx.us.i66, align 1, !tbaa !15
%indvars.iv.next95.i = add nuw nsw i64 %indvars.iv94.i, 1
%exitcond98.not.i = icmp eq i64 %indvars.iv.next95.i, %wide.trip.count97.i
br i1 %exitcond98.not.i, label %for.cond2.for.cond.cleanup4_crit_edge.us.i, label %for.body5.us.i, !llvm.loop !176
for.cond2.for.cond.cleanup4_crit_edge.us.i: ; preds = %middle.block200, %for.body5.us.i
%inc12.us.i = add nuw nsw i32 %y.088.us.i, 1
%add.ptr.us.i71 = getelementptr i8, ptr %dst.addr.087.us.i, i64 %idx.ext.i65
%exitcond99.not.i = icmp eq i32 %inc12.us.i, %i_height
br i1 %exitcond99.not.i, label %cleanup.loopexit152, label %for.cond2.preheader.us.i, !llvm.loop !121
if.else: ; preds = %entry
%weightfn24 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 5
%118 = load ptr, ptr %weightfn24, align 16, !tbaa !101
%tobool25.not = icmp eq ptr %118, null
br i1 %tobool25.not, label %if.else27, label %if.then26
if.then26: ; preds = %if.else
%119 = load i32, ptr %i_dst_stride, align 4, !tbaa !13
%i_denom.i72 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 2
%120 = load i32, ptr %i_denom.i72, align 16, !tbaa !103
%cmp.i73 = icmp sgt i32 %120, 0
%cmp185.i74 = icmp sgt i32 %i_height, 0
br i1 %cmp.i73, label %for.cond.preheader.i106, label %for.cond17.preheader.i75
for.cond17.preheader.i75: ; preds = %if.then26
br i1 %cmp185.i74, label %for.cond23.preheader.lr.ph.i76, label %cleanup
for.cond23.preheader.lr.ph.i76: ; preds = %for.cond17.preheader.i75
%cmp2477.i77 = icmp sgt i32 %i_width, 0
%i_scale31.i78 = getelementptr %struct.x264_weight_t, ptr %weight, i64 0, i32 3
%i_offset33.i79 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4
%idx.ext43.i80 = sext i32 %119 to i64
%idx.ext45.i81 = sext i32 %i_src_stride to i64
br i1 %cmp2477.i77, label %for.cond23.preheader.us.preheader.i82, label %cleanup
for.cond23.preheader.us.preheader.i82: ; preds = %for.cond23.preheader.lr.ph.i76
%wide.trip.count.i83 = zext nneg i32 %i_width to i64
%121 = add i32 %i_height, -1
%122 = zext i32 %121 to i64
%123 = mul i64 %idx.ext43.i80, %122
%124 = add i64 %123, %wide.trip.count.i83
%scevgep219 = getelementptr i8, ptr %dst, i64 %124
%125 = mul i64 %idx.ext45.i81, %122
%126 = add i64 %125, %idx.ext8
%127 = add i64 %126, %idx.ext
%128 = add i64 %127, %wide.trip.count.i83
%scevgep220 = getelementptr i8, ptr %1, i64 %128
%scevgep221 = getelementptr i8, ptr %weight, i64 44
br label %for.cond23.preheader.us.i84
for.cond23.preheader.us.i84: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i101, %for.cond23.preheader.us.preheader.i82
%y16.082.us.i85 = phi i32 [ %inc42.us.i102, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ 0, %for.cond23.preheader.us.preheader.i82 ]
%dst.addr.181.us.i86 = phi ptr [ %add.ptr44.us.i103, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ %dst, %for.cond23.preheader.us.preheader.i82 ]
%src.addr.180.us.i87 = phi ptr [ %add.ptr46.us.i104, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ %add.ptr9, %for.cond23.preheader.us.preheader.i82 ]
%min.iters.check234 = icmp ult i64 %wide.trip.count.i83, 12
br i1 %min.iters.check234, label %scalar.ph233, label %vector.memcheck218
vector.memcheck218: ; preds = %for.cond23.preheader.us.i84
%bound0222 = icmp ult ptr %dst, %scevgep220
%bound1223 = icmp ult ptr %add.ptr9, %scevgep219
%found.conflict224 = and i1 %bound0222, %bound1223
%stride.check225 = icmp slt i64 %idx.ext43.i80, 0
%129 = or i1 %found.conflict224, %stride.check225
%stride.check226 = icmp slt i64 %idx.ext45.i81, 0
%130 = or i1 %129, %stride.check226
%bound0227 = icmp ult ptr %dst, %scevgep221
%bound1228 = icmp ult ptr %i_scale31.i78, %scevgep219
%found.conflict229 = and i1 %bound0227, %bound1228
%stride.check230 = icmp slt i64 %idx.ext43.i80, 0
%131 = or i1 %found.conflict229, %stride.check230
%conflict.rdx231 = or i1 %130, %131
br i1 %conflict.rdx231, label %scalar.ph233, label %vector.ph235
vector.ph235: ; preds = %vector.memcheck218
%n.mod.vf236 = urem i64 %wide.trip.count.i83, 4
%n.vec237 = sub i64 %wide.trip.count.i83, %n.mod.vf236
br label %vector.body240
vector.body240: ; preds = %vector.body240, %vector.ph235
%index241 = phi i64 [ 0, %vector.ph235 ], [ %index.next247, %vector.body240 ]
%132 = add i64 %index241, 0
%133 = getelementptr inbounds i8, ptr %src.addr.180.us.i87, i64 %132
%134 = getelementptr inbounds i8, ptr %133, i32 0
%wide.load242 = load <4 x i8>, ptr %134, align 1, !tbaa !15, !alias.scope !177
%135 = zext <4 x i8> %wide.load242 to <4 x i32>
%136 = load i32, ptr %i_scale31.i78, align 4, !tbaa !104, !alias.scope !180
%broadcast.splatinsert243 = insertelement <4 x i32> poison, i32 %136, i64 0
%broadcast.splat244 = shufflevector <4 x i32> %broadcast.splatinsert243, <4 x i32> poison, <4 x i32> zeroinitializer
%137 = mul nsw <4 x i32> %broadcast.splat244, %135
%138 = load i32, ptr %i_offset33.i79, align 8, !tbaa !108, !alias.scope !180
%broadcast.splatinsert245 = insertelement <4 x i32> poison, i32 %138, i64 0
%broadcast.splat246 = shufflevector <4 x i32> %broadcast.splatinsert245, <4 x i32> poison, <4 x i32> zeroinitializer
%139 = add nsw <4 x i32> %137, %broadcast.splat246
%140 = icmp ult <4 x i32> %139, <i32 256, i32 256, i32 256, i32 256>
%141 = icmp sgt <4 x i32> %139, zeroinitializer
%142 = sext <4 x i1> %141 to <4 x i32>
%143 = select <4 x i1> %140, <4 x i32> %139, <4 x i32> %142
%144 = trunc <4 x i32> %143 to <4 x i8>
%145 = getelementptr inbounds i8, ptr %dst.addr.181.us.i86, i64 %132
%146 = getelementptr inbounds i8, ptr %145, i32 0
store <4 x i8> %144, ptr %146, align 1, !tbaa !15, !alias.scope !182, !noalias !184
%index.next247 = add nuw i64 %index241, 4
%147 = icmp eq i64 %index.next247, %n.vec237
br i1 %147, label %middle.block232, label %vector.body240, !llvm.loop !185
middle.block232: ; preds = %vector.body240
%cmp.n239 = icmp eq i64 %wide.trip.count.i83, %n.vec237
br i1 %cmp.n239, label %for.cond23.for.cond.cleanup26_crit_edge.us.i101, label %scalar.ph233
scalar.ph233: ; preds = %vector.memcheck218, %for.cond23.preheader.us.i84, %middle.block232
%bc.resume.val238 = phi i64 [ %n.vec237, %middle.block232 ], [ 0, %for.cond23.preheader.us.i84 ], [ 0, %vector.memcheck218 ]
br label %for.body27.us.i88
for.body27.us.i88: ; preds = %for.body27.us.i88, %scalar.ph233
%indvars.iv.i89 = phi i64 [ %bc.resume.val238, %scalar.ph233 ], [ %indvars.iv.next.i99, %for.body27.us.i88 ]
%arrayidx29.us.i90 = getelementptr inbounds i8, ptr %src.addr.180.us.i87, i64 %indvars.iv.i89
%148 = load i8, ptr %arrayidx29.us.i90, align 1, !tbaa !15
%conv30.us.i91 = zext i8 %148 to i32
%149 = load i32, ptr %i_scale31.i78, align 4, !tbaa !104
%mul32.us.i92 = mul nsw i32 %149, %conv30.us.i91
%150 = load i32, ptr %i_offset33.i79, align 8, !tbaa !108
%add34.us.i93 = add nsw i32 %mul32.us.i92, %150
%tobool.not.i72.us.i94 = icmp ult i32 %add34.us.i93, 256
%151 = icmp sgt i32 %add34.us.i93, 0
%shr.i73.us.i95 = sext i1 %151 to i32
%cond.i74.us.i96 = select i1 %tobool.not.i72.us.i94, i32 %add34.us.i93, i32 %shr.i73.us.i95
%conv.i75.us.i97 = trunc i32 %cond.i74.us.i96 to i8
%arrayidx37.us.i98 = getelementptr inbounds i8, ptr %dst.addr.181.us.i86, i64 %indvars.iv.i89
store i8 %conv.i75.us.i97, ptr %arrayidx37.us.i98, align 1, !tbaa !15
%indvars.iv.next.i99 = add nuw nsw i64 %indvars.iv.i89, 1
%exitcond.not.i100 = icmp eq i64 %indvars.iv.next.i99, %wide.trip.count.i83
br i1 %exitcond.not.i100, label %for.cond23.for.cond.cleanup26_crit_edge.us.i101, label %for.body27.us.i88, !llvm.loop !186
for.cond23.for.cond.cleanup26_crit_edge.us.i101: ; preds = %middle.block232, %for.body27.us.i88
%inc42.us.i102 = add nuw nsw i32 %y16.082.us.i85, 1
%add.ptr44.us.i103 = getelementptr inbounds i8, ptr %dst.addr.181.us.i86, i64 %idx.ext43.i80
%add.ptr46.us.i104 = getelementptr inbounds i8, ptr %src.addr.180.us.i87, i64 %idx.ext45.i81
%exitcond93.not.i105 = icmp eq i32 %inc42.us.i102, %i_height
br i1 %exitcond93.not.i105, label %cleanup.loopexit151, label %for.cond23.preheader.us.i84, !llvm.loop !113
for.cond.preheader.i106: ; preds = %if.then26
br i1 %cmp185.i74, label %for.cond2.preheader.lr.ph.i107, label %cleanup
for.cond2.preheader.lr.ph.i107: ; preds = %for.cond.preheader.i106
%cmp383.i108 = icmp sgt i32 %i_width, 0
%i_scale.i109 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3
%i_offset.i110 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4
%idx.ext.i111 = sext i32 %119 to i64
%idx.ext13.i112 = sext i32 %i_src_stride to i64
br i1 %cmp383.i108, label %for.cond2.preheader.us.preheader.i113, label %cleanup
for.cond2.preheader.us.preheader.i113: ; preds = %for.cond2.preheader.lr.ph.i107
%wide.trip.count97.i114 = zext nneg i32 %i_width to i64
%152 = add i32 %i_height, -1
%153 = zext i32 %152 to i64
%154 = mul i64 %idx.ext.i111, %153
%155 = add i64 %154, %wide.trip.count97.i114
%scevgep249 = getelementptr i8, ptr %dst, i64 %155
%156 = mul i64 %idx.ext13.i112, %153
%157 = add i64 %156, %idx.ext8
%158 = add i64 %157, %idx.ext
%159 = add i64 %158, %wide.trip.count97.i114
%scevgep250 = getelementptr i8, ptr %1, i64 %159
%scevgep251 = getelementptr i8, ptr %weight, i64 44
br label %for.cond2.preheader.us.i115
for.cond2.preheader.us.i115: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i136, %for.cond2.preheader.us.preheader.i113
%y.088.us.i116 = phi i32 [ %inc12.us.i137, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ 0, %for.cond2.preheader.us.preheader.i113 ]
%dst.addr.087.us.i117 = phi ptr [ %add.ptr.us.i138, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ %dst, %for.cond2.preheader.us.preheader.i113 ]
%src.addr.086.us.i118 = phi ptr [ %add.ptr14.us.i139, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ %add.ptr9, %for.cond2.preheader.us.preheader.i113 ]
%min.iters.check264 = icmp ult i64 %wide.trip.count97.i114, 8
br i1 %min.iters.check264, label %scalar.ph263, label %vector.memcheck248
vector.memcheck248: ; preds = %for.cond2.preheader.us.i115
%bound0252 = icmp ult ptr %dst, %scevgep250
%bound1253 = icmp ult ptr %add.ptr9, %scevgep249
%found.conflict254 = and i1 %bound0252, %bound1253
%stride.check255 = icmp slt i64 %idx.ext.i111, 0
%160 = or i1 %found.conflict254, %stride.check255
%stride.check256 = icmp slt i64 %idx.ext13.i112, 0
%161 = or i1 %160, %stride.check256
%bound0257 = icmp ult ptr %dst, %scevgep251
%bound1258 = icmp ult ptr %i_denom.i72, %scevgep249
%found.conflict259 = and i1 %bound0257, %bound1258
%stride.check260 = icmp slt i64 %idx.ext.i111, 0
%162 = or i1 %found.conflict259, %stride.check260
%conflict.rdx261 = or i1 %161, %162
br i1 %conflict.rdx261, label %scalar.ph263, label %vector.ph265
vector.ph265: ; preds = %vector.memcheck248
%n.mod.vf266 = urem i64 %wide.trip.count97.i114, 4
%n.vec267 = sub i64 %wide.trip.count97.i114, %n.mod.vf266
br label %vector.body270
vector.body270: ; preds = %vector.body270, %vector.ph265
%index271 = phi i64 [ 0, %vector.ph265 ], [ %index.next279, %vector.body270 ]
%163 = add i64 %index271, 0
%164 = getelementptr inbounds i8, ptr %src.addr.086.us.i118, i64 %163
%165 = getelementptr inbounds i8, ptr %164, i32 0
%wide.load272 = load <4 x i8>, ptr %165, align 1, !tbaa !15, !alias.scope !187
%166 = zext <4 x i8> %wide.load272 to <4 x i32>
%167 = load i32, ptr %i_scale.i109, align 4, !tbaa !104, !alias.scope !190
%broadcast.splatinsert273 = insertelement <4 x i32> poison, i32 %167, i64 0
%broadcast.splat274 = shufflevector <4 x i32> %broadcast.splatinsert273, <4 x i32> poison, <4 x i32> zeroinitializer
%168 = mul nsw <4 x i32> %broadcast.splat274, %166
%169 = load i32, ptr %i_denom.i72, align 16, !tbaa !103, !alias.scope !190
%broadcast.splatinsert275 = insertelement <4 x i32> poison, i32 %169, i64 0
%broadcast.splat276 = shufflevector <4 x i32> %broadcast.splatinsert275, <4 x i32> poison, <4 x i32> zeroinitializer
%170 = add nsw <4 x i32> %broadcast.splat276, <i32 -1, i32 -1, i32 -1, i32 -1>
%171 = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %170
%172 = add nsw <4 x i32> %171, %168
%173 = ashr <4 x i32> %172, %broadcast.splat276
%174 = load i32, ptr %i_offset.i110, align 8, !tbaa !108, !alias.scope !190
%broadcast.splatinsert277 = insertelement <4 x i32> poison, i32 %174, i64 0
%broadcast.splat278 = shufflevector <4 x i32> %broadcast.splatinsert277, <4 x i32> poison, <4 x i32> zeroinitializer
%175 = add nsw <4 x i32> %173, %broadcast.splat278
%176 = icmp ult <4 x i32> %175, <i32 256, i32 256, i32 256, i32 256>
%177 = icmp sgt <4 x i32> %175, zeroinitializer
%178 = sext <4 x i1> %177 to <4 x i32>
%179 = select <4 x i1> %176, <4 x i32> %175, <4 x i32> %178
%180 = trunc <4 x i32> %179 to <4 x i8>
%181 = getelementptr inbounds i8, ptr %dst.addr.087.us.i117, i64 %163
%182 = getelementptr inbounds i8, ptr %181, i32 0
store <4 x i8> %180, ptr %182, align 1, !tbaa !15, !alias.scope !192, !noalias !194
%index.next279 = add nuw i64 %index271, 4
%183 = icmp eq i64 %index.next279, %n.vec267
br i1 %183, label %middle.block262, label %vector.body270, !llvm.loop !195
middle.block262: ; preds = %vector.body270
%cmp.n269 = icmp eq i64 %wide.trip.count97.i114, %n.vec267
br i1 %cmp.n269, label %for.cond2.for.cond.cleanup4_crit_edge.us.i136, label %scalar.ph263
scalar.ph263: ; preds = %vector.memcheck248, %for.cond2.preheader.us.i115, %middle.block262
%bc.resume.val268 = phi i64 [ %n.vec267, %middle.block262 ], [ 0, %for.cond2.preheader.us.i115 ], [ 0, %vector.memcheck248 ]
br label %for.body5.us.i119
for.body5.us.i119: ; preds = %for.body5.us.i119, %scalar.ph263
%indvars.iv94.i120 = phi i64 [ %bc.resume.val268, %scalar.ph263 ], [ %indvars.iv.next95.i134, %for.body5.us.i119 ]
%arrayidx.us.i121 = getelementptr inbounds i8, ptr %src.addr.086.us.i118, i64 %indvars.iv94.i120
%184 = load i8, ptr %arrayidx.us.i121, align 1, !tbaa !15
%conv.us.i122 = zext i8 %184 to i32
%185 = load i32, ptr %i_scale.i109, align 4, !tbaa !104
%mul.us.i123 = mul nsw i32 %185, %conv.us.i122
%186 = load i32, ptr %i_denom.i72, align 16, !tbaa !103
%sub.us.i124 = add nsw i32 %186, -1
%shl.us.i125 = shl nuw i32 1, %sub.us.i124
%add.us.i126 = add nsw i32 %shl.us.i125, %mul.us.i123
%shr.us.i127 = ashr i32 %add.us.i126, %186
%187 = load i32, ptr %i_offset.i110, align 8, !tbaa !108
%add8.us.i128 = add nsw i32 %shr.us.i127, %187
%tobool.not.i.us.i129 = icmp ult i32 %add8.us.i128, 256
%188 = icmp sgt i32 %add8.us.i128, 0
%shr.i.us.i130 = sext i1 %188 to i32
%cond.i.us.i131 = select i1 %tobool.not.i.us.i129, i32 %add8.us.i128, i32 %shr.i.us.i130
%conv.i.us.i132 = trunc i32 %cond.i.us.i131 to i8
%arrayidx10.us.i133 = getelementptr inbounds i8, ptr %dst.addr.087.us.i117, i64 %indvars.iv94.i120
store i8 %conv.i.us.i132, ptr %arrayidx10.us.i133, align 1, !tbaa !15
%indvars.iv.next95.i134 = add nuw nsw i64 %indvars.iv94.i120, 1
%exitcond98.not.i135 = icmp eq i64 %indvars.iv.next95.i134, %wide.trip.count97.i114
br i1 %exitcond98.not.i135, label %for.cond2.for.cond.cleanup4_crit_edge.us.i136, label %for.body5.us.i119, !llvm.loop !196
for.cond2.for.cond.cleanup4_crit_edge.us.i136: ; preds = %middle.block262, %for.body5.us.i119
%inc12.us.i137 = add nuw nsw i32 %y.088.us.i116, 1
%add.ptr.us.i138 = getelementptr inbounds i8, ptr %dst.addr.087.us.i117, i64 %idx.ext.i111
%add.ptr14.us.i139 = getelementptr inbounds i8, ptr %src.addr.086.us.i118, i64 %idx.ext13.i112
%exitcond99.not.i140 = icmp eq i32 %inc12.us.i137, %i_height
br i1 %exitcond99.not.i140, label %cleanup.loopexit, label %for.cond2.preheader.us.i115, !llvm.loop !121
if.else27: ; preds = %if.else
store i32 %i_src_stride, ptr %i_dst_stride, align 4, !tbaa !13
br label %cleanup
cleanup.loopexit: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i136
br label %cleanup
cleanup.loopexit151: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i101
br label %cleanup
cleanup.loopexit152: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i
br label %cleanup
cleanup.loopexit153: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i
br label %cleanup
cleanup: ; preds = %cleanup.loopexit153, %cleanup.loopexit152, %cleanup.loopexit151, %cleanup.loopexit, %for.cond2.preheader.lr.ph.i107, %for.cond.preheader.i106, %for.cond23.preheader.lr.ph.i76, %for.cond17.preheader.i75, %for.cond2.preheader.lr.ph.i, %for.cond.preheader.i, %for.cond23.preheader.lr.ph.i, %for.cond17.preheader.i, %pixel_avg.exit, %if.else27
%retval.0 = phi ptr [ %add.ptr9, %if.else27 ], [ %dst, %pixel_avg.exit ], [ %dst, %for.cond17.preheader.i ], [ %dst, %for.cond23.preheader.lr.ph.i ], [ %dst, %for.cond.preheader.i ], [ %dst, %for.cond2.preheader.lr.ph.i ], [ %dst, %for.cond17.preheader.i75 ], [ %dst, %for.cond23.preheader.lr.ph.i76 ], [ %dst, %for.cond.preheader.i106 ], [ %dst, %for.cond2.preheader.lr.ph.i107 ], [ %dst, %cleanup.loopexit ], [ %dst, %cleanup.loopexit151 ], [ %dst, %cleanup.loopexit152 ], [ %dst, %cleanup.loopexit153 ]
ret ptr %retval.0
}
; *** IR Dump Before LoopVectorizePass on get_ref ***
; Function Attrs: nofree norecurse nosync nounwind memory(readwrite, inaccessiblemem: none) uwtable
; get_ref (scalar form, as dumped before LoopVectorizePass).
;
; Picks the reference pixel block addressed by the motion vector (%mvx, %mvy)
; and returns a pointer to it. The low two bits of each component form a 4-bit
; table index, ((mvy & 3) << 2) | (mvx & 3); the remaining bits (mvy >> 2,
; mvx >> 2) give a row/column byte offset into the selected source plane.
; NOTE(review): the 2-bit fraction presumably means quarter-pel vectors — confirm
; against the C source.
;
; Three outcomes:
;   * index & 5 != 0 (if.then): a second plane is selected through @hpel_ref1
;     and the two sources are averaged with rounding into %dst; if the pointer
;     stored in field 5 of %weight is non-null, %dst is then weighted in place.
;   * index & 5 == 0 and field 5 non-null (if.then26): the source block is
;     weighted directly into %dst.
;   * index & 5 == 0 and field 5 null (if.else27): no pixels are copied;
;     %i_src_stride is stored to *%i_dst_stride and the source pointer itself
;     is returned.
;
; Returns %dst on every path except if.else27, which returns %add.ptr9.
; NOTE(review): the ".i" value suffixes and the pixel_avg.exit label suggest the
; loop nests come from inlined helpers — confirm against the C source.
define internal ptr @get_ref(ptr noundef %dst, ptr nocapture noundef %i_dst_stride, ptr nocapture noundef readonly %src, i32 noundef %i_src_stride, i32 noundef %mvx, i32 noundef %mvy, i32 noundef %i_width, i32 noundef %i_height, ptr nocapture noundef readonly %weight) #0 {
entry:
; %add = ((mvy & 3) << 2) | (mvx & 3): 4-bit index used for both lookup tables.
%and = and i32 %mvy, 3
%shl = shl nuw nsw i32 %and, 2
%and1 = and i32 %mvx, 3
%add = or disjoint i32 %shl, %and1
; %add3 = (mvy >> 2) * i_src_stride + (mvx >> 2): byte offset inside a plane.
%shr = ashr i32 %mvy, 2
%mul = mul nsw i32 %shr, %i_src_stride
%shr2 = ashr i32 %mvx, 2
%add3 = add nsw i32 %mul, %shr2
; First source: plane src[hpel_ref0[index]] advanced by the offset.
%idxprom = zext nneg i32 %add to i64
%arrayidx = getelementptr inbounds [16 x i8], ptr @hpel_ref0, i64 0, i64 %idxprom
%0 = load i8, ptr %arrayidx, align 1, !tbaa !15
%idxprom4 = zext i8 %0 to i64
%arrayidx5 = getelementptr inbounds ptr, ptr %src, i64 %idxprom4
%1 = load ptr, ptr %arrayidx5, align 8, !tbaa !9
%idx.ext = sext i32 %add3 to i64
%add.ptr = getelementptr inbounds i8, ptr %1, i64 %idx.ext
; When (mvy & 3) == 3 the first source starts one row (i_src_stride) further down.
%cmp = icmp eq i32 %and, 3
%mul7 = select i1 %cmp, i32 %i_src_stride, i32 0
%idx.ext8 = sext i32 %mul7 to i64
%add.ptr9 = getelementptr inbounds i8, ptr %add.ptr, i64 %idx.ext8
; index & 5 == 0 -> single-source path (if.else); otherwise average two sources.
%and10 = and i32 %add, 5
%tobool.not = icmp eq i32 %and10, 0
br i1 %tobool.not, label %if.else, label %if.then
if.then: ; preds = %entry
; Second source: plane src[hpel_ref1[index]] at the same offset, shifted one
; byte to the right when (mvx & 3) == 3.
%arrayidx12 = getelementptr inbounds [16 x i8], ptr @hpel_ref1, i64 0, i64 %idxprom
%2 = load i8, ptr %arrayidx12, align 1, !tbaa !15
%idxprom13 = zext i8 %2 to i64
%arrayidx14 = getelementptr inbounds ptr, ptr %src, i64 %idxprom13
%3 = load ptr, ptr %arrayidx14, align 8, !tbaa !9
%add.ptr16 = getelementptr inbounds i8, ptr %3, i64 %idx.ext
%cmp18 = icmp eq i32 %and1, 3
%idx.ext20 = zext i1 %cmp18 to i64
%add.ptr21 = getelementptr inbounds i8, ptr %add.ptr16, i64 %idx.ext20
; Skip the averaging loops entirely when i_height <= 0.
%cmp29.i = icmp sgt i32 %i_height, 0
br i1 %cmp29.i, label %for.cond1.preheader.lr.ph.i, label %pixel_avg.exit
for.cond1.preheader.lr.ph.i: ; preds = %if.then
; The destination row pitch is read from *i_dst_stride; both sources use
; i_src_stride. Nothing to do when i_width <= 0.
%4 = load i32, ptr %i_dst_stride, align 4, !tbaa !13
%cmp227.i = icmp sgt i32 %i_width, 0
%idx.ext.i = sext i32 %4 to i64
%idx.ext12.i = sext i32 %i_src_stride to i64
br i1 %cmp227.i, label %for.cond1.preheader.us.preheader.i, label %pixel_avg.exit
for.cond1.preheader.us.preheader.i: ; preds = %for.cond1.preheader.lr.ph.i
%wide.trip.count.i = zext nneg i32 %i_width to i64
br label %for.cond1.preheader.us.i
; Row loop of the averaging nest: carries the row counter and the three row pointers.
for.cond1.preheader.us.i: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us.i, %for.cond1.preheader.us.preheader.i
%y.033.us.i = phi i32 [ %inc17.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ 0, %for.cond1.preheader.us.preheader.i ]
%dst.addr.032.us.i = phi ptr [ %add.ptr.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %dst, %for.cond1.preheader.us.preheader.i ]
%src1.addr.031.us.i = phi ptr [ %add.ptr13.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %add.ptr9, %for.cond1.preheader.us.preheader.i ]
%src2.addr.030.us.i = phi ptr [ %add.ptr15.us.i, %for.cond1.for.cond.cleanup3_crit_edge.us.i ], [ %add.ptr21, %for.cond1.preheader.us.preheader.i ]
br label %for.body4.us.i
; Column loop: dst[x] = (src1[x] + src2[x] + 1) >> 1. The sum is formed in i16,
; where the maximum 255 + 255 + 1 cannot wrap (hence nuw nsw).
for.body4.us.i: ; preds = %for.body4.us.i, %for.cond1.preheader.us.i
%indvars.iv.i = phi i64 [ 0, %for.cond1.preheader.us.i ], [ %indvars.iv.next.i, %for.body4.us.i ]
%arrayidx.us.i = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %indvars.iv.i
%5 = load i8, ptr %arrayidx.us.i, align 1, !tbaa !15
%conv.us.i = zext i8 %5 to i16
%arrayidx6.us.i = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %indvars.iv.i
%6 = load i8, ptr %arrayidx6.us.i, align 1, !tbaa !15
%conv7.us.i = zext i8 %6 to i16
%add.us.i = add nuw nsw i16 %conv.us.i, 1
%add8.us.i = add nuw nsw i16 %add.us.i, %conv7.us.i
%shr.us.i = lshr i16 %add8.us.i, 1
%conv9.us.i = trunc i16 %shr.us.i to i8
%arrayidx11.us.i = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %indvars.iv.i
store i8 %conv9.us.i, ptr %arrayidx11.us.i, align 1, !tbaa !15
%indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1
%exitcond.not.i = icmp eq i64 %indvars.iv.next.i, %wide.trip.count.i
br i1 %exitcond.not.i, label %for.cond1.for.cond.cleanup3_crit_edge.us.i, label %for.body4.us.i, !llvm.loop !144
; Row latch: step dst by the destination stride and both sources by i_src_stride.
for.cond1.for.cond.cleanup3_crit_edge.us.i: ; preds = %for.body4.us.i
%add.ptr.us.i = getelementptr inbounds i8, ptr %dst.addr.032.us.i, i64 %idx.ext.i
%add.ptr13.us.i = getelementptr inbounds i8, ptr %src1.addr.031.us.i, i64 %idx.ext12.i
%add.ptr15.us.i = getelementptr inbounds i8, ptr %src2.addr.030.us.i, i64 %idx.ext12.i
%inc17.us.i = add nuw nsw i32 %y.033.us.i, 1
%exitcond36.not.i = icmp eq i32 %inc17.us.i, %i_height
br i1 %exitcond36.not.i, label %pixel_avg.exit.loopexit, label %for.cond1.preheader.us.i, !llvm.loop !100
pixel_avg.exit.loopexit: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us.i
br label %pixel_avg.exit
; Averaging finished (or skipped). Field 5 of %weight holds a pointer; when it
; is null no weighting is applied and %dst is returned as is.
pixel_avg.exit: ; preds = %pixel_avg.exit.loopexit, %if.then, %for.cond1.preheader.lr.ph.i
%weightfn = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 5
%7 = load ptr, ptr %weightfn, align 16, !tbaa !101
%tobool22.not = icmp eq ptr %7, null
br i1 %tobool22.not, label %cleanup, label %if.then23
if.then23: ; preds = %pixel_avg.exit
; In-place weighting of %dst. Field 2 (%i_denom.i) selects the variant:
; > 0 -> rounding-shift loops (for.cond.preheader.i), otherwise plain
; scale-and-offset loops (for.cond17.preheader.i).
%8 = load i32, ptr %i_dst_stride, align 4, !tbaa !13
%i_denom.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 2
%9 = load i32, ptr %i_denom.i, align 16, !tbaa !103
%cmp.i = icmp sgt i32 %9, 0
br i1 %cmp.i, label %for.cond.preheader.i, label %for.cond17.preheader.i
for.cond17.preheader.i: ; preds = %if.then23
br i1 %cmp29.i, label %for.cond23.preheader.lr.ph.i, label %cleanup
for.cond23.preheader.lr.ph.i: ; preds = %for.cond17.preheader.i
; Field 3 is the scale and field 4 the offset; both are re-loaded from memory
; on every pixel inside the loop below.
%cmp2477.i = icmp sgt i32 %i_width, 0
%i_scale31.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3
%i_offset33.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4
%idx.ext43.i = sext i32 %8 to i64
br i1 %cmp2477.i, label %for.cond23.preheader.us.preheader.i, label %cleanup
for.cond23.preheader.us.preheader.i: ; preds = %for.cond23.preheader.lr.ph.i
%wide.trip.count.i61 = zext nneg i32 %i_width to i64
br label %for.cond23.preheader.us.i
for.cond23.preheader.us.i: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i, %for.cond23.preheader.us.preheader.i
%y16.082.us.i = phi i32 [ %inc42.us.i, %for.cond23.for.cond.cleanup26_crit_edge.us.i ], [ 0, %for.cond23.preheader.us.preheader.i ]
%dst.addr.181.us.i = phi ptr [ %add.ptr44.us.i, %for.cond23.for.cond.cleanup26_crit_edge.us.i ], [ %dst, %for.cond23.preheader.us.preheader.i ]
br label %for.body27.us.i
; In place: dst[x] = clip(dst[x] * scale + offset).
; clip keeps v when v is unsigned-less-than 256; otherwise it yields
; sext(v > 0), i.e. -1 or 0, which truncates to 255 or 0.
for.body27.us.i: ; preds = %for.body27.us.i, %for.cond23.preheader.us.i
%indvars.iv.i62 = phi i64 [ 0, %for.cond23.preheader.us.i ], [ %indvars.iv.next.i63, %for.body27.us.i ]
%arrayidx29.us.i = getelementptr inbounds i8, ptr %dst.addr.181.us.i, i64 %indvars.iv.i62
%10 = load i8, ptr %arrayidx29.us.i, align 1, !tbaa !15
%conv30.us.i = zext i8 %10 to i32
%11 = load i32, ptr %i_scale31.i, align 4, !tbaa !104
%mul32.us.i = mul nsw i32 %11, %conv30.us.i
%12 = load i32, ptr %i_offset33.i, align 8, !tbaa !108
%add34.us.i = add nsw i32 %mul32.us.i, %12
%tobool.not.i72.us.i = icmp ult i32 %add34.us.i, 256
%13 = icmp sgt i32 %add34.us.i, 0
%shr.i73.us.i = sext i1 %13 to i32
%cond.i74.us.i = select i1 %tobool.not.i72.us.i, i32 %add34.us.i, i32 %shr.i73.us.i
%conv.i75.us.i = trunc i32 %cond.i74.us.i to i8
store i8 %conv.i75.us.i, ptr %arrayidx29.us.i, align 1, !tbaa !15
%indvars.iv.next.i63 = add nuw nsw i64 %indvars.iv.i62, 1
%exitcond.not.i64 = icmp eq i64 %indvars.iv.next.i63, %wide.trip.count.i61
br i1 %exitcond.not.i64, label %for.cond23.for.cond.cleanup26_crit_edge.us.i, label %for.body27.us.i, !llvm.loop !145
for.cond23.for.cond.cleanup26_crit_edge.us.i: ; preds = %for.body27.us.i
%inc42.us.i = add nuw nsw i32 %y16.082.us.i, 1
%add.ptr44.us.i = getelementptr i8, ptr %dst.addr.181.us.i, i64 %idx.ext43.i
%exitcond93.not.i = icmp eq i32 %inc42.us.i, %i_height
br i1 %exitcond93.not.i, label %cleanup.loopexit153, label %for.cond23.preheader.us.i, !llvm.loop !113
for.cond.preheader.i: ; preds = %if.then23
br i1 %cmp29.i, label %for.cond2.preheader.lr.ph.i, label %cleanup
for.cond2.preheader.lr.ph.i: ; preds = %for.cond.preheader.i
%cmp383.i = icmp sgt i32 %i_width, 0
%i_scale.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3
%i_offset.i = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4
%idx.ext.i65 = sext i32 %8 to i64
br i1 %cmp383.i, label %for.cond2.preheader.us.preheader.i, label %cleanup
for.cond2.preheader.us.preheader.i: ; preds = %for.cond2.preheader.lr.ph.i
%wide.trip.count97.i = zext nneg i32 %i_width to i64
br label %for.cond2.preheader.us.i
for.cond2.preheader.us.i: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i, %for.cond2.preheader.us.preheader.i
%y.088.us.i = phi i32 [ %inc12.us.i, %for.cond2.for.cond.cleanup4_crit_edge.us.i ], [ 0, %for.cond2.preheader.us.preheader.i ]
%dst.addr.087.us.i = phi ptr [ %add.ptr.us.i71, %for.cond2.for.cond.cleanup4_crit_edge.us.i ], [ %dst, %for.cond2.preheader.us.preheader.i ]
br label %for.body5.us.i
; In place, with rounding shift:
;   dst[x] = clip(((dst[x] * scale + (1 << (denom - 1))) >> denom) + offset)
; scale, denom and offset are all re-loaded per pixel; clip is as described above.
for.body5.us.i: ; preds = %for.body5.us.i, %for.cond2.preheader.us.i
%indvars.iv94.i = phi i64 [ 0, %for.cond2.preheader.us.i ], [ %indvars.iv.next95.i, %for.body5.us.i ]
%arrayidx.us.i66 = getelementptr inbounds i8, ptr %dst.addr.087.us.i, i64 %indvars.iv94.i
%14 = load i8, ptr %arrayidx.us.i66, align 1, !tbaa !15
%conv.us.i67 = zext i8 %14 to i32
%15 = load i32, ptr %i_scale.i, align 4, !tbaa !104
%mul.us.i = mul nsw i32 %15, %conv.us.i67
%16 = load i32, ptr %i_denom.i, align 16, !tbaa !103
%sub.us.i = add nsw i32 %16, -1
%shl.us.i = shl nuw i32 1, %sub.us.i
%add.us.i68 = add nsw i32 %shl.us.i, %mul.us.i
%shr.us.i69 = ashr i32 %add.us.i68, %16
%17 = load i32, ptr %i_offset.i, align 8, !tbaa !108
%add8.us.i70 = add nsw i32 %shr.us.i69, %17
%tobool.not.i.us.i = icmp ult i32 %add8.us.i70, 256
%18 = icmp sgt i32 %add8.us.i70, 0
%shr.i.us.i = sext i1 %18 to i32
%cond.i.us.i = select i1 %tobool.not.i.us.i, i32 %add8.us.i70, i32 %shr.i.us.i
%conv.i.us.i = trunc i32 %cond.i.us.i to i8
store i8 %conv.i.us.i, ptr %arrayidx.us.i66, align 1, !tbaa !15
%indvars.iv.next95.i = add nuw nsw i64 %indvars.iv94.i, 1
%exitcond98.not.i = icmp eq i64 %indvars.iv.next95.i, %wide.trip.count97.i
br i1 %exitcond98.not.i, label %for.cond2.for.cond.cleanup4_crit_edge.us.i, label %for.body5.us.i, !llvm.loop !146
for.cond2.for.cond.cleanup4_crit_edge.us.i: ; preds = %for.body5.us.i
%inc12.us.i = add nuw nsw i32 %y.088.us.i, 1
%add.ptr.us.i71 = getelementptr i8, ptr %dst.addr.087.us.i, i64 %idx.ext.i65
%exitcond99.not.i = icmp eq i32 %inc12.us.i, %i_height
br i1 %exitcond99.not.i, label %cleanup.loopexit152, label %for.cond2.preheader.us.i, !llvm.loop !121
; Single-source path (index & 5 == 0): weight from the source plane into %dst,
; or hand back the source pointer untouched when field 5 of %weight is null.
if.else: ; preds = %entry
%weightfn24 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 5
%19 = load ptr, ptr %weightfn24, align 16, !tbaa !101
%tobool25.not = icmp eq ptr %19, null
br i1 %tobool25.not, label %if.else27, label %if.then26
if.then26: ; preds = %if.else
; Same denom-based variant selection as in if.then23, but the loops below read
; from the source plane (row pitch i_src_stride) and write to %dst (row pitch
; loaded from *i_dst_stride).
%20 = load i32, ptr %i_dst_stride, align 4, !tbaa !13
%i_denom.i72 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 2
%21 = load i32, ptr %i_denom.i72, align 16, !tbaa !103
%cmp.i73 = icmp sgt i32 %21, 0
%cmp185.i74 = icmp sgt i32 %i_height, 0
br i1 %cmp.i73, label %for.cond.preheader.i106, label %for.cond17.preheader.i75
for.cond17.preheader.i75: ; preds = %if.then26
br i1 %cmp185.i74, label %for.cond23.preheader.lr.ph.i76, label %cleanup
for.cond23.preheader.lr.ph.i76: ; preds = %for.cond17.preheader.i75
%cmp2477.i77 = icmp sgt i32 %i_width, 0
%i_scale31.i78 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3
%i_offset33.i79 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4
%idx.ext43.i80 = sext i32 %20 to i64
%idx.ext45.i81 = sext i32 %i_src_stride to i64
br i1 %cmp2477.i77, label %for.cond23.preheader.us.preheader.i82, label %cleanup
for.cond23.preheader.us.preheader.i82: ; preds = %for.cond23.preheader.lr.ph.i76
%wide.trip.count.i83 = zext nneg i32 %i_width to i64
br label %for.cond23.preheader.us.i84
for.cond23.preheader.us.i84: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i101, %for.cond23.preheader.us.preheader.i82
%y16.082.us.i85 = phi i32 [ %inc42.us.i102, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ 0, %for.cond23.preheader.us.preheader.i82 ]
%dst.addr.181.us.i86 = phi ptr [ %add.ptr44.us.i103, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ %dst, %for.cond23.preheader.us.preheader.i82 ]
%src.addr.180.us.i87 = phi ptr [ %add.ptr46.us.i104, %for.cond23.for.cond.cleanup26_crit_edge.us.i101 ], [ %add.ptr9, %for.cond23.preheader.us.preheader.i82 ]
br label %for.body27.us.i88
; dst[x] = clip(src[x] * scale + offset); scale and offset re-loaded per pixel.
for.body27.us.i88: ; preds = %for.body27.us.i88, %for.cond23.preheader.us.i84
%indvars.iv.i89 = phi i64 [ 0, %for.cond23.preheader.us.i84 ], [ %indvars.iv.next.i99, %for.body27.us.i88 ]
%arrayidx29.us.i90 = getelementptr inbounds i8, ptr %src.addr.180.us.i87, i64 %indvars.iv.i89
%22 = load i8, ptr %arrayidx29.us.i90, align 1, !tbaa !15
%conv30.us.i91 = zext i8 %22 to i32
%23 = load i32, ptr %i_scale31.i78, align 4, !tbaa !104
%mul32.us.i92 = mul nsw i32 %23, %conv30.us.i91
%24 = load i32, ptr %i_offset33.i79, align 8, !tbaa !108
%add34.us.i93 = add nsw i32 %mul32.us.i92, %24
%tobool.not.i72.us.i94 = icmp ult i32 %add34.us.i93, 256
%25 = icmp sgt i32 %add34.us.i93, 0
%shr.i73.us.i95 = sext i1 %25 to i32
%cond.i74.us.i96 = select i1 %tobool.not.i72.us.i94, i32 %add34.us.i93, i32 %shr.i73.us.i95
%conv.i75.us.i97 = trunc i32 %cond.i74.us.i96 to i8
%arrayidx37.us.i98 = getelementptr inbounds i8, ptr %dst.addr.181.us.i86, i64 %indvars.iv.i89
store i8 %conv.i75.us.i97, ptr %arrayidx37.us.i98, align 1, !tbaa !15
%indvars.iv.next.i99 = add nuw nsw i64 %indvars.iv.i89, 1
%exitcond.not.i100 = icmp eq i64 %indvars.iv.next.i99, %wide.trip.count.i83
br i1 %exitcond.not.i100, label %for.cond23.for.cond.cleanup26_crit_edge.us.i101, label %for.body27.us.i88, !llvm.loop !145
for.cond23.for.cond.cleanup26_crit_edge.us.i101: ; preds = %for.body27.us.i88
%inc42.us.i102 = add nuw nsw i32 %y16.082.us.i85, 1
%add.ptr44.us.i103 = getelementptr inbounds i8, ptr %dst.addr.181.us.i86, i64 %idx.ext43.i80
%add.ptr46.us.i104 = getelementptr inbounds i8, ptr %src.addr.180.us.i87, i64 %idx.ext45.i81
%exitcond93.not.i105 = icmp eq i32 %inc42.us.i102, %i_height
br i1 %exitcond93.not.i105, label %cleanup.loopexit151, label %for.cond23.preheader.us.i84, !llvm.loop !113
for.cond.preheader.i106: ; preds = %if.then26
br i1 %cmp185.i74, label %for.cond2.preheader.lr.ph.i107, label %cleanup
for.cond2.preheader.lr.ph.i107: ; preds = %for.cond.preheader.i106
%cmp383.i108 = icmp sgt i32 %i_width, 0
%i_scale.i109 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 3
%i_offset.i110 = getelementptr inbounds %struct.x264_weight_t, ptr %weight, i64 0, i32 4
%idx.ext.i111 = sext i32 %20 to i64
%idx.ext13.i112 = sext i32 %i_src_stride to i64
br i1 %cmp383.i108, label %for.cond2.preheader.us.preheader.i113, label %cleanup
for.cond2.preheader.us.preheader.i113: ; preds = %for.cond2.preheader.lr.ph.i107
%wide.trip.count97.i114 = zext nneg i32 %i_width to i64
br label %for.cond2.preheader.us.i115
for.cond2.preheader.us.i115: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i136, %for.cond2.preheader.us.preheader.i113
%y.088.us.i116 = phi i32 [ %inc12.us.i137, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ 0, %for.cond2.preheader.us.preheader.i113 ]
%dst.addr.087.us.i117 = phi ptr [ %add.ptr.us.i138, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ %dst, %for.cond2.preheader.us.preheader.i113 ]
%src.addr.086.us.i118 = phi ptr [ %add.ptr14.us.i139, %for.cond2.for.cond.cleanup4_crit_edge.us.i136 ], [ %add.ptr9, %for.cond2.preheader.us.preheader.i113 ]
br label %for.body5.us.i119
; dst[x] = clip(((src[x] * scale + (1 << (denom - 1))) >> denom) + offset);
; scale, denom and offset are re-loaded per pixel.
for.body5.us.i119: ; preds = %for.body5.us.i119, %for.cond2.preheader.us.i115
%indvars.iv94.i120 = phi i64 [ 0, %for.cond2.preheader.us.i115 ], [ %indvars.iv.next95.i134, %for.body5.us.i119 ]
%arrayidx.us.i121 = getelementptr inbounds i8, ptr %src.addr.086.us.i118, i64 %indvars.iv94.i120
%26 = load i8, ptr %arrayidx.us.i121, align 1, !tbaa !15
%conv.us.i122 = zext i8 %26 to i32
%27 = load i32, ptr %i_scale.i109, align 4, !tbaa !104
%mul.us.i123 = mul nsw i32 %27, %conv.us.i122
%28 = load i32, ptr %i_denom.i72, align 16, !tbaa !103
%sub.us.i124 = add nsw i32 %28, -1
%shl.us.i125 = shl nuw i32 1, %sub.us.i124
%add.us.i126 = add nsw i32 %shl.us.i125, %mul.us.i123
%shr.us.i127 = ashr i32 %add.us.i126, %28
%29 = load i32, ptr %i_offset.i110, align 8, !tbaa !108
%add8.us.i128 = add nsw i32 %shr.us.i127, %29
%tobool.not.i.us.i129 = icmp ult i32 %add8.us.i128, 256
%30 = icmp sgt i32 %add8.us.i128, 0
%shr.i.us.i130 = sext i1 %30 to i32
%cond.i.us.i131 = select i1 %tobool.not.i.us.i129, i32 %add8.us.i128, i32 %shr.i.us.i130
%conv.i.us.i132 = trunc i32 %cond.i.us.i131 to i8
%arrayidx10.us.i133 = getelementptr inbounds i8, ptr %dst.addr.087.us.i117, i64 %indvars.iv94.i120
store i8 %conv.i.us.i132, ptr %arrayidx10.us.i133, align 1, !tbaa !15
%indvars.iv.next95.i134 = add nuw nsw i64 %indvars.iv94.i120, 1
%exitcond98.not.i135 = icmp eq i64 %indvars.iv.next95.i134, %wide.trip.count97.i114
br i1 %exitcond98.not.i135, label %for.cond2.for.cond.cleanup4_crit_edge.us.i136, label %for.body5.us.i119, !llvm.loop !146
for.cond2.for.cond.cleanup4_crit_edge.us.i136: ; preds = %for.body5.us.i119
%inc12.us.i137 = add nuw nsw i32 %y.088.us.i116, 1
%add.ptr.us.i138 = getelementptr inbounds i8, ptr %dst.addr.087.us.i117, i64 %idx.ext.i111
%add.ptr14.us.i139 = getelementptr inbounds i8, ptr %src.addr.086.us.i118, i64 %idx.ext13.i112
%exitcond99.not.i140 = icmp eq i32 %inc12.us.i137, %i_height
br i1 %exitcond99.not.i140, label %cleanup.loopexit, label %for.cond2.preheader.us.i115, !llvm.loop !121
; No averaging and no weighting: report the source stride through
; *i_dst_stride and return the source pointer directly (see %retval.0).
if.else27: ; preds = %if.else
store i32 %i_src_stride, ptr %i_dst_stride, align 4, !tbaa !13
br label %cleanup
; Dedicated exit blocks, one per loop nest, each forwarding to the common return.
cleanup.loopexit: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i136
br label %cleanup
cleanup.loopexit151: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i101
br label %cleanup
cleanup.loopexit152: ; preds = %for.cond2.for.cond.cleanup4_crit_edge.us.i
br label %cleanup
cleanup.loopexit153: ; preds = %for.cond23.for.cond.cleanup26_crit_edge.us.i
br label %cleanup
; Common return: %add.ptr9 when arriving from if.else27, %dst from every other predecessor.
cleanup: ; preds = %cleanup.loopexit153, %cleanup.loopexit152, %cleanup.loopexit151, %cleanup.loopexit, %for.cond2.preheader.lr.ph.i107, %for.cond.preheader.i106, %for.cond23.preheader.lr.ph.i76, %for.cond17.preheader.i75, %for.cond2.preheader.lr.ph.i, %for.cond.preheader.i, %for.cond23.preheader.lr.ph.i, %for.cond17.preheader.i, %pixel_avg.exit, %if.else27
%retval.0 = phi ptr [ %add.ptr9, %if.else27 ], [ %dst, %pixel_avg.exit ], [ %dst, %for.cond17.preheader.i ], [ %dst, %for.cond23.preheader.lr.ph.i ], [ %dst, %for.cond.preheader.i ], [ %dst, %for.cond2.preheader.lr.ph.i ], [ %dst, %for.cond17.preheader.i75 ], [ %dst, %for.cond23.preheader.lr.ph.i76 ], [ %dst, %for.cond.preheader.i106 ], [ %dst, %for.cond2.preheader.lr.ph.i107 ], [ %dst, %cleanup.loopexit ], [ %dst, %cleanup.loopexit151 ], [ %dst, %cleanup.loopexit152 ], [ %dst, %cleanup.loopexit153 ]
ret ptr %retval.0
}
; *** IR Dump After SLPVectorizerPass on x264_pixel_sad_16x16 ***
; Function Attrs: nofree norecurse nosync nounwind memory(read, inaccessiblemem: none) uwtable
; x264_pixel_sad_16x16: sum of absolute differences between two blocks of
; 16 rows x 16 bytes.
;
;   %pix1 / %pix2                   first byte of each block (read-only)
;   %i_stride_pix1 / %i_stride_pix2 signed byte distance between consecutive rows
;   returns                         i32 sum over all 256 byte pairs of |pix1 - pix2|
;
; Each trip of the loop handles one whole row as a <16 x i8> vector: both rows
; are widened to i32 lanes, subtracted, passed through llvm.abs, and reduced to
; a scalar that is added to the running total.
define internal i32 @x264_pixel_sad_16x16(ptr nocapture noundef readonly %pix1, i32 noundef %i_stride_pix1, ptr nocapture noundef readonly %pix2, i32 noundef %i_stride_pix2) #4 {
entry:
  ; Row strides widened once, outside the loop, for pointer arithmetic.
  %stride2 = sext i32 %i_stride_pix2 to i64
  %stride1 = sext i32 %i_stride_pix1 to i64
  br label %row.loop

row.loop:                                         ; preds = %row.loop, %entry
  %p1 = phi ptr [ %p1.next, %row.loop ], [ %pix1, %entry ]
  %p2 = phi ptr [ %p2.next, %row.loop ], [ %pix2, %entry ]
  %sad.acc = phi i32 [ %sad.next, %row.loop ], [ 0, %entry ]
  %row = phi i32 [ %row.next, %row.loop ], [ 0, %entry ]
  ; Loop-carried updates: next row of each block and the row counter.
  %p1.next = getelementptr inbounds i8, ptr %p1, i64 %stride1
  %p2.next = getelementptr inbounds i8, ptr %p2, i64 %stride2
  %row.next = add nuw nsw i32 %row, 1
  ; Load one 16-byte row from each block and zero-extend every lane to i32.
  %row1.u8 = load <16 x i8>, ptr %p1, align 1, !tbaa !14
  %row2.u8 = load <16 x i8>, ptr %p2, align 1, !tbaa !14
  %row1.i32 = zext <16 x i8> %row1.u8 to <16 x i32>
  %row2.i32 = zext <16 x i8> %row2.u8 to <16 x i32>
  ; Per-lane |a - b|, then a horizontal add into the running total.
  %diff = sub nsw <16 x i32> %row1.i32, %row2.i32
  %absdiff = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %diff, i1 true)
  %row.sad = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %absdiff)
  %sad.next = add i32 %row.sad, %sad.acc
  ; Keep looping until all 16 rows have been consumed.
  %more.rows = icmp ne i32 %row.next, 16
  br i1 %more.rows, label %row.loop, label %done, !llvm.loop !30

done:                                             ; preds = %row.loop
  ret i32 %sad.next
}
; *** IR Dump Before SLPVectorizerPass on x264_pixel_sad_16x16 ***
; Function Attrs: nofree norecurse nosync nounwind memory(read, inaccessiblemem: none) uwtable
define internal i32 @x264_pixel_sad_16x16(ptr nocapture noundef readonly %pix1, i32 noundef %i_stride_pix1, ptr nocapture noundef readonly %pix2, i32 noundef %i_stride_pix2) #4 {
entry:
%idx.ext = sext i32 %i_stride_pix1 to i64
%idx.ext8 = sext i32 %i_stride_pix2 to i64
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %entry, %for.cond1.preheader
%y.025 = phi i32 [ 0, %entry ], [ %inc11, %for.cond1.preheader ]
%i_sum.024 = phi i32 [ 0, %entry ], [ %add.15, %for.cond1.preheader ]
%pix1.addr.023 = phi ptr [ %pix1, %entry ], [ %add.ptr, %for.cond1.preheader ]
%pix2.addr.022 = phi ptr [ %pix2, %entry ], [ %add.ptr9, %for.cond1.preheader ]
%0 = load i8, ptr %pix1.addr.023, align 1, !tbaa !14
%conv = zext i8 %0 to i32
%1 = load i8, ptr %pix2.addr.022, align 1, !tbaa !14
%conv7 = zext i8 %1 to i32
%sub = sub nsw i32 %conv, %conv7
%2 = tail call i32 @llvm.abs.i32(i32 %sub, i1 true)
%add = add nsw i32 %2, %i_sum.024
%arrayidx.1 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 1
%3 = load i8, ptr %arrayidx.1, align 1, !tbaa !14
%conv.1 = zext i8 %3 to i32
%arrayidx6.1 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 1
%4 = load i8, ptr %arrayidx6.1, align 1, !tbaa !14
%conv7.1 = zext i8 %4 to i32
%sub.1 = sub nsw i32 %conv.1, %conv7.1
%5 = tail call i32 @llvm.abs.i32(i32 %sub.1, i1 true)
%add.1 = add nsw i32 %5, %add
%arrayidx.2 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 2
%6 = load i8, ptr %arrayidx.2, align 1, !tbaa !14
%conv.2 = zext i8 %6 to i32
%arrayidx6.2 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 2
%7 = load i8, ptr %arrayidx6.2, align 1, !tbaa !14
%conv7.2 = zext i8 %7 to i32
%sub.2 = sub nsw i32 %conv.2, %conv7.2
%8 = tail call i32 @llvm.abs.i32(i32 %sub.2, i1 true)
%add.2 = add nsw i32 %8, %add.1
%arrayidx.3 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 3
%9 = load i8, ptr %arrayidx.3, align 1, !tbaa !14
%conv.3 = zext i8 %9 to i32
%arrayidx6.3 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 3
%10 = load i8, ptr %arrayidx6.3, align 1, !tbaa !14
%conv7.3 = zext i8 %10 to i32
%sub.3 = sub nsw i32 %conv.3, %conv7.3
%11 = tail call i32 @llvm.abs.i32(i32 %sub.3, i1 true)
%add.3 = add nsw i32 %11, %add.2
%arrayidx.4 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 4
%12 = load i8, ptr %arrayidx.4, align 1, !tbaa !14
%conv.4 = zext i8 %12 to i32
%arrayidx6.4 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 4
%13 = load i8, ptr %arrayidx6.4, align 1, !tbaa !14
%conv7.4 = zext i8 %13 to i32
%sub.4 = sub nsw i32 %conv.4, %conv7.4
%14 = tail call i32 @llvm.abs.i32(i32 %sub.4, i1 true)
%add.4 = add nsw i32 %14, %add.3
%arrayidx.5 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 5
%15 = load i8, ptr %arrayidx.5, align 1, !tbaa !14
%conv.5 = zext i8 %15 to i32
%arrayidx6.5 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 5
%16 = load i8, ptr %arrayidx6.5, align 1, !tbaa !14
%conv7.5 = zext i8 %16 to i32
%sub.5 = sub nsw i32 %conv.5, %conv7.5
%17 = tail call i32 @llvm.abs.i32(i32 %sub.5, i1 true)
%add.5 = add nsw i32 %17, %add.4
%arrayidx.6 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 6
%18 = load i8, ptr %arrayidx.6, align 1, !tbaa !14
%conv.6 = zext i8 %18 to i32
%arrayidx6.6 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 6
%19 = load i8, ptr %arrayidx6.6, align 1, !tbaa !14
%conv7.6 = zext i8 %19 to i32
%sub.6 = sub nsw i32 %conv.6, %conv7.6
%20 = tail call i32 @llvm.abs.i32(i32 %sub.6, i1 true)
%add.6 = add nsw i32 %20, %add.5
%arrayidx.7 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 7
%21 = load i8, ptr %arrayidx.7, align 1, !tbaa !14
%conv.7 = zext i8 %21 to i32
%arrayidx6.7 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 7
%22 = load i8, ptr %arrayidx6.7, align 1, !tbaa !14
%conv7.7 = zext i8 %22 to i32
%sub.7 = sub nsw i32 %conv.7, %conv7.7
%23 = tail call i32 @llvm.abs.i32(i32 %sub.7, i1 true)
%add.7 = add nsw i32 %23, %add.6
%arrayidx.8 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 8
%24 = load i8, ptr %arrayidx.8, align 1, !tbaa !14
%conv.8 = zext i8 %24 to i32
%arrayidx6.8 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 8
%25 = load i8, ptr %arrayidx6.8, align 1, !tbaa !14
%conv7.8 = zext i8 %25 to i32
%sub.8 = sub nsw i32 %conv.8, %conv7.8
%26 = tail call i32 @llvm.abs.i32(i32 %sub.8, i1 true)
%add.8 = add nsw i32 %26, %add.7
%arrayidx.9 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 9
%27 = load i8, ptr %arrayidx.9, align 1, !tbaa !14
%conv.9 = zext i8 %27 to i32
%arrayidx6.9 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 9
%28 = load i8, ptr %arrayidx6.9, align 1, !tbaa !14
%conv7.9 = zext i8 %28 to i32
%sub.9 = sub nsw i32 %conv.9, %conv7.9
%29 = tail call i32 @llvm.abs.i32(i32 %sub.9, i1 true)
%add.9 = add nsw i32 %29, %add.8
%arrayidx.10 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 10
%30 = load i8, ptr %arrayidx.10, align 1, !tbaa !14
%conv.10 = zext i8 %30 to i32
%arrayidx6.10 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 10
%31 = load i8, ptr %arrayidx6.10, align 1, !tbaa !14
%conv7.10 = zext i8 %31 to i32
%sub.10 = sub nsw i32 %conv.10, %conv7.10
%32 = tail call i32 @llvm.abs.i32(i32 %sub.10, i1 true)
%add.10 = add nsw i32 %32, %add.9
%arrayidx.11 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 11
%33 = load i8, ptr %arrayidx.11, align 1, !tbaa !14
%conv.11 = zext i8 %33 to i32
%arrayidx6.11 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 11
%34 = load i8, ptr %arrayidx6.11, align 1, !tbaa !14
%conv7.11 = zext i8 %34 to i32
%sub.11 = sub nsw i32 %conv.11, %conv7.11
%35 = tail call i32 @llvm.abs.i32(i32 %sub.11, i1 true)
%add.11 = add nsw i32 %35, %add.10
%arrayidx.12 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 12
%36 = load i8, ptr %arrayidx.12, align 1, !tbaa !14
%conv.12 = zext i8 %36 to i32
%arrayidx6.12 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 12
%37 = load i8, ptr %arrayidx6.12, align 1, !tbaa !14
%conv7.12 = zext i8 %37 to i32
%sub.12 = sub nsw i32 %conv.12, %conv7.12
%38 = tail call i32 @llvm.abs.i32(i32 %sub.12, i1 true)
%add.12 = add nsw i32 %38, %add.11
%arrayidx.13 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 13
%39 = load i8, ptr %arrayidx.13, align 1, !tbaa !14
%conv.13 = zext i8 %39 to i32
%arrayidx6.13 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 13
%40 = load i8, ptr %arrayidx6.13, align 1, !tbaa !14
%conv7.13 = zext i8 %40 to i32
%sub.13 = sub nsw i32 %conv.13, %conv7.13
%41 = tail call i32 @llvm.abs.i32(i32 %sub.13, i1 true)
%add.13 = add nsw i32 %41, %add.12
%arrayidx.14 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 14
%42 = load i8, ptr %arrayidx.14, align 1, !tbaa !14
%conv.14 = zext i8 %42 to i32
%arrayidx6.14 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 14
%43 = load i8, ptr %arrayidx6.14, align 1, !tbaa !14
%conv7.14 = zext i8 %43 to i32
%sub.14 = sub nsw i32 %conv.14, %conv7.14
%44 = tail call i32 @llvm.abs.i32(i32 %sub.14, i1 true)
%add.14 = add nsw i32 %44, %add.13
%arrayidx.15 = getelementptr inbounds i8, ptr %pix1.addr.023, i64 15
%45 = load i8, ptr %arrayidx.15, align 1, !tbaa !14
%conv.15 = zext i8 %45 to i32
%arrayidx6.15 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 15
%46 = load i8, ptr %arrayidx6.15, align 1, !tbaa !14
%conv7.15 = zext i8 %46 to i32
%sub.15 = sub nsw i32 %conv.15, %conv7.15
%47 = tail call i32 @llvm.abs.i32(i32 %sub.15, i1 true)
%add.15 = add nsw i32 %47, %add.14
%add.ptr = getelementptr inbounds i8, ptr %pix1.addr.023, i64 %idx.ext
%add.ptr9 = getelementptr inbounds i8, ptr %pix2.addr.022, i64 %idx.ext8
%inc11 = add nuw nsw i32 %y.025, 1
%exitcond.not = icmp eq i32 %inc11, 16
br i1 %exitcond.not, label %for.cond.cleanup, label %for.cond1.preheader, !llvm.loop !30
for.cond.cleanup: ; preds = %for.cond1.preheader
ret i32 %add.15
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment