[WebAssembly] Add support for memcmp expansion (#148298)
Conversation
|
@llvm/pr-subscribers-backend-webassembly Author: jjasmine (badumbatish) Changes: Fixes #61400. Added test case in llvm/test/CodeGen/WebAssembly/memcmp-expand.ll. Full diff: https://github.com/llvm/llvm-project/pull/148298.diff 4 Files Affected:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index bf2e04caa0a61..a91a58db2a422 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -46,6 +46,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
: TargetLowering(TM), Subtarget(&STI) {
auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
+ // Set the load count for memcmp expand optimization
+ MaxLoadsPerMemcmp = 3;
+ MaxLoadsPerMemcmpOptSize = 2;
+
// Booleans always contain 0 or 1.
setBooleanContents(ZeroOrOneBooleanContent);
// Except in SIMD vectors
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 4f159996e4c6c..3686fce33f3ca 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -141,6 +141,16 @@ InstructionCost WebAssemblyTTIImpl::getCastInstrCost(
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
+WebAssemblyTTIImpl::TTI::MemCmpExpansionOptions
+WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+ // NOTE(review): NumLoadsPerBlock is set to 2 as a conservative choice; the right value for WebAssembly has not been determined — TODO confirm.
+ Options.NumLoadsPerBlock = 2;
+ Options.LoadSizes.append({8, 4, 2, 1});
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ return Options;
+}
+
InstructionCost WebAssemblyTTIImpl::getMemoryOpCost(
unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index d83b8d1f45dbd..c915eeb07d4fd 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -73,6 +73,10 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) const override;
+
+ TTI::MemCmpExpansionOptions
+ enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
+
InstructionCost getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll
new file mode 100644
index 0000000000000..64476e3b0b844
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll
@@ -0,0 +1,146 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+declare i32 @memcmp(ptr, ptr, i32)
+
+define i1 @memcmp_expand_3(ptr %a, ptr %b) {
+; CHECK-LABEL: memcmp_expand_3:
+; CHECK: .functype memcmp_expand_3 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.load16_u $push7=, 0($0):p2align=0
+; CHECK-NEXT: i32.load16_u $push6=, 0($1):p2align=0
+; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6
+; CHECK-NEXT: i32.const $push0=, 2
+; CHECK-NEXT: i32.add $push3=, $0, $pop0
+; CHECK-NEXT: i32.load8_u $push4=, 0($pop3)
+; CHECK-NEXT: i32.const $push13=, 2
+; CHECK-NEXT: i32.add $push1=, $1, $pop13
+; CHECK-NEXT: i32.load8_u $push2=, 0($pop1)
+; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2
+; CHECK-NEXT: i32.or $push9=, $pop8, $pop5
+; CHECK-NEXT: i32.const $push10=, 65535
+; CHECK-NEXT: i32.and $push11=, $pop9, $pop10
+; CHECK-NEXT: i32.eqz $push12=, $pop11
+; CHECK-NEXT: return $pop12
+ %cmp_3 = call i32 @memcmp(ptr %a, ptr %b, i32 3)
+ %res = icmp eq i32 %cmp_3, 0
+ ret i1 %res
+}
+
+define i1 @memcmp_expand_5(ptr %a, ptr %b) {
+; CHECK-LABEL: memcmp_expand_5:
+; CHECK: .functype memcmp_expand_5 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0
+; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0
+; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6
+; CHECK-NEXT: i32.const $push0=, 4
+; CHECK-NEXT: i32.add $push3=, $0, $pop0
+; CHECK-NEXT: i32.load8_u $push4=, 0($pop3)
+; CHECK-NEXT: i32.const $push11=, 4
+; CHECK-NEXT: i32.add $push1=, $1, $pop11
+; CHECK-NEXT: i32.load8_u $push2=, 0($pop1)
+; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2
+; CHECK-NEXT: i32.or $push9=, $pop8, $pop5
+; CHECK-NEXT: i32.eqz $push10=, $pop9
+; CHECK-NEXT: return $pop10
+ %cmp_5 = call i32 @memcmp(ptr %a, ptr %b, i32 5)
+ %res = icmp eq i32 %cmp_5, 0
+ ret i1 %res
+}
+
+define i1 @memcmp_expand_7(ptr %a, ptr %b) {
+; CHECK-LABEL: memcmp_expand_7:
+; CHECK: .functype memcmp_expand_7 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: block
+; CHECK-NEXT: block
+; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0
+; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0
+; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6
+; CHECK-NEXT: i32.const $push0=, 4
+; CHECK-NEXT: i32.add $push3=, $0, $pop0
+; CHECK-NEXT: i32.load16_u $push4=, 0($pop3):p2align=0
+; CHECK-NEXT: i32.const $push17=, 4
+; CHECK-NEXT: i32.add $push1=, $1, $pop17
+; CHECK-NEXT: i32.load16_u $push2=, 0($pop1):p2align=0
+; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2
+; CHECK-NEXT: i32.or $push9=, $pop8, $pop5
+; CHECK-NEXT: br_if 0, $pop9 # 0: down to label1
+; CHECK-NEXT: # %bb.1: # %loadbb1
+; CHECK-NEXT: i32.const $2=, 0
+; CHECK-NEXT: i32.const $push10=, 6
+; CHECK-NEXT: i32.add $push13=, $0, $pop10
+; CHECK-NEXT: i32.load8_u $push14=, 0($pop13)
+; CHECK-NEXT: i32.const $push18=, 6
+; CHECK-NEXT: i32.add $push11=, $1, $pop18
+; CHECK-NEXT: i32.load8_u $push12=, 0($pop11)
+; CHECK-NEXT: i32.eq $push15=, $pop14, $pop12
+; CHECK-NEXT: br_if 1, $pop15 # 1: down to label0
+; CHECK-NEXT: .LBB2_2: # %res_block
+; CHECK-NEXT: end_block # label1:
+; CHECK-NEXT: i32.const $2=, 1
+; CHECK-NEXT: .LBB2_3: # %endblock
+; CHECK-NEXT: end_block # label0:
+; CHECK-NEXT: i32.eqz $push16=, $2
+; CHECK-NEXT: return $pop16
+ %cmp_7 = call i32 @memcmp(ptr %a, ptr %b, i32 7)
+ %res = icmp eq i32 %cmp_7, 0
+ ret i1 %res
+}
+
+define i1 @memcmp_expand_2(ptr %a, ptr %b) {
+; CHECK-LABEL: memcmp_expand_2:
+; CHECK: .functype memcmp_expand_2 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.load16_u $push1=, 0($0):p2align=0
+; CHECK-NEXT: i32.load16_u $push0=, 0($1):p2align=0
+; CHECK-NEXT: i32.eq $push2=, $pop1, $pop0
+; CHECK-NEXT: return $pop2
+ %cmp_2 = call i32 @memcmp(ptr %a, ptr %b, i32 2)
+ %res = icmp eq i32 %cmp_2, 0
+ ret i1 %res
+}
+
+
+define i1 @memcmp_expand_8(ptr %a, ptr %b) {
+; CHECK-LABEL: memcmp_expand_8:
+; CHECK: .functype memcmp_expand_8 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i64.load $push1=, 0($0):p2align=0
+; CHECK-NEXT: i64.load $push0=, 0($1):p2align=0
+; CHECK-NEXT: i64.eq $push2=, $pop1, $pop0
+; CHECK-NEXT: return $pop2
+ %cmp_8 = call i32 @memcmp(ptr %a, ptr %b, i32 8)
+ %res = icmp eq i32 %cmp_8, 0
+ ret i1 %res
+}
+
+
+define i1 @memcmp_expand_16(ptr %a, ptr %b) {
+; CHECK-LABEL: memcmp_expand_16:
+; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0
+; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0
+; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6
+; CHECK-NEXT: i32.const $push0=, 8
+; CHECK-NEXT: i32.add $push3=, $0, $pop0
+; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0
+; CHECK-NEXT: i32.const $push11=, 8
+; CHECK-NEXT: i32.add $push1=, $1, $pop11
+; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0
+; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2
+; CHECK-NEXT: i64.or $push9=, $pop8, $pop5
+; CHECK-NEXT: i64.eqz $push10=, $pop9
+; CHECK-NEXT: return $pop10
+ %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16)
+ %res = icmp eq i32 %cmp_16, 0
+ ret i1 %res
+}
+
+
+
+
|
|
@lukel97 hi Luke! would love your review :) |
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
lukel97
left a comment
There was a problem hiding this comment.
LGTM, some last little nits
| ret i1 %res | ||
| } | ||
|
|
||
| define i1 @memcmp_expand_2(ptr %a, ptr %b) { |
There was a problem hiding this comment.
Can you add a test where the pointers are aligned and see if we omit the p2aligns?
So basically the same test as this but just with
define i1 @memcmp_expand_2_align(ptr align(2) %a, ptr align(2) %b) {
There was a problem hiding this comment.
yep looks like it works
define i1 @memcmp_expand_2(ptr %a, ptr %b) {
; CHECK-LABEL: memcmp_expand_2:
; CHECK: .functype memcmp_expand_2 (i32, i32) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.load16_u $push1=, 0($0):p2align=0
; CHECK-NEXT: i32.load16_u $push0=, 0($1):p2align=0
; CHECK-NEXT: i32.eq $push2=, $pop1, $pop0
; CHECK-NEXT: return $pop2
%cmp_2 = call i32 @memcmp(ptr %a, ptr %b, i32 2)
%res = icmp eq i32 %cmp_2, 0
ret i1 %res
}
define i1 @memcmp_expand_2_align(ptr align(2) %a, ptr align(2) %b) {
; CHECK-LABEL: memcmp_expand_2_align:
; CHECK: .functype memcmp_expand_2_align (i32, i32) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.load16_u $push1=, 0($0)
; CHECK-NEXT: i32.load16_u $push0=, 0($1)
; CHECK-NEXT: i32.eq $push2=, $pop1, $pop0
; CHECK-NEXT: return $pop2
%cmp_2 = call i32 @memcmp(ptr %a, ptr %b, i32 2)
%res = icmp eq i32 %cmp_2, 0
ret i1 %res
}
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/27209 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/195/builds/12089 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/52/builds/9823 Here is the relevant piece of the build log for the reference |
Fixes llvm#61400. Added test case in llvm/test/CodeGen/WebAssembly/memcmp-expand.ll
Fixes #61400
Added test case in llvm/test/CodeGen/WebAssembly/memcmp-expand.ll