forked from MihaZupan/runtime-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X64] [MihaZupan] Add SearchValues<char> implementation for two sets of 128 chars #464
Comments
Top method regressions
15 (3.78 % of base) - System.Buffers.IndexOfAnyAsciiSearcher:ComputeAnyByteState(System.ReadOnlySpan`1[ubyte],byref)
; Assembly listing for method System.Buffers.IndexOfAnyAsciiSearcher:ComputeAnyByteState(System.ReadOnlySpan`1[ubyte],byref) (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
-; 0 inlinees with PGO data; 12 single block inlinees; 4 inlinees without PGO data
+; 0 inlinees with PGO data; 14 single block inlinees; 4 inlinees without PGO data
; Final local variable assignments
;
-; V00 arg0 [V00,T11] ( 3, 3 ) struct (16) [rbp-0x118] do-not-enreg[SA] multireg-arg single-def ptr <System.ReadOnlySpan`1[ubyte]>
-; V01 arg1 [V01,T10] ( 3, 3 ) byref -> rdx single-def
+; V00 arg0 [V00,T10] ( 3, 3 ) struct (16) [rbp-0x118] do-not-enreg[SA] multireg-arg single-def ptr <System.ReadOnlySpan`1[ubyte]>
+; V01 arg1 [V01,T09] ( 3, 3 ) byref -> rdx single-def
; V02 loc0 [V02 ] ( 3, 4 ) simd16 -> [rbp-0x130] do-not-enreg[XS] addr-exposed ld-addr-op <System.Runtime.Intrinsics.Vector128`1[ubyte]>
; V03 loc1 [V03 ] ( 3, 4 ) simd16 -> [rbp-0x140] do-not-enreg[XS] addr-exposed ld-addr-op <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V04 loc2 [V04,T17] ( 0, 0 ) long -> zero-ref ptr
-;* V05 loc3 [V05,T18] ( 0, 0 ) long -> zero-ref ptr
+;* V04 loc2 [V04,T18] ( 0, 0 ) long -> zero-ref ptr
+;* V05 loc3 [V05,T19] ( 0, 0 ) long -> zero-ref ptr
; V06 loc4 [V06 ] ( 3, 6 ) struct (32) [rbp-0x28] do-not-enreg[XS] addr-exposed ld-addr-op unsafe-buffer <System.Buffers.BitVector256>
;* V07 loc5 [V07 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op <System.ReadOnlySpan`1[ubyte]>
-;* V08 loc6 [V08,T19] ( 0, 0 ) int -> zero-ref ptr
-; V09 loc7 [V09,T01] ( 5, 20 ) ubyte -> rsi
-; V10 loc8 [V10,T03] ( 4, 12 ) int -> r8
-; V11 loc9 [V11,T06] ( 3, 8 ) int -> rsi
-; V12 tmp0 [V12,T20] ( 1, 1 ) int -> [rbp-0x144] do-not-enreg[V] "GSCookie dummy"
-;# V13 OutArgs [V13 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V14 tmp2 [V14,T04] ( 3, 12 ) long -> rsi "dup spill"
-; V15 tmp3 [V15,T05] ( 3, 12 ) long -> rsi "dup spill"
-; V16 tmp4 [V16,T14] ( 2, 4 ) struct (192) [rbp-0xE8] do-not-enreg[SF] ld-addr-op unsafe-buffer "NewObj constructor temp" <System.Buffers.IndexOfAnyAsciiSearcher+AnyByteState>
-; V17 tmp5 [V17,T07] ( 2, 8 ) int -> r8 "Inline stloc first use temp"
-; V18 tmp6 [V18,T08] ( 2, 8 ) int -> r9 "Inline stloc first use temp"
-; V19 tmp7 [V19,T00] ( 3, 24 ) byref -> r8 "dup spill"
-; V20 tmp8 [V20,T21] ( 3, 6 ) simd16 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-; V21 tmp9 [V21,T22] ( 3, 6 ) simd16 -> mm1 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-; V22 tmp10 [V22,T15] ( 2, 4 ) struct (32) [rbp-0x108] do-not-enreg[S] unsafe-buffer "Inlining Arg" <System.Buffers.BitVector256>
-; V23 tmp11 [V23,T23] ( 3, 6 ) simd32 -> mm0 "spilled call-like call argument"
-;* V24 tmp12 [V24 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-; V25 tmp13 [V25,T27] ( 1, 1 ) simd64 -> [rbp-0x1B0] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
-; V26 tmp14 [V26,T24] ( 3, 6 ) simd32 -> mm1 "spilled call-like call argument"
-;* V27 tmp15 [V27 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-; V28 tmp16 [V28,T28] ( 1, 1 ) simd64 -> [rbp-0x1F0] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
-; V29 tmp17 [V29,T12] ( 2, 5 ) byref -> rax single-def "field V07._reference (fldOffset=0x0)" P-INDEP
-; V30 tmp18 [V30,T09] ( 3, 6 ) int -> rcx "field V07._length (fldOffset=0x8)" P-INDEP
-; V31 tmp19 [V31,T25] ( 2, 2 ) simd64 -> mm0 "V16.[000..064)"
-; V32 tmp20 [V32,T26] ( 2, 2 ) simd64 -> mm1 "V16.[064..128)"
-; V33 GsCookie [V33 ] ( 1, 1 ) long -> [rbp-0x08] do-not-enreg[X] addr-exposed "GSSecurityCookie"
-; V34 tmp22 [V34,T16] ( 3, 3 ) struct (16) [rbp-0x200] do-not-enreg[SA] multireg-arg must-init ptr "shadowVar" <System.ReadOnlySpan`1[ubyte]>
-; V35 tmp23 [V35,T13] ( 4, 4 ) byref -> rdx single-def "shadowVar"
-; V36 rat0 [V36,T02] ( 5, 17 ) long -> rdi "Widened IV V08"
+;* V08 loc6 [V08,T20] ( 0, 0 ) int -> zero-ref ptr
+; V09 loc7 [V09,T01] ( 7, 22 ) ubyte -> rsi
+; V10 tmp0 [V10,T21] ( 1, 1 ) int -> [rbp-0x144] do-not-enreg[V] "GSCookie dummy"
+;# V11 OutArgs [V11 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+; V12 tmp2 [V12,T13] ( 2, 4 ) struct (192) [rbp-0xE8] do-not-enreg[SF] ld-addr-op unsafe-buffer "NewObj constructor temp" <System.Buffers.IndexOfAnyAsciiSearcher+AnyByteState>
+; V13 tmp3 [V13,T06] ( 2, 8 ) int -> r8 "Inline stloc first use temp"
+; V14 tmp4 [V14,T07] ( 2, 8 ) int -> r9 "Inline stloc first use temp"
+; V15 tmp5 [V15,T00] ( 3, 24 ) byref -> r8 "dup spill"
+; V16 tmp6 [V16,T14] ( 2, 4 ) int -> r8 "Inline stloc first use temp"
+;* V17 tmp7 [V17 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V18 tmp8 [V18,T03] ( 3, 12 ) long -> rsi "dup spill"
+; V19 tmp9 [V19,T04] ( 3, 12 ) int -> rsi "Inlining Arg"
+; V20 tmp10 [V20,T15] ( 2, 4 ) int -> r8 "Inline stloc first use temp"
+;* V21 tmp11 [V21 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V22 tmp12 [V22,T05] ( 3, 12 ) long -> rsi "dup spill"
+; V23 tmp13 [V23,T22] ( 3, 6 ) simd16 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V24 tmp14 [V24,T23] ( 3, 6 ) simd16 -> mm1 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V25 tmp15 [V25,T16] ( 2, 4 ) struct (32) [rbp-0x108] do-not-enreg[S] unsafe-buffer "Inlining Arg" <System.Buffers.BitVector256>
+; V26 tmp16 [V26,T24] ( 3, 6 ) simd32 -> mm0 "spilled call-like call argument"
+;* V27 tmp17 [V27 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+; V28 tmp18 [V28,T28] ( 1, 1 ) simd64 -> [rbp-0x1B0] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+; V29 tmp19 [V29,T25] ( 3, 6 ) simd32 -> mm1 "spilled call-like call argument"
+;* V30 tmp20 [V30 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+; V31 tmp21 [V31,T29] ( 1, 1 ) simd64 -> [rbp-0x1F0] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+; V32 tmp22 [V32,T11] ( 2, 5 ) byref -> rax single-def "field V07._reference (fldOffset=0x0)" P-INDEP
+; V33 tmp23 [V33,T08] ( 3, 6 ) int -> rcx "field V07._length (fldOffset=0x8)" P-INDEP
+; V34 tmp24 [V34,T26] ( 2, 2 ) simd64 -> mm0 "V12.[000..064)"
+; V35 tmp25 [V35,T27] ( 2, 2 ) simd64 -> mm1 "V12.[064..128)"
+; V36 GsCookie [V36 ] ( 1, 1 ) long -> [rbp-0x08] do-not-enreg[X] addr-exposed "GSSecurityCookie"
+; V37 tmp27 [V37,T17] ( 3, 3 ) struct (16) [rbp-0x200] do-not-enreg[SA] multireg-arg must-init ptr "shadowVar" <System.ReadOnlySpan`1[ubyte]>
+; V38 tmp28 [V38,T12] ( 4, 4 ) byref -> rdx single-def "shadowVar"
+; V39 rat0 [V39,T02] ( 5, 17 ) long -> rdi "Widened IV V08"
;
; Lcl frame size = 512
G_M64303_IG01:
push rbp
sub rsp, 512
lea rbp, [rsp+0x200]
xor eax, eax
mov qword ptr [rbp-0x200], rax
mov qword ptr [rbp-0x08], 0xD1FFAB1E
mov bword ptr [rbp-0x118], rdi
mov qword ptr [rbp-0x110], rsi
;; size=47 bbWeight=1 PerfScore 6.00
G_M64303_IG02:
vmovdqu xmm0, xmmword ptr [rbp-0x118]
vmovdqu xmmword ptr [rbp-0x200], xmm0
;; size=16 bbWeight=1 PerfScore 4.00
G_M64303_IG03:
vxorps xmm0, xmm0, xmm0
vmovaps xmmword ptr [rbp-0x130], xmm0
vmovaps xmmword ptr [rbp-0x140], xmm0
vxorps ymm0, ymm0, ymm0
vmovdqu ymmword ptr [rbp-0x28], ymm0
mov rax, bword ptr [rbp-0x200]
mov ecx, dword ptr [rbp-0x1F8]
xor edi, edi
test ecx, ecx
- jle SHORT G_M64303_IG08
+ jle G_M64303_IG08
align [0 bytes for IG04]
- ;; size=48 bbWeight=1 PerfScore 7.17
+ ;; size=52 bbWeight=1 PerfScore 7.17
G_M64303_IG04:
movzx rsi, byte ptr [rax+rdi]
mov r8d, esi
sar r8d, 5
mov r9d, 1
shlx r9d, r9d, esi
lea r10, bword ptr [rbp-0x28]
lea r8, bword ptr [r10+4*r8]
or dword ptr [r8], r9d
- mov r8d, esi
- sar r8d, 4
- and esi, 15
- cmp r8d, 8
+ cmp esi, 128
jge SHORT G_M64303_IG06
- ;; size=50 bbWeight=4 PerfScore 39.00
+ ;; size=42 bbWeight=4 PerfScore 35.00
G_M64303_IG05:
+ mov r8d, esi
+ sar r8d, 4
lea r9, [rbp-0x130]
- mov esi, esi
+ and esi, 15
add rsi, r9
mov r9d, 1
shlx r8d, r9d, r8d
movzx r8, r8b
or byte ptr [rsi], r8b
jmp SHORT G_M64303_IG07
- ;; size=32 bbWeight=2 PerfScore 14.00
+ ;; size=40 bbWeight=2 PerfScore 15.50
G_M64303_IG06:
+ add esi, -128
+ mov r8d, esi
+ sar r8d, 4
lea r9, [rbp-0x140]
- mov esi, esi
+ and esi, 15
add rsi, r9
- add r8d, -8
mov r9d, 1
shlx r8d, r9d, r8d
movzx r8, r8b
or byte ptr [rsi], r8b
- ;; size=34 bbWeight=2 PerfScore 10.50
+ ;; size=41 bbWeight=2 PerfScore 12.00
G_M64303_IG07:
inc edi
cmp edi, ecx
- jl SHORT G_M64303_IG04
- ;; size=6 bbWeight=4 PerfScore 6.00
+ jl G_M64303_IG04
+ ;; size=10 bbWeight=4 PerfScore 6.00
G_M64303_IG08:
vmovaps xmm0, xmmword ptr [rbp-0x130]
vmovaps xmm1, xmmword ptr [rbp-0x140]
vmovdqu ymm2, ymmword ptr [rbp-0x28]
vmovdqu ymmword ptr [rbp-0x108], ymm2
vmovaps ymm2, ymm0
vinserti128 ymm0, ymm2, xmm0, 1
vmovups zmm2, zmmword ptr [rbp-0x1B0]
vinsertf64x4 zmm2, zmm2, ymm0, 0
vinsertf64x4 zmm0, zmm2, ymm0, 1
vmovaps ymm2, ymm1
vinserti128 ymm1, ymm2, xmm1, 1
vmovups zmm2, zmmword ptr [rbp-0x1F0]
vinsertf64x4 zmm2, zmm2, ymm1, 0
vinsertf64x4 zmm1, zmm2, ymm1, 1
vmovdqu ymm2, ymmword ptr [rbp-0x108]
vmovdqu ymmword ptr [rbp-0x68], ymm2
vmovups zmmword ptr [rdx], zmm0
vmovups zmmword ptr [rdx+0x40], zmm1
vmovups ymm0, ymmword ptr [rbp-0x68]
vmovups ymmword ptr [rdx+0x80], ymm0
cmp qword ptr [rbp-0x08], 0xD1FFAB1E
je SHORT G_M64303_IG09
call CORINFO_HELP_FAIL_FAST
;; size=151 bbWeight=1 PerfScore 48.50
G_M64303_IG09:
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M64303_IG10:
vzeroupper
add rsp, 512
pop rbp
ret
;; size=12 bbWeight=1 PerfScore 2.75
-; Total bytes of code 397, prolog size 47, PerfScore 138.17, instruction count 81, allocated bytes for code 401 (MethodHash=97c904d0) for method System.Buffers.IndexOfAnyAsciiSearcher:ComputeAnyByteState(System.ReadOnlySpan`1[ubyte],byref) (FullOpts)
+; Total bytes of code 412, prolog size 47, PerfScore 137.17, instruction count 82, allocated bytes for code 412 (MethodHash=97c904d0) for method System.Buffers.IndexOfAnyAsciiSearcher:ComputeAnyByteState(System.ReadOnlySpan`1[ubyte],byref) (FullOpts)
Note: some changes were skipped as they were too large to fit into a comment. Larger list of diffs: https://gist.github.com/MihuBot/4d0dd4484f7c8523a4e0a64b9031560c
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Job completed in 14 minutes.
dotnet/runtime#103216
Diffs
Artifacts:
The text was updated successfully, but these errors were encountered: