Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Relax validation of SCATTER/GATHER memory ops to spec, Fixes #193 #233

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion operand/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ func IsVmz(op Op) bool {

func isvm(op Op, idx func(Op) bool) bool {
m, ok := op.(Mem)
return ok && IsR64(m.Base) && idx(m.Index)
return ok && (m.Base == nil || IsR64(m.Base)) && idx(m.Index)
}

// IsREL8 returns true if op is an 8-bit offset relative to instruction pointer.
Expand Down
2 changes: 1 addition & 1 deletion pass/verify.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func VerifyMemOperands(i *ir.Instruction) error {
continue
}

if m.Base == nil {
if m.Base == nil && !(operand.IsVmx(m) || operand.IsVmy(m) || operand.IsVmz(m)) {
return errors.New("bad memory operand: missing base register")
}

Expand Down
64 changes: 64 additions & 0 deletions tests/fixedbugs/issue193/asm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//go:build ignore
// +build ignore

package main

import (
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"
)

var offQ Mem

func generateAddSub(useBase bool) {
funcName := "AddSubPairs"
if !useBase {
funcName += "NoBase"
}

TEXT(funcName, 0, "func(a *[8]int64)")
Doc("Destructively Add/Subtract successive pairs")
offsets, evenVals, oddVals, result := YMM(), YMM(), YMM(), YMM()

VMOVDQU64(offQ, offsets)
var aMem Mem

if useBase {
aPtr := Load(Param("a"), GP64())
aMem = Mem{Base: aPtr, Index: offsets, Scale: 8}
} else {
aPtrMem, err := Param("a").Resolve()
if err != nil {
panic(err)
}
VPSLLQ(Imm(3), offsets, offsets)
VPADDQ_BCST(aPtrMem.Addr, offsets, offsets)
aMem = Mem{Index: offsets, Scale: 1}
}

k := K()
KXNORB(K0, K0, k)
VPGATHERQQ(aMem, k, evenVals)
KXNORB(K0, K0, k)
VPGATHERQQ(aMem.Offset(8), k, oddVals)
VPADDQ(oddVals, evenVals, result)
KXNORB(K0, K0, k)
VPSCATTERQQ(result, k, aMem)
VPSUBQ(oddVals, evenVals, result)
KXNORB(K0, K0, k)
VPSCATTERQQ(result, k, aMem.Offset(8))
RET()
}

func main() {
offQ = GLOBL("offQ", RODATA|NOPTR)
for i := 0; i < 4; i++ {
DATA(i*8, U64(2*i))
}

generateAddSub(true)
generateAddSub(false)

Generate()
}
21 changes: 21 additions & 0 deletions tests/fixedbugs/issue193/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Package issue193 tests for the ability to generate VPSCATTER{D,Q}{D,Q} and
// VPGATHER{D,Q}{D,Q} instructions *without* specifying a base register in
// the destination/source VSIB addressing (VM{64,32}{x,y,z}) memory operands.
package issue193

// From: https://www.felixcloutier.com/x86/vpscatterdd:vpscatterdq:vpscatterqd:vpscatterqq
//
// BASE_ADDR stands for the memory operand base address (a GPR); *may not exist*
// VINDEX stands for the memory operand vector of indices (a ZMM register)
// SCALE stands for the memory operand scalar (1, 2, 4 or 8)
// DISP is the optional 1 or 4 byte displacement
//
// For example, prior to this fix Avo requires:
//
// VPSCATTERQQ Z6, K1, 8(BX)(Z7*1)
//
// with BX = 0, when:
//
// VPSCATTERQQ Z6, K1, 8(Z7*1)
//
// is perfectly valid and will suffice.
44 changes: 44 additions & 0 deletions tests/fixedbugs/issue193/issue193.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Code generated by command: go run asm.go -out issue193.s -stubs stub.go. DO NOT EDIT.

#include "textflag.h"

DATA offQ<>+0(SB)/8, $0x0000000000000000
DATA offQ<>+8(SB)/8, $0x0000000000000002
DATA offQ<>+16(SB)/8, $0x0000000000000004
DATA offQ<>+24(SB)/8, $0x0000000000000006
GLOBL offQ<>(SB), RODATA|NOPTR, $32

// func AddSubPairs(a *[8]int64)
// Requires: AVX2, AVX512DQ, AVX512F, AVX512VL
TEXT ·AddSubPairs(SB), $0-8
VMOVDQU64 offQ<>+0(SB), Y0
MOVQ a+0(FP), AX
KXNORB K0, K0, K1
VPGATHERQQ (AX)(Y0*8), K1, Y1
KXNORB K0, K0, K1
VPGATHERQQ 8(AX)(Y0*8), K1, Y2
VPADDQ Y2, Y1, Y3
KXNORB K0, K0, K1
VPSCATTERQQ Y3, K1, (AX)(Y0*8)
VPSUBQ Y2, Y1, Y3
KXNORB K0, K0, K1
VPSCATTERQQ Y3, K1, 8(AX)(Y0*8)
RET

// func AddSubPairsNoBase(a *[8]int64)
// Requires: AVX2, AVX512DQ, AVX512F, AVX512VL
TEXT ·AddSubPairsNoBase(SB), $0-8
VMOVDQU64 offQ<>+0(SB), Y0
VPSLLQ $0x03, Y0, Y0
VPADDQ.BCST a+0(FP), Y0, Y0
KXNORB K0, K0, K1
VPGATHERQQ (Y0*1), K1, Y1
KXNORB K0, K0, K1
VPGATHERQQ 8(Y0*1), K1, Y2
VPADDQ Y2, Y1, Y3
KXNORB K0, K0, K1
VPSCATTERQQ Y3, K1, (Y0*1)
VPSUBQ Y2, Y1, Y3
KXNORB K0, K0, K1
VPSCATTERQQ Y3, K1, 8(Y0*1)
RET
30 changes: 30 additions & 0 deletions tests/fixedbugs/issue193/issue193_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package issue193

import (
"testing"

"golang.org/x/sys/cpu"
)

//go:generate go run asm.go -out issue193.s -stubs stub.go

func TestAddSubs(t *testing.T) {
if !(cpu.X86.HasAVX2 && cpu.X86.HasAVX512F && cpu.X86.HasAVX512DQ && cpu.X86.HasAVX512VL) {
t.Skipf("Skipping issue193 test because runtime lacks needed CPUID features")
}

in := [8]int64{7, 6, 5, 4, 3, 2, 1, 0}
out := [8]int64{13, 1, 9, 1, 5, 1, 1, 1}

val := in
AddSubPairs(&val)
if val != out {
t.Errorf("Bad result %v", val)
}

val = in
AddSubPairsNoBase(&val)
if val != out {
t.Errorf("Bad result %v", val)
}
}
9 changes: 9 additions & 0 deletions tests/fixedbugs/issue193/stub.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.