cmd/internal/obj/ppc64: allow VR register arguments to VS registers

author Paul E. Murphy <murp@ibm.com>

Tue, 9 Mar 2021 22:55:31 +0000 (16:55 -0600)

committer Lynn Boger <laboger@linux.vnet.ibm.com>

Tue, 14 Sep 2021 13:53:08 +0000 (13:53 +0000)
author Paul E. Murphy <murp@ibm.com>
Tue, 9 Mar 2021 22:55:31 +0000 (16:55 -0600)
committer Lynn Boger <laboger@linux.vnet.ibm.com>
Tue, 14 Sep 2021 13:53:08 +0000 (13:53 +0000)
diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s

index b6c0aa5035c013652dcd7df91505d78cfd9ac6a7..28ceb621cb3315d643f0c51f88858abc59587dab 100644 (file)
--- a/src/cmd/asm/internal/asm/testdata/ppc64.s
+++ b/src/cmd/asm/internal/asm/testdata/ppc64.s
@@ -649,6 +649,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
         LXVB16X (R3)(R4), VS1           // 7c241ed8
         LXVW4X (R3)(R4), VS1            // 7c241e18
         LXV 16(R3), VS1                 // f4230011
+       LXV 16(R3), VS33                // f4230019
+       LXV 16(R3), V1                  // f4230019
         LXVL R3, R4, VS1                // 7c23221a
         LXVLL R3, R4, VS1               // 7c23225a
         LXVX R3, R4, VS1                // 7c232218
@@ -668,8 +670,13 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
         MTFPRD R3, F0                   // 7c030166
         MFVRD V0, R3                    // 7c030067
         MFVSRLD VS63,R4                 // 7fe40267
+       MFVSRLD V31,R4                  // 7fe40267
         MFVSRWZ VS33,R4                 // 7c2400e7
+       MFVSRWZ V1,R4                   // 7c2400e7
         MTVSRD R3, VS1                  // 7c230166
+       MTVSRDD R3, R4, VS1             // 7c232366
+       MTVSRDD R3, R4, VS33            // 7c232367
+       MTVSRDD R3, R4, V1              // 7c232367
         MTVRD R3, V13                   // 7da30167
         MTVSRWA R4, VS31                // 7fe401a6
         MTVSRWS R4, VS32                // 7c040327
@@ -678,6 +685,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
         XXBRW VS1, VS2                  // f04f0f6c
         XXBRH VS2, VS3                  // f067176c
         XXLAND VS1, VS2, VS3            // f0611410
+       XXLAND V1, V2, V3               // f0611417
+       XXLAND VS33, VS34, VS35         // f0611417
         XXLANDC VS1, VS2, VS3           // f0611450
         XXLEQV VS0, VS1, VS2            // f0400dd0
         XXLNAND VS0, VS1, VS2           // f0400d90
@@ -687,11 +696,17 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
         XXLORQ VS1, VS2, VS3            // f0611490
         XXLXOR VS1, VS2, VS3            // f06114d0
         XXSEL VS1, VS2, VS3, VS4        // f08110f0
+       XXSEL VS33, VS34, VS35, VS36    // f08110ff
+       XXSEL V1, V2, V3, V4            // f08110ff
         XXMRGHW VS1, VS2, VS3           // f0611090
         XXMRGLW VS1, VS2, VS3           // f0611190
         XXSPLTW VS1, $1, VS2            // f0410a90
+       XXSPLTW VS33, $1, VS34          // f0410a93
+       XXSPLTW V1, $1, V2              // f0410a93
         XXPERM VS1, VS2, VS3            // f06110d0
         XXSLDWI VS1, VS2, $1, VS3       // f0611110
+       XXSLDWI V1, V2, $1, V3          // f0611117
+       XXSLDWI VS33, VS34, $1, VS35    // f0611117
         XSCVDPSP VS1, VS2               // f0400c24
         XVCVDPSP VS1, VS2               // f0400e24
         XSCVSXDDP VS1, VS2              // f0400de0
diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go

index e57beb3276c0d7732f36d2d5a5039d86731ebd7b..dda24a0b966c4479ed088cc47b6c2fa5f84c3efe 100644 (file)
--- a/src/cmd/internal/obj/ppc64/a.out.go
+++ b/src/cmd/internal/obj/ppc64/a.out.go
@@ -79,8 +79,10 @@ const (
         REG_R30
         REG_R31
  
-       /* F0=4128 ... F31=4159 */
-       REG_F0
+       /* Align FPR and VSR vectors such that when masked with 0x3F they produce
+          an equivalent VSX register. */
+       /* F0=4160 ... F31=4191 */
+       REG_F0 = obj.RBasePPC64 + iota + 32
         REG_F1
         REG_F2
         REG_F3
@@ -113,7 +115,7 @@ const (
         REG_F30
         REG_F31
  
-       /* V0=4160 ... V31=4191 */
+       /* V0=4192 ... V31=4223 */
         REG_V0
         REG_V1
         REG_V2
@@ -147,7 +149,7 @@ const (
         REG_V30
         REG_V31
  
-       /* VS0=4192 ... VS63=4255 */
+       /* VS0=4224 ... VS63=4287 */
         REG_VS0
         REG_VS1
         REG_VS2
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go

index e642413590d7d94139aa5f9395de8d4da926586d..1d92c4866f15d99c940e8c5025b4138db3ef2bbb 100644 (file)
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -428,15 +428,13 @@ var optab = []Optab{
         {as: ASTXSIWX, a1: C_VSREG, a6: C_SOREG, type_: 86, size: 4}, /* vsx scalar as integer store, xx1-form */
  
         /* VSX move from VSR */
-       {as: AMFVSRD, a1: C_VSREG, a6: C_REG, type_: 88, size: 4}, /* vsx move from vsr, xx1-form */
+       {as: AMFVSRD, a1: C_VSREG, a6: C_REG, type_: 88, size: 4},
         {as: AMFVSRD, a1: C_FREG, a6: C_REG, type_: 88, size: 4},
-       {as: AMFVSRD, a1: C_VREG, a6: C_REG, type_: 88, size: 4},
  
         /* VSX move to VSR */
-       {as: AMTVSRD, a1: C_REG, a6: C_VSREG, type_: 88, size: 4}, /* vsx move to vsr, xx1-form */
-       {as: AMTVSRD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 88, size: 4},
-       {as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 88, size: 4},
-       {as: AMTVSRD, a1: C_REG, a6: C_VREG, type_: 88, size: 4},
+       {as: AMTVSRD, a1: C_REG, a6: C_VSREG, type_: 104, size: 4},
+       {as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4},
+       {as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4},
  
         /* VSX logical */
         {as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx and, xx3-form */
@@ -1036,13 +1034,14 @@ func (c *ctxt9) oplook(p *obj.Prog) *Optab {
         // c.ctxt.Logf("oplook %v %d %d %d %d\n", p, a1, a2, a3, a4, a5, a6)
         ops := oprange[p.As&obj.AMask]
         c1 := &xcmp[a1]
+       c2 := &xcmp[a2]
         c3 := &xcmp[a3]
         c4 := &xcmp[a4]
         c5 := &xcmp[a5]
         c6 := &xcmp[a6]
         for i := range ops {
                 op := &ops[i]
-               if int(op.a2) == a2 && c1[op.a1] && c3[op.a3] && c4[op.a4] && c5[op.a5] && c6[op.a6] {
+               if c1[op.a1] && c2[op.a2] && c3[op.a3] && c4[op.a4] && c5[op.a5] && c6[op.a6] {
                         p.Optab = uint16(cap(optab) - cap(ops) + i + 1)
                         return op
                 }
@@ -1116,6 +1115,12 @@ func cmp(a int, b int) bool {
                         return r0iszero != 0 /*TypeKind(100016)*/
                 }
  
+       case C_VSREG:
+               /* Allow any VR argument as a VSR operand. */
+               if b == C_VREG {
+                       return true
+               }
+
         case C_ANY:
                 return true
         }
@@ -1594,7 +1599,6 @@ func buildop(ctxt *obj.Link) {
                         opset(AMTVRD, r0)
                         opset(AMTVSRWA, r0)
                         opset(AMTVSRWZ, r0)
-                       opset(AMTVSRDD, r0)
                         opset(AMTVSRWS, r0)
  
                 case AXXLAND: /* xxland, xxlandc, xxleqv, xxlnand */
@@ -1977,6 +1981,7 @@ func buildop(ctxt *obj.Link) {
                         ACMPEQB,
                         AECIWX,
                         ACLRLSLWI,
+                       AMTVSRDD,
                         obj.ANOP,
                         obj.ATEXT,
                         obj.AUNDEF,
@@ -2075,50 +2080,32 @@ func AOP_IR(op uint32, d uint32, simm uint32) uint32 {
  }
  
  /* XX1-form 3-register operands, 1 VSR operand */
-func AOP_XX1(op uint32, d uint32, a uint32, b uint32) uint32 {
-       /* For the XX-form encodings, we need the VSX register number to be exactly */
-       /* between 0-63, so we can properly set the rightmost bits. */
-       r := d - REG_VS0
+func AOP_XX1(op uint32, r uint32, a uint32, b uint32) uint32 {
         return op | (r&31)<<21 | (a&31)<<16 | (b&31)<<11 | (r&32)>>5
  }
  
  /* XX2-form 3-register operands, 2 VSR operands */
-func AOP_XX2(op uint32, d uint32, a uint32, b uint32) uint32 {
-       xt := d - REG_VS0
-       xb := b - REG_VS0
+func AOP_XX2(op uint32, xt uint32, a uint32, xb uint32) uint32 {
         return op | (xt&31)<<21 | (a&3)<<16 | (xb&31)<<11 | (xb&32)>>4 | (xt&32)>>5
  }
  
  /* XX3-form 3 VSR operands */
-func AOP_XX3(op uint32, d uint32, a uint32, b uint32) uint32 {
-       xt := d - REG_VS0
-       xa := a - REG_VS0
-       xb := b - REG_VS0
+func AOP_XX3(op uint32, xt uint32, xa uint32, xb uint32) uint32 {
         return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
  }
  
  /* XX3-form 3 VSR operands + immediate */
-func AOP_XX3I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
-       xt := d - REG_VS0
-       xa := a - REG_VS0
-       xb := b - REG_VS0
+func AOP_XX3I(op uint32, xt uint32, xa uint32, xb uint32, c uint32) uint32 {
         return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (c&3)<<8 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
  }
  
  /* XX4-form, 4 VSR operands */
-func AOP_XX4(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
-       xt := d - REG_VS0
-       xa := a - REG_VS0
-       xb := b - REG_VS0
-       xc := c - REG_VS0
+func AOP_XX4(op uint32, xt uint32, xa uint32, xb uint32, xc uint32) uint32 {
         return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xc&31)<<6 | (xc&32)>>2 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
  }
  
  /* DQ-form, VSR register, register + offset operands */
-func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 {
-       /* For the DQ-form encodings, we need the VSX register number to be exactly */
-       /* between 0-63, so we can properly set the SX bit. */
-       r := d - REG_VS0
+func AOP_DQ(op uint32, xt uint32, a uint32, b uint32) uint32 {
         /* The EA for this instruction form is (RA) + DQ << 4, where DQ is a 12-bit signed integer. */
         /* In order to match the output of the GNU objdump (and make the usage in Go asm easier), the */
         /* instruction is called using the sign extended value (i.e. a valid offset would be -32752 or 32752, */
@@ -2126,7 +2113,7 @@ func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 {
         /* bits 0 to 3 in 'dq' need to be zero, otherwise this will generate an illegal instruction. */
         /* If in doubt how this instruction form is encoded, refer to ISA 3.0b, pages 492 and 507. */
         dq := b >> 4
-       return op | (r&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (r&32)>>2
+       return op | (xt&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (xt&32)>>2
  }
  
  /* Z23-form, 3-register operands + CY field */
@@ -3586,33 +3573,8 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 /* 3-register operand order: (RB)(RA*1), XT */
                 o1 = AOP_XX1(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(p.From.Reg))
  
-       case 88: /* VSX instructions, XX1-form */
-               /* reg reg none OR reg reg reg */
-               /* 3-register operand order: RA, RB, XT */
-               /* 2-register operand order: XS, RA or RA, XT */
-               xt := int32(p.To.Reg)
-               xs := int32(p.From.Reg)
-               /* We need to treat the special case of extended mnemonics that may have a FREG/VREG as an argument */
-               if REG_V0 <= xt && xt <= REG_V31 {
-                       /* Convert V0-V31 to VS32-VS63 */
-                       xt = xt + 64
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
-               } else if REG_F0 <= xt && xt <= REG_F31 {
-                       /* Convert F0-F31 to VS0-VS31 */
-                       xt = xt + 64
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
-               } else if REG_VS0 <= xt && xt <= REG_VS63 {
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xt), uint32(p.From.Reg), uint32(p.Reg))
-               } else if REG_V0 <= xs && xs <= REG_V31 {
-                       /* Likewise for XS */
-                       xs = xs + 64
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
-               } else if REG_F0 <= xs && xs <= REG_F31 {
-                       xs = xs + 64
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
-               } else if REG_VS0 <= xs && xs <= REG_VS63 {
-                       o1 = AOP_XX1(c.oprrr(p.As), uint32(xs), uint32(p.To.Reg), uint32(p.Reg))
-               }
+       case 88: /* VSX mfvsr* instructions, XX1-form XS,RA */
+               o1 = AOP_XX1(c.oprrr(p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
  
         case 89: /* VSX instructions, XX2-form */
                 /* reg none reg OR reg imm reg */
@@ -3743,6 +3705,9 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                 mb := uint32(c.regoff(&p.RestArgs[0].Addr))
                 me := uint32(c.regoff(&p.RestArgs[1].Addr))
                 o1 = OP_RLW(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), mb, me)
+
+       case 104: /* VSX mtvsr* instructions, XX1-form RA,RB,XT */
+               o1 = AOP_XX1(c.oprrr(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
         }
  
         out[0] = o1
diff --git a/src/cmd/internal/obj/ppc64/asm_test.go b/src/cmd/internal/obj/ppc64/asm_test.go

index 70dabc20175a61ffddb75683b8056b3f6cfe6d0a..b851d3c86b99a7f220e224869723175816d4ebe0 100644 (file)
--- a/src/cmd/internal/obj/ppc64/asm_test.go
+++ b/src/cmd/internal/obj/ppc64/asm_test.go
@@ -107,3 +107,44 @@ func TestPCalign(t *testing.T) {
                 t.Errorf("Invalid alignment not detected for PCALIGN\n")
         }
  }
+
+// Verify register constants are correctly aligned. Much of the ppc64 assembler assumes masking out significant
+// bits will produce a valid register number:
+// REG_Rx & 31 == x
+// REG_Fx & 31 == x
+// REG_Vx & 31 == x
+// REG_VSx & 63 == x
+// REG_SPRx & 1023 == x
+// REG_CRx & 7 == x
+//
+// VR and FPR disjointly overlap VSR, interpreting as VSR registers should produce the correctly overlapped VSR.
+// REG_FPx & 63 == x
+// REG_Vx & 63 == x + 32
+func TestRegValueAlignment(t *testing.T) {
+       tstFunc := func(rstart, rend, msk, rout int) {
+               for i := rstart; i <= rend; i++ {
+                       if i&msk != rout {
+                               t.Errorf("%v is not aligned to 0x%X (expected %d, got %d)\n", rconv(i), msk, rout, rstart&msk)
+                       }
+                       rout++
+               }
+       }
+       var testType = []struct {
+               rstart int
+               rend   int
+               msk    int
+               rout   int
+       }{
+               {REG_VS0, REG_VS63, 63, 0},
+               {REG_R0, REG_R31, 31, 0},
+               {REG_F0, REG_F31, 31, 0},
+               {REG_V0, REG_V31, 31, 0},
+               {REG_V0, REG_V31, 63, 32},
+               {REG_F0, REG_F31, 63, 0},
+               {REG_SPR0, REG_SPR0 + 1023, 1023, 0},
+               {REG_CR0, REG_CR7, 7, 0},
+       }
+       for _, t := range testType {
+               tstFunc(t.rstart, t.rend, t.msk, t.rout)
+       }
+}
author	Paul E. Murphy <murp@ibm.com>
	Tue, 9 Mar 2021 22:55:31 +0000 (16:55 -0600)
committer	Lynn Boger <laboger@linux.vnet.ibm.com>
	Tue, 14 Sep 2021 13:53:08 +0000 (13:53 +0000)
src/cmd/asm/internal/asm/testdata/ppc64.s		patch \| blob \| history
src/cmd/internal/obj/ppc64/a.out.go		patch \| blob \| history
src/cmd/internal/obj/ppc64/asm9.go		patch \| blob \| history
src/cmd/internal/obj/ppc64/asm_test.go		patch \| blob \| history