[PATCH 0/4] Arm64: corrections to recent F64MM / I8MM additions

classic Classic list List threaded Threaded
10 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH 0/4] Arm64: corrections to recent F64MM / I8MM additions

Jan Beulich-2
Judging from the specification, there seem to be a number of issues.
However, I may well be wrong about some or all of them - either the
documentation or my reading of it may be incorrect.

1: correct 64-bit element fmmla encoding
2: correct uzp{1,2} mnemonics
3: correct {su,us}dot SIMD encodings
4: correct address index operands for LD1RO{H,W,D}

Jan
Reply | Threaded
Open this post in threaded view
|

[PATCH 1/4] Arm64: correct 64-bit element fmmla encoding

Jan Beulich-2
There's just one bit of difference to the 32-bit element form, as
per the documentation.

gas/
2020-01-XX  Jan Beulich  <[hidden email]>

        * testsuite/gas/aarch64/f64mm.d,
        testsuite/gas/aarch64/sve-movprfx-mm.d: Adjust expectations.

opcodes/
2020-01-XX  Jan Beulich  <[hidden email]>

        * opcodes/aarch64-tbl.h (aarch64_opcode_table): Correct 64-bit
        FMMLA encoding.
        * opcodes/aarch64-dis-2.c: Re-generate.

--- a/gas/testsuite/gas/aarch64/f64mm.d
+++ b/gas/testsuite/gas/aarch64/f64mm.d
@@ -6,8 +6,8 @@
 Disassembly of section \.text:
 
 0+ <\.text>:
- *[0-9a-f]+: 64dbe6b1 fmmla z17\.d, z21\.d, z27\.d
- *[0-9a-f]+: 64c0e400 fmmla z0\.d, z0\.d, z0\.d
+ *[0-9a-f]+: 64fbe6b1 fmmla z17\.d, z21\.d, z27\.d
+ *[0-9a-f]+: 64e0e400 fmmla z0\.d, z0\.d, z0\.d
  *[0-9a-f]+: a43b17f1 ld1rob {z17\.b}, p5/z, \[sp, x27\]
  *[0-9a-f]+: a42003e0 ld1rob {z0\.b}, p0/z, \[sp, x0\]
  *[0-9a-f]+: a4bb17f1 ld1roh {z17\.h}, p5/z, \[sp, x27\]
--- a/gas/testsuite/gas/aarch64/sve-movprfx-mm.d
+++ b/gas/testsuite/gas/aarch64/sve-movprfx-mm.d
@@ -21,4 +21,4 @@ Disassembly of section \.text:
  *[0-9a-f]+: 0420bc11 movprfx z17, z0
  *[0-9a-f]+: 64bbe6b1 fmmla z17\.s, z21\.s, z27\.s
  *[0-9a-f]+: 0420bc11 movprfx z17, z0
- *[0-9a-f]+: 64dbe6b1 fmmla z17\.d, z21\.d, z27\.d
+ *[0-9a-f]+: 64fbe6b1 fmmla z17\.d, z21\.d, z27\.d
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -5073,7 +5073,7 @@ struct aarch64_opcode aarch64_opcode_tab
   INT8MATMUL_SVE_INSNC ("usdot",  0x44a01800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
   INT8MATMUL_SVE_INSNC ("sudot",  0x44a01c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
   F32MATMUL_SVE_INSNC ("fmmla",   0x64a0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_S, 0, C_SCAN_MOVPRFX, 0),
-  F64MATMUL_SVE_INSNC ("fmmla",   0x64c0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_D, 0, C_SCAN_MOVPRFX, 0),
+  F64MATMUL_SVE_INSNC ("fmmla",   0x64e0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_D, 0, C_SCAN_MOVPRFX, 0),
   F64MATMUL_SVE_INSN ("ld1rob",  0xa4200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_BZU, F_OD(1), 0),
   F64MATMUL_SVE_INSN ("ld1roh",  0xa4a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_HZU, F_OD(1), 0),
   F64MATMUL_SVE_INSN ("ld1row",  0xa5200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_SZU, F_OD(1), 0),

Reply | Threaded
Open this post in threaded view
|

[PATCH 2/4] Arm64: correct uzp{1,2} mnemonics

Jan Beulich-2
In reply to this post by Jan Beulich-2
According to the specification, and in line with the pre-existing
predicate forms, the mnemonics do not include an 'i'.

gas/
2020-01-XX  Jan Beulich  <[hidden email]>

        * testsuite/gas/aarch64/f64mm.s: Drop 'i' from uzip<n>.
        * testsuite/gas/aarch64/f64mm.d: Adjust expectations.

opcodes/
2020-01-XX  Jan Beulich  <[hidden email]>

        * opcodes/aarch64-tbl.h (aarch64_opcode_table): Drop 'i' from
        uzip{1,2}.
        * opcodes/aarch64-dis-2.c: Re-generate.

--- a/gas/testsuite/gas/aarch64/f64mm.d
+++ b/gas/testsuite/gas/aarch64/f64mm.d
@@ -52,10 +52,10 @@ Disassembly of section \.text:
  *[0-9a-f]+: 05a00000 zip1 z0\.q, z0\.q, z0\.q
  *[0-9a-f]+: 05a506b1 zip2 z17\.q, z21\.q, z5\.q
  *[0-9a-f]+: 05a00400 zip2 z0\.q, z0\.q, z0\.q
- *[0-9a-f]+: 05a50ab1 uzip1 z17\.q, z21\.q, z5\.q
- *[0-9a-f]+: 05a00800 uzip1 z0\.q, z0\.q, z0\.q
- *[0-9a-f]+: 05a50eb1 uzip2 z17\.q, z21\.q, z5\.q
- *[0-9a-f]+: 05a00c00 uzip2 z0\.q, z0\.q, z0\.q
+ *[0-9a-f]+: 05a50ab1 uzp1 z17\.q, z21\.q, z5\.q
+ *[0-9a-f]+: 05a00800 uzp1 z0\.q, z0\.q, z0\.q
+ *[0-9a-f]+: 05a50eb1 uzp2 z17\.q, z21\.q, z5\.q
+ *[0-9a-f]+: 05a00c00 uzp2 z0\.q, z0\.q, z0\.q
  *[0-9a-f]+: 05a51ab1 trn1 z17\.q, z21\.q, z5\.q
  *[0-9a-f]+: 05a01800 trn1 z0\.q, z0\.q, z0\.q
  *[0-9a-f]+: 05a51eb1 trn2 z17\.q, z21\.q, z5\.q
--- a/gas/testsuite/gas/aarch64/f64mm.s
+++ b/gas/testsuite/gas/aarch64/f64mm.s
@@ -60,10 +60,10 @@ zip1 z0.q, z0.q, z0.q
 zip2 z17.q, z21.q, z5.q
 zip2 z0.q, z0.q, z0.q
 
-uzip1 z17.q, z21.q, z5.q
-uzip1 z0.q, z0.q, z0.q
-uzip2 z17.q, z21.q, z5.q
-uzip2 z0.q, z0.q, z0.q
+uzp1 z17.q, z21.q, z5.q
+uzp1 z0.q, z0.q, z0.q
+uzp2 z17.q, z21.q, z5.q
+uzp2 z0.q, z0.q, z0.q
 
 trn1 z17.q, z21.q, z5.q
 trn1 z0.q, z0.q, z0.q
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -5084,8 +5084,8 @@ struct aarch64_opcode aarch64_opcode_tab
   F64MATMUL_SVE_INSN ("ld1rod",  0xa5a02000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_DZU, F_OD(1), 0),
   F64MATMUL_SVE_INSN ("zip1",    0x05a00000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
   F64MATMUL_SVE_INSN ("zip2",    0x05a00400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
-  F64MATMUL_SVE_INSN ("uzip1",   0x05a00800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
-  F64MATMUL_SVE_INSN ("uzip2",   0x05a00c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
+  F64MATMUL_SVE_INSN ("uzp1",    0x05a00800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
+  F64MATMUL_SVE_INSN ("uzp2",    0x05a00c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
   F64MATMUL_SVE_INSN ("trn1",    0x05a01800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
   F64MATMUL_SVE_INSN ("trn2",    0x05a01c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
   /* Matrix Multiply advanced SIMD instructions.  */

Reply | Threaded
Open this post in threaded view
|

[PATCH 3/4] Arm64: correct {su,us}dot SIMD encodings

Jan Beulich-2
In reply to this post by Jan Beulich-2
According to the specification these permit the Q bit to control the
vector length operated on, and hence this bit should not already be set
in the opcode table entries (it rather needs setting dynamically). Note
how the test case output did also not match its input. Besides
correcting the test case also extend it to cover both forms.

gas/
2020-01-XX  Jan Beulich  <[hidden email]>

        * testsuite/gas/aarch64/i8mm.s: Add 128-bit form tests for
        by-element usdot. Add 64-bit form tests for by-element sudot.
        * testsuite/gas/aarch64/i8mm.d: Adjust expectations.

opcodes/
2020-01-XX  Jan Beulich  <[hidden email]>

        * opcodes/aarch64-tbl.h (aarch64_opcode_table): Correct SIMD
        forms of SUDOT and USDOT.

--- a/gas/testsuite/gas/aarch64/i8mm.d
+++ b/gas/testsuite/gas/aarch64/i8mm.d
@@ -29,15 +29,23 @@ Disassembly of section \.text:
  *[0-9a-f]+: 6e80a400 ummla v0\.4s, v0\.16b, v0\.16b
  *[0-9a-f]+: 4e80ac00 usmmla v0\.4s, v0\.16b, v0\.16b
  *[0-9a-f]+: 4e9baeb1 usmmla v17\.4s, v21\.16b, v27\.16b
- *[0-9a-f]+: 4e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
- *[0-9a-f]+: 4e809c00 usdot v0\.2s, v0\.8b, v0\.8b
- *[0-9a-f]+: 4e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
- *[0-9a-f]+: 4e809c00 usdot v0\.2s, v0\.8b, v0\.8b
- *[0-9a-f]+: 4fbbfab1 usdot v17\.2s, v21\.8b, v27\.4b\[3\]
- *[0-9a-f]+: 4fa0f800 usdot v0\.2s, v0\.8b, v0\.4b\[3\]
- *[0-9a-f]+: 4f9bf2b1 usdot v17\.2s, v21\.8b, v27\.4b\[0\]
- *[0-9a-f]+: 4f80f000 usdot v0\.2s, v0\.8b, v0\.4b\[0\]
- *[0-9a-f]+: 4f3bfab1 sudot v17\.2s, v21\.8b, v27\.4b\[3\]
- *[0-9a-f]+: 4f20f800 sudot v0\.2s, v0\.8b, v0\.4b\[3\]
- *[0-9a-f]+: 4f1bf2b1 sudot v17\.2s, v21\.8b, v27\.4b\[0\]
- *[0-9a-f]+: 4f00f000 sudot v0\.2s, v0\.8b, v0\.4b\[0\]
+ *[0-9a-f]+: 0e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
+ *[0-9a-f]+: 0e809c00 usdot v0\.2s, v0\.8b, v0\.8b
+ *[0-9a-f]+: 4e9b9eb1 usdot v17\.4s, v21\.16b, v27\.16b
+ *[0-9a-f]+: 4e809c00 usdot v0\.4s, v0\.16b, v0\.16b
+ *[0-9a-f]+: 0fbbfab1 usdot v17\.2s, v21\.8b, v27\.4b\[3\]
+ *[0-9a-f]+: 0fa0f800 usdot v0\.2s, v0\.8b, v0\.4b\[3\]
+ *[0-9a-f]+: 0f9bf2b1 usdot v17\.2s, v21\.8b, v27\.4b\[0\]
+ *[0-9a-f]+: 0f80f000 usdot v0\.2s, v0\.8b, v0\.4b\[0\]
+ *[0-9a-f]+: 4fbbfab1 usdot v17\.4s, v21\.16b, v27\.4b\[3\]
+ *[0-9a-f]+: 4fa0f800 usdot v0\.4s, v0\.16b, v0\.4b\[3\]
+ *[0-9a-f]+: 4f9bf2b1 usdot v17\.4s, v21\.16b, v27\.4b\[0\]
+ *[0-9a-f]+: 4f80f000 usdot v0\.4s, v0\.16b, v0\.4b\[0\]
+ *[0-9a-f]+: 0f3bfab1 sudot v17\.2s, v21\.8b, v27\.4b\[3\]
+ *[0-9a-f]+: 0f20f800 sudot v0\.2s, v0\.8b, v0\.4b\[3\]
+ *[0-9a-f]+: 0f1bf2b1 sudot v17\.2s, v21\.8b, v27\.4b\[0\]
+ *[0-9a-f]+: 0f00f000 sudot v0\.2s, v0\.8b, v0\.4b\[0\]
+ *[0-9a-f]+: 4f3bfab1 sudot v17\.4s, v21\.16b, v27\.4b\[3\]
+ *[0-9a-f]+: 4f20f800 sudot v0\.4s, v0\.16b, v0\.4b\[3\]
+ *[0-9a-f]+: 4f1bf2b1 sudot v17\.4s, v21\.16b, v27\.4b\[0\]
+ *[0-9a-f]+: 4f00f000 sudot v0\.4s, v0\.16b, v0\.4b\[0\]
--- a/gas/testsuite/gas/aarch64/i8mm.s
+++ b/gas/testsuite/gas/aarch64/i8mm.s
@@ -49,7 +49,15 @@ usdot v17.2s, v21.8b, v27.4b[3]
 usdot v0.2s, v0.8b, v0.4b[3]
 usdot v17.2s, v21.8b, v27.4b[0]
 usdot v0.2s, v0.8b, v0.4b[0]
+usdot v17.4s, v21.16b, v27.4b[3]
+usdot v0.4s, v0.16b, v0.4b[3]
+usdot v17.4s, v21.16b, v27.4b[0]
+usdot v0.4s, v0.16b, v0.4b[0]
 
+sudot v17.2s, v21.8b, v27.4b[3]
+sudot v0.2s, v0.8b, v0.4b[3]
+sudot v17.2s, v21.8b, v27.4b[0]
+sudot v0.2s, v0.8b, v0.4b[0]
 sudot v17.4s, v21.16b, v27.4b[3]
 sudot v0.4s, v0.16b, v0.4b[3]
 sudot v17.4s, v21.16b, v27.4b[0]
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -5092,9 +5092,9 @@ struct aarch64_opcode aarch64_opcode_tab
   INT8MATMUL_INSN ("smmla",  0x4e80a400, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
   INT8MATMUL_INSN ("ummla",  0x6e80a400, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
   INT8MATMUL_INSN ("usmmla", 0x4e80ac00, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
-  INT8MATMUL_INSN ("usdot",  0x4e809c00, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_V3DOT, F_SIZEQ),
-  INT8MATMUL_INSN ("usdot",  0x4f80f000, 0xffc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
-  INT8MATMUL_INSN ("sudot",  0x4f00f000, 0xffc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
+  INT8MATMUL_INSN ("usdot",  0x0e809c00, 0xbfe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_V3DOT, F_SIZEQ),
+  INT8MATMUL_INSN ("usdot",  0x0f80f000, 0xbfc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
+  INT8MATMUL_INSN ("sudot",  0x0f00f000, 0xbfc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
 
   /* BFloat instructions.  */
   BFLOAT16_SVE_INSNC ("bfdot",  0x64608000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SHH, 0, C_SCAN_MOVPRFX, 0),

Reply | Threaded
Open this post in threaded view
|

[PATCH 4/4] Arm64: correct address index operands for LD1RO{H,W,D}

Jan Beulich-2
In reply to this post by Jan Beulich-2
Just like their LD1RQ{H,W,D} counterparts, as per the specification the
index registers get scaled by element size.

gas/
2020-01-XX  Jan Beulich  <[hidden email]>

        * testsuite/gas/aarch64/f64mm.s: Scale index of LD1RO{H,W,D}.
        * testsuite/gas/aarch64/f64mm.d: Adjust expectations.

opcodes/
2020-01-XX  Jan Beulich  <[hidden email]>

        * opcodes/aarch64-tbl.h (aarch64_opcode_table): Use
        SVE_ADDR_RX_LSL{1,2,3} for LD1RO{H,W,D}.

--- a/gas/testsuite/gas/aarch64/f64mm.d
+++ b/gas/testsuite/gas/aarch64/f64mm.d
@@ -10,20 +10,20 @@ Disassembly of section \.text:
  *[0-9a-f]+: 64e0e400 fmmla z0\.d, z0\.d, z0\.d
  *[0-9a-f]+: a43b17f1 ld1rob {z17\.b}, p5/z, \[sp, x27\]
  *[0-9a-f]+: a42003e0 ld1rob {z0\.b}, p0/z, \[sp, x0\]
- *[0-9a-f]+: a4bb17f1 ld1roh {z17\.h}, p5/z, \[sp, x27\]
- *[0-9a-f]+: a4a003e0 ld1roh {z0\.h}, p0/z, \[sp, x0\]
- *[0-9a-f]+: a53b17f1 ld1row {z17\.s}, p5/z, \[sp, x27\]
- *[0-9a-f]+: a52003e0 ld1row {z0\.s}, p0/z, \[sp, x0\]
- *[0-9a-f]+: a5bb17f1 ld1rod {z17\.d}, p5/z, \[sp, x27\]
- *[0-9a-f]+: a5a003e0 ld1rod {z0\.d}, p0/z, \[sp, x0\]
+ *[0-9a-f]+: a4bb17f1 ld1roh {z17\.h}, p5/z, \[sp, x27, lsl #1\]
+ *[0-9a-f]+: a4a003e0 ld1roh {z0\.h}, p0/z, \[sp, x0, lsl #1\]
+ *[0-9a-f]+: a53b17f1 ld1row {z17\.s}, p5/z, \[sp, x27, lsl #2\]
+ *[0-9a-f]+: a52003e0 ld1row {z0\.s}, p0/z, \[sp, x0, lsl #2\]
+ *[0-9a-f]+: a5bb17f1 ld1rod {z17\.d}, p5/z, \[sp, x27, lsl #3\]
+ *[0-9a-f]+: a5a003e0 ld1rod {z0\.d}, p0/z, \[sp, x0, lsl #3\]
  *[0-9a-f]+: a43b1411 ld1rob {z17\.b}, p5/z, \[x0, x27\]
  *[0-9a-f]+: a4200000 ld1rob {z0\.b}, p0/z, \[x0, x0\]
- *[0-9a-f]+: a4bb1411 ld1roh {z17\.h}, p5/z, \[x0, x27\]
- *[0-9a-f]+: a4a00000 ld1roh {z0\.h}, p0/z, \[x0, x0\]
- *[0-9a-f]+: a53b1411 ld1row {z17\.s}, p5/z, \[x0, x27\]
- *[0-9a-f]+: a5200000 ld1row {z0\.s}, p0/z, \[x0, x0\]
- *[0-9a-f]+: a5bb1411 ld1rod {z17\.d}, p5/z, \[x0, x27\]
- *[0-9a-f]+: a5a00000 ld1rod {z0\.d}, p0/z, \[x0, x0\]
+ *[0-9a-f]+: a4bb1411 ld1roh {z17\.h}, p5/z, \[x0, x27, lsl #1\]
+ *[0-9a-f]+: a4a00000 ld1roh {z0\.h}, p0/z, \[x0, x0, lsl #1\]
+ *[0-9a-f]+: a53b1411 ld1row {z17\.s}, p5/z, \[x0, x27, lsl #2\]
+ *[0-9a-f]+: a5200000 ld1row {z0\.s}, p0/z, \[x0, x0, lsl #2\]
+ *[0-9a-f]+: a5bb1411 ld1rod {z17\.d}, p5/z, \[x0, x27, lsl #3\]
+ *[0-9a-f]+: a5a00000 ld1rod {z0\.d}, p0/z, \[x0, x0, lsl #3\]
  *[0-9a-f]+: a42037f1 ld1rob {z17\.b}, p5/z, \[sp\]
  *[0-9a-f]+: a42723e0 ld1rob {z0\.b}, p0/z, \[sp, #224\]
  *[0-9a-f]+: a42823e0 ld1rob {z0\.b}, p0/z, \[sp, #-256\]
--- a/gas/testsuite/gas/aarch64/f64mm.s
+++ b/gas/testsuite/gas/aarch64/f64mm.s
@@ -13,21 +13,21 @@ fmmla z0.d,  z0.d,  z0.d
 
 ld1rob { z17.b }, p5/z, [sp, x27]
 ld1rob { z0.b }, p0/z, [sp, x0]
-ld1roh { z17.h }, p5/z, [sp, x27]
-ld1roh { z0.h }, p0/z, [sp, x0]
-ld1row { z17.s }, p5/z, [sp, x27]
-ld1row { z0.s }, p0/z, [sp, x0]
-ld1rod { z17.d }, p5/z, [sp, x27]
-ld1rod { z0.d }, p0/z, [sp, x0]
+ld1roh { z17.h }, p5/z, [sp, x27, lsl #1]
+ld1roh { z0.h }, p0/z, [sp, x0, lsl #1]
+ld1row { z17.s }, p5/z, [sp, x27, lsl #2]
+ld1row { z0.s }, p0/z, [sp, x0, lsl #2]
+ld1rod { z17.d }, p5/z, [sp, x27, lsl #3]
+ld1rod { z0.d }, p0/z, [sp, x0, lsl #3]
 
 ld1rob { z17.b }, p5/z, [x0, x27]
 ld1rob { z0.b }, p0/z, [x0, x0]
-ld1roh { z17.h }, p5/z, [x0, x27]
-ld1roh { z0.h }, p0/z, [x0, x0]
-ld1row { z17.s }, p5/z, [x0, x27]
-ld1row { z0.s }, p0/z, [x0, x0]
-ld1rod { z17.d }, p5/z, [x0, x27]
-ld1rod { z0.d }, p0/z, [x0, x0]
+ld1roh { z17.h }, p5/z, [x0, x27, lsl #1]
+ld1roh { z0.h }, p0/z, [x0, x0, lsl #1]
+ld1row { z17.s }, p5/z, [x0, x27, lsl #2]
+ld1row { z0.s }, p0/z, [x0, x0, lsl #2]
+ld1rod { z17.d }, p5/z, [x0, x27, lsl #3]
+ld1rod { z0.d }, p0/z, [x0, x0, lsl #3]
 
 ld1rob { z17.b }, p5/z, [sp, #0]
 ld1rob { z0.b }, p0/z, [sp, #224]
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -5074,10 +5074,10 @@ struct aarch64_opcode aarch64_opcode_tab
   INT8MATMUL_SVE_INSNC ("sudot",  0x44a01c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
   F32MATMUL_SVE_INSNC ("fmmla",   0x64a0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_S, 0, C_SCAN_MOVPRFX, 0),
   F64MATMUL_SVE_INSNC ("fmmla",   0x64e0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_D, 0, C_SCAN_MOVPRFX, 0),
-  F64MATMUL_SVE_INSN ("ld1rob",  0xa4200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_BZU, F_OD(1), 0),
-  F64MATMUL_SVE_INSN ("ld1roh",  0xa4a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_HZU, F_OD(1), 0),
-  F64MATMUL_SVE_INSN ("ld1row",  0xa5200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_SZU, F_OD(1), 0),
-  F64MATMUL_SVE_INSN ("ld1rod",  0xa5a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_DZU, F_OD(1), 0),
+  F64MATMUL_SVE_INSN ("ld1rob",  0xa4200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_BZU, F_OD(1), 0),
+  F64MATMUL_SVE_INSN ("ld1roh",  0xa4a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL1), OP_SVE_HZU, F_OD(1), 0),
+  F64MATMUL_SVE_INSN ("ld1row",  0xa5200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL2), OP_SVE_SZU, F_OD(1), 0),
+  F64MATMUL_SVE_INSN ("ld1rod",  0xa5a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL3), OP_SVE_DZU, F_OD(1), 0),
   F64MATMUL_SVE_INSN ("ld1rob",  0xa4202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_BZU, F_OD(1), 0),
   F64MATMUL_SVE_INSN ("ld1roh",  0xa4a02000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_HZU, F_OD(1), 0),
   F64MATMUL_SVE_INSN ("ld1row",  0xa5202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_SZU, F_OD(1), 0),

Reply | Threaded
Open this post in threaded view
|

RE: [PATCH 1/4] Arm64: correct 64-bit element fmmla encoding

Tamar Christina-2
In reply to this post by Jan Beulich-2
Hi Jan,

I'm not a maintainer so you still need approval to commit but this change is correct.

Thanks for the fix!
Tamar

> -----Original Message-----
> From: [hidden email] <[hidden email]>
> On Behalf Of Jan Beulich
> Sent: Friday, December 27, 2019 10:39
> To: [hidden email]
> Cc: Marcus Shawcroft <[hidden email]>; Mihail Ionescu
> <[hidden email]>; Richard Earnshaw
> <[hidden email]>
> Subject: [PATCH 1/4] Arm64: correct 64-bit element fmmla encoding
>
> There's just one bit of difference to the 32-bit element form, as per the
> documentation.
>
> gas/
> 2020-01-XX  Jan Beulich  <[hidden email]>
>
> * testsuite/gas/aarch64/f64mm.d,
> testsuite/gas/aarch64/sve-movprfx-mm.d: Adjust expectations.
>
> opcodes/
> 2020-01-XX  Jan Beulich  <[hidden email]>
>
> * opcodes/aarch64-tbl.h (aarch64_opcode_table): Correct 64-bit
> FMMLA encoding.
> * opcodes/aarch64-dis-2.c: Re-generate.
>
> --- a/gas/testsuite/gas/aarch64/f64mm.d
> +++ b/gas/testsuite/gas/aarch64/f64mm.d
> @@ -6,8 +6,8 @@
>  Disassembly of section \.text:
>
>  0+ <\.text>:
> - *[0-9a-f]+: 64dbe6b1 fmmla z17\.d, z21\.d, z27\.d
> - *[0-9a-f]+: 64c0e400 fmmla z0\.d, z0\.d, z0\.d
> + *[0-9a-f]+: 64fbe6b1 fmmla z17\.d, z21\.d, z27\.d
> + *[0-9a-f]+: 64e0e400 fmmla z0\.d, z0\.d, z0\.d
>   *[0-9a-f]+: a43b17f1 ld1rob {z17\.b}, p5/z, \[sp, x27\]
>   *[0-9a-f]+: a42003e0 ld1rob {z0\.b}, p0/z, \[sp, x0\]
>   *[0-9a-f]+: a4bb17f1 ld1roh {z17\.h}, p5/z, \[sp, x27\]
> --- a/gas/testsuite/gas/aarch64/sve-movprfx-mm.d
> +++ b/gas/testsuite/gas/aarch64/sve-movprfx-mm.d
> @@ -21,4 +21,4 @@ Disassembly of section \.text:
>   *[0-9a-f]+: 0420bc11 movprfx z17, z0
>   *[0-9a-f]+: 64bbe6b1 fmmla z17\.s, z21\.s, z27\.s
>   *[0-9a-f]+: 0420bc11 movprfx z17, z0
> - *[0-9a-f]+: 64dbe6b1 fmmla z17\.d, z21\.d, z27\.d
> + *[0-9a-f]+: 64fbe6b1 fmmla z17\.d, z21\.d, z27\.d
> --- a/opcodes/aarch64-tbl.h
> +++ b/opcodes/aarch64-tbl.h
> @@ -5073,7 +5073,7 @@ struct aarch64_opcode aarch64_opcode_tab
>    INT8MATMUL_SVE_INSNC ("usdot",  0x44a01800, 0xffe0fc00, sve_misc,
> OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0,
> C_SCAN_MOVPRFX, 0),
>    INT8MATMUL_SVE_INSNC ("sudot",  0x44a01c00, 0xffe0fc00, sve_misc,
> OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0,
> C_SCAN_MOVPRFX, 0),
>    F32MATMUL_SVE_INSNC ("fmmla",   0x64a0e400, 0xffe0fc00, sve_misc,
> OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_S, 0, C_SCAN_MOVPRFX,
> 0),
> -  F64MATMUL_SVE_INSNC ("fmmla",   0x64c0e400, 0xffe0fc00, sve_misc,
> OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_D, 0, C_SCAN_MOVPRFX,
> 0),
> +  F64MATMUL_SVE_INSNC ("fmmla",   0x64e0e400, 0xffe0fc00, sve_misc,
> OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_D, 0, C_SCAN_MOVPRFX,
> 0),
>    F64MATMUL_SVE_INSN ("ld1rob",  0xa4200000, 0xffe0e000, sve_misc, OP3
> (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_BZU, F_OD(1), 0),
>    F64MATMUL_SVE_INSN ("ld1roh",  0xa4a00000, 0xffe0e000, sve_misc, OP3
> (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_HZU, F_OD(1), 0),
>    F64MATMUL_SVE_INSN ("ld1row",  0xa5200000, 0xffe0e000, sve_misc, OP3
> (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_SZU, F_OD(1), 0),

Reply | Threaded
Open this post in threaded view
|

RE: [PATCH 2/4] Arm64: correct uzp{1,2} mnemonics

Tamar Christina-2
In reply to this post by Jan Beulich-2
Hi Jan,

I'm not a maintainer so you still need approval but this fix is correct.

Thanks!
Tamar

> -----Original Message-----
> From: [hidden email] <[hidden email]>
> On Behalf Of Jan Beulich
> Sent: Friday, December 27, 2019 10:40
> To: [hidden email]
> Cc: Marcus Shawcroft <[hidden email]>; Mihail Ionescu
> <[hidden email]>; Richard Earnshaw
> <[hidden email]>
> Subject: [PATCH 2/4] Arm64: correct uzp{1,2} mnemonics
>
> According to the specification, and in line with the pre-existing predicate
> forms, the mnemonics do not include an 'i'.
>
> gas/
> 2020-01-XX  Jan Beulich  <[hidden email]>
>
> * testsuite/gas/aarch64/f64mm.s: Drop 'i' from uzip<n>.
> * testsuite/gas/aarch64/f64mm.d: Adjust expectations.
>
> opcodes/
> 2020-01-XX  Jan Beulich  <[hidden email]>
>
> * opcodes/aarch64-tbl.h (aarch64_opcode_table): Drop 'i' from
> uzip{1,2}.
> * opcodes/aarch64-dis-2.c: Re-generate.
>
> --- a/gas/testsuite/gas/aarch64/f64mm.d
> +++ b/gas/testsuite/gas/aarch64/f64mm.d
> @@ -52,10 +52,10 @@ Disassembly of section \.text:
>   *[0-9a-f]+: 05a00000 zip1 z0\.q, z0\.q, z0\.q
>   *[0-9a-f]+: 05a506b1 zip2 z17\.q, z21\.q, z5\.q
>   *[0-9a-f]+: 05a00400 zip2 z0\.q, z0\.q, z0\.q
> - *[0-9a-f]+: 05a50ab1 uzip1 z17\.q, z21\.q, z5\.q
> - *[0-9a-f]+: 05a00800 uzip1 z0\.q, z0\.q, z0\.q
> - *[0-9a-f]+: 05a50eb1 uzip2 z17\.q, z21\.q, z5\.q
> - *[0-9a-f]+: 05a00c00 uzip2 z0\.q, z0\.q, z0\.q
> + *[0-9a-f]+: 05a50ab1 uzp1 z17\.q, z21\.q, z5\.q
> + *[0-9a-f]+: 05a00800 uzp1 z0\.q, z0\.q, z0\.q
> + *[0-9a-f]+: 05a50eb1 uzp2 z17\.q, z21\.q, z5\.q
> + *[0-9a-f]+: 05a00c00 uzp2 z0\.q, z0\.q, z0\.q
>   *[0-9a-f]+: 05a51ab1 trn1 z17\.q, z21\.q, z5\.q
>   *[0-9a-f]+: 05a01800 trn1 z0\.q, z0\.q, z0\.q
>   *[0-9a-f]+: 05a51eb1 trn2 z17\.q, z21\.q, z5\.q
> --- a/gas/testsuite/gas/aarch64/f64mm.s
> +++ b/gas/testsuite/gas/aarch64/f64mm.s
> @@ -60,10 +60,10 @@ zip1 z0.q, z0.q, z0.q
>  zip2 z17.q, z21.q, z5.q
>  zip2 z0.q, z0.q, z0.q
>
> -uzip1 z17.q, z21.q, z5.q
> -uzip1 z0.q, z0.q, z0.q
> -uzip2 z17.q, z21.q, z5.q
> -uzip2 z0.q, z0.q, z0.q
> +uzp1 z17.q, z21.q, z5.q
> +uzp1 z0.q, z0.q, z0.q
> +uzp2 z17.q, z21.q, z5.q
> +uzp2 z0.q, z0.q, z0.q
>
>  trn1 z17.q, z21.q, z5.q
>  trn1 z0.q, z0.q, z0.q
> --- a/opcodes/aarch64-tbl.h
> +++ b/opcodes/aarch64-tbl.h
> @@ -5084,8 +5084,8 @@ struct aarch64_opcode aarch64_opcode_tab
>    F64MATMUL_SVE_INSN ("ld1rod",  0xa5a02000, 0xfff0e000, sve_misc, OP3
> (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_DZU, F_OD(1), 0),
>    F64MATMUL_SVE_INSN ("zip1",    0x05a00000, 0xffe0fc00, sve_misc, OP3
> (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
>    F64MATMUL_SVE_INSN ("zip2",    0x05a00400, 0xffe0fc00, sve_misc, OP3
> (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
> -  F64MATMUL_SVE_INSN ("uzip1",   0x05a00800, 0xffe0fc00, sve_misc, OP3
> (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
> -  F64MATMUL_SVE_INSN ("uzip2",   0x05a00c00, 0xffe0fc00, sve_misc, OP3
> (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
> +  F64MATMUL_SVE_INSN ("uzp1",    0x05a00800, 0xffe0fc00, sve_misc, OP3
> (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
> +  F64MATMUL_SVE_INSN ("uzp2",    0x05a00c00, 0xffe0fc00, sve_misc, OP3
> (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
>    F64MATMUL_SVE_INSN ("trn1",    0x05a01800, 0xffe0fc00, sve_misc, OP3
> (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
>    F64MATMUL_SVE_INSN ("trn2",    0x05a01c00, 0xffe0fc00, sve_misc, OP3
> (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
>    /* Matrix Multiply advanced SIMD instructions.  */

Reply | Threaded
Open this post in threaded view
|

RE: [PATCH 3/4] Arm64: correct {su,us}dot SIMD encodings

Tamar Christina-2
In reply to this post by Jan Beulich-2
Hi Jan,

Thanks! Same as the rest this one looks ok too but you still need a maintainer to approve.

Cheers,
Tamar

> -----Original Message-----
> From: [hidden email] <[hidden email]>
> On Behalf Of Jan Beulich
> Sent: Friday, December 27, 2019 10:40
> To: [hidden email]
> Cc: Marcus Shawcroft <[hidden email]>; Mihail Ionescu
> <[hidden email]>; Richard Earnshaw
> <[hidden email]>
> Subject: [PATCH 3/4] Arm64: correct {su,us}dot SIMD encodings
>
> According to the specification these permit the Q bit to control the
> vector length operated on, and hence this bit should not already be set
> in the opcode table entries (it rather needs setting dynamically). Note
> how the test case output did also not match its input. Besides
> correcting the test case also extend it to cover both forms.
>
> gas/
> 2020-01-XX  Jan Beulich  <[hidden email]>
>
> * testsuite/gas/aarch64/i8mm.s: Add 128-bit form tests for
> by-element usdot. Add 64-bit form tests for by-element sudot.
> * testsuite/gas/aarch64/i8mm.d: Adjust expectations.
>
> opcodes/
> 2020-01-XX  Jan Beulich  <[hidden email]>
>
> * opcodes/aarch64-tbl.h (aarch64_opcode_table): Correct SIMD
> forms of SUDOT and USDOT.
>
> --- a/gas/testsuite/gas/aarch64/i8mm.d
> +++ b/gas/testsuite/gas/aarch64/i8mm.d
> @@ -29,15 +29,23 @@ Disassembly of section \.text:
>   *[0-9a-f]+: 6e80a400 ummla v0\.4s, v0\.16b, v0\.16b
>   *[0-9a-f]+: 4e80ac00 usmmla v0\.4s, v0\.16b, v0\.16b
>   *[0-9a-f]+: 4e9baeb1 usmmla v17\.4s, v21\.16b, v27\.16b
> - *[0-9a-f]+: 4e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
> - *[0-9a-f]+: 4e809c00 usdot v0\.2s, v0\.8b, v0\.8b
> - *[0-9a-f]+: 4e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
> - *[0-9a-f]+: 4e809c00 usdot v0\.2s, v0\.8b, v0\.8b
> - *[0-9a-f]+: 4fbbfab1 usdot v17\.2s, v21\.8b, v27\.4b\[3\]
> - *[0-9a-f]+: 4fa0f800 usdot v0\.2s, v0\.8b, v0\.4b\[3\]
> - *[0-9a-f]+: 4f9bf2b1 usdot v17\.2s, v21\.8b, v27\.4b\[0\]
> - *[0-9a-f]+: 4f80f000 usdot v0\.2s, v0\.8b, v0\.4b\[0\]
> - *[0-9a-f]+: 4f3bfab1 sudot v17\.2s, v21\.8b, v27\.4b\[3\]
> - *[0-9a-f]+: 4f20f800 sudot v0\.2s, v0\.8b, v0\.4b\[3\]
> - *[0-9a-f]+: 4f1bf2b1 sudot v17\.2s, v21\.8b, v27\.4b\[0\]
> - *[0-9a-f]+: 4f00f000 sudot v0\.2s, v0\.8b, v0\.4b\[0\]
> + *[0-9a-f]+: 0e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
> + *[0-9a-f]+: 0e809c00 usdot v0\.2s, v0\.8b, v0\.8b
> + *[0-9a-f]+: 4e9b9eb1 usdot v17\.4s, v21\.16b, v27\.16b
> + *[0-9a-f]+: 4e809c00 usdot v0\.4s, v0\.16b, v0\.16b
> + *[0-9a-f]+: 0fbbfab1 usdot v17\.2s, v21\.8b, v27\.4b\[3\]
> + *[0-9a-f]+: 0fa0f800 usdot v0\.2s, v0\.8b, v0\.4b\[3\]
> + *[0-9a-f]+: 0f9bf2b1 usdot v17\.2s, v21\.8b, v27\.4b\[0\]
> + *[0-9a-f]+: 0f80f000 usdot v0\.2s, v0\.8b, v0\.4b\[0\]
> + *[0-9a-f]+: 4fbbfab1 usdot v17\.4s, v21\.16b, v27\.4b\[3\]
> + *[0-9a-f]+: 4fa0f800 usdot v0\.4s, v0\.16b, v0\.4b\[3\]
> + *[0-9a-f]+: 4f9bf2b1 usdot v17\.4s, v21\.16b, v27\.4b\[0\]
> + *[0-9a-f]+: 4f80f000 usdot v0\.4s, v0\.16b, v0\.4b\[0\]
> + *[0-9a-f]+: 0f3bfab1 sudot v17\.2s, v21\.8b, v27\.4b\[3\]
> + *[0-9a-f]+: 0f20f800 sudot v0\.2s, v0\.8b, v0\.4b\[3\]
> + *[0-9a-f]+: 0f1bf2b1 sudot v17\.2s, v21\.8b, v27\.4b\[0\]
> + *[0-9a-f]+: 0f00f000 sudot v0\.2s, v0\.8b, v0\.4b\[0\]
> + *[0-9a-f]+: 4f3bfab1 sudot v17\.4s, v21\.16b, v27\.4b\[3\]
> + *[0-9a-f]+: 4f20f800 sudot v0\.4s, v0\.16b, v0\.4b\[3\]
> + *[0-9a-f]+: 4f1bf2b1 sudot v17\.4s, v21\.16b, v27\.4b\[0\]
> + *[0-9a-f]+: 4f00f000 sudot v0\.4s, v0\.16b, v0\.4b\[0\]
> --- a/gas/testsuite/gas/aarch64/i8mm.s
> +++ b/gas/testsuite/gas/aarch64/i8mm.s
> @@ -49,7 +49,15 @@ usdot v17.2s, v21.8b, v27.4b[3]
>  usdot v0.2s, v0.8b, v0.4b[3]
>  usdot v17.2s, v21.8b, v27.4b[0]
>  usdot v0.2s, v0.8b, v0.4b[0]
> +usdot v17.4s, v21.16b, v27.4b[3]
> +usdot v0.4s, v0.16b, v0.4b[3]
> +usdot v17.4s, v21.16b, v27.4b[0]
> +usdot v0.4s, v0.16b, v0.4b[0]
>
> +sudot v17.2s, v21.8b, v27.4b[3]
> +sudot v0.2s, v0.8b, v0.4b[3]
> +sudot v17.2s, v21.8b, v27.4b[0]
> +sudot v0.2s, v0.8b, v0.4b[0]
>  sudot v17.4s, v21.16b, v27.4b[3]
>  sudot v0.4s, v0.16b, v0.4b[3]
>  sudot v17.4s, v21.16b, v27.4b[0]
> --- a/opcodes/aarch64-tbl.h
> +++ b/opcodes/aarch64-tbl.h
> @@ -5092,9 +5092,9 @@ struct aarch64_opcode aarch64_opcode_tab
>    INT8MATMUL_INSN ("smmla",  0x4e80a400, 0xffe0fc00, aarch64_misc, OP3
> (Vd, Vn, Vm), QL_MMLA64, 0),
>    INT8MATMUL_INSN ("ummla",  0x6e80a400, 0xffe0fc00, aarch64_misc, OP3
> (Vd, Vn, Vm), QL_MMLA64, 0),
>    INT8MATMUL_INSN ("usmmla", 0x4e80ac00, 0xffe0fc00, aarch64_misc,
> OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
> -  INT8MATMUL_INSN ("usdot",  0x4e809c00, 0xffe0fc00, aarch64_misc, OP3
> (Vd, Vn, Vm), QL_V3DOT, F_SIZEQ),
> -  INT8MATMUL_INSN ("usdot",  0x4f80f000, 0xffc0f400, dotproduct, OP3 (Vd,
> Vn, Em), QL_V2DOT, F_SIZEQ),
> -  INT8MATMUL_INSN ("sudot",  0x4f00f000, 0xffc0f400, dotproduct, OP3 (Vd,
> Vn, Em), QL_V2DOT, F_SIZEQ),
> +  INT8MATMUL_INSN ("usdot",  0x0e809c00, 0xbfe0fc00, aarch64_misc, OP3
> (Vd, Vn, Vm), QL_V3DOT, F_SIZEQ),
> +  INT8MATMUL_INSN ("usdot",  0x0f80f000, 0xbfc0f400, dotproduct, OP3
> (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
> +  INT8MATMUL_INSN ("sudot",  0x0f00f000, 0xbfc0f400, dotproduct, OP3
> (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
>
>    /* BFloat instructions.  */
>    BFLOAT16_SVE_INSNC ("bfdot",  0x64608000, 0xffe0fc00, sve_misc, OP3
> (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SHH, 0, C_SCAN_MOVPRFX, 0),

Reply | Threaded
Open this post in threaded view
|

RE: [PATCH 4/4] Arm64: correct address index operands for LD1RO{H,W,D}

Tamar Christina-2
In reply to this post by Jan Beulich-2
Hi Jan,

Thanks for fixing these. The changes look correct to me but I'm not a maintainer so can't approve.

Cheers,
Tamar

> -----Original Message-----
> From: [hidden email] <[hidden email]>
> On Behalf Of Jan Beulich
> Sent: Friday, December 27, 2019 10:40
> To: [hidden email]
> Cc: Marcus Shawcroft <[hidden email]>; Mihail Ionescu
> <[hidden email]>; Richard Earnshaw
> <[hidden email]>
> Subject: [PATCH 4/4] Arm64: correct address index operands for
> LD1RO{H,W,D}
>
> Just like their LD1RQ{H,W,D} counterparts, as per the specification the index
> registers get scaled by element size.
>
> gas/
> 2020-01-XX  Jan Beulich  <[hidden email]>
>
> * testsuite/gas/aarch64/f64mm.s: Scale index of LD1RO{H,W,D}.
> * testsuite/gas/aarch64/f64mm.d: Adjust expectations.
>
> opcodes/
> 2020-01-XX  Jan Beulich  <[hidden email]>
>
> * opcodes/aarch64-tbl.h (aarch64_opcode_table): Use
> SVE_ADDR_RX_LSL{1,2,3} for LD1RO{H,W,D}.
>
> --- a/gas/testsuite/gas/aarch64/f64mm.d
> +++ b/gas/testsuite/gas/aarch64/f64mm.d
> @@ -10,20 +10,20 @@ Disassembly of section \.text:
>   *[0-9a-f]+: 64e0e400 fmmla z0\.d, z0\.d, z0\.d
>   *[0-9a-f]+: a43b17f1 ld1rob {z17\.b}, p5/z, \[sp, x27\]
>   *[0-9a-f]+: a42003e0 ld1rob {z0\.b}, p0/z, \[sp, x0\]
> - *[0-9a-f]+: a4bb17f1 ld1roh {z17\.h}, p5/z, \[sp, x27\]
> - *[0-9a-f]+: a4a003e0 ld1roh {z0\.h}, p0/z, \[sp, x0\]
> - *[0-9a-f]+: a53b17f1 ld1row {z17\.s}, p5/z, \[sp, x27\]
> - *[0-9a-f]+: a52003e0 ld1row {z0\.s}, p0/z, \[sp, x0\]
> - *[0-9a-f]+: a5bb17f1 ld1rod {z17\.d}, p5/z, \[sp, x27\]
> - *[0-9a-f]+: a5a003e0 ld1rod {z0\.d}, p0/z, \[sp, x0\]
> + *[0-9a-f]+: a4bb17f1 ld1roh {z17\.h}, p5/z, \[sp, x27, lsl #1\]
> + *[0-9a-f]+: a4a003e0 ld1roh {z0\.h}, p0/z, \[sp, x0, lsl #1\]
> + *[0-9a-f]+: a53b17f1 ld1row {z17\.s}, p5/z, \[sp, x27, lsl #2\]
> + *[0-9a-f]+: a52003e0 ld1row {z0\.s}, p0/z, \[sp, x0, lsl #2\]
> + *[0-9a-f]+: a5bb17f1 ld1rod {z17\.d}, p5/z, \[sp, x27, lsl #3\]
> + *[0-9a-f]+: a5a003e0 ld1rod {z0\.d}, p0/z, \[sp, x0, lsl #3\]
>   *[0-9a-f]+: a43b1411 ld1rob {z17\.b}, p5/z, \[x0, x27\]
>   *[0-9a-f]+: a4200000 ld1rob {z0\.b}, p0/z, \[x0, x0\]
> - *[0-9a-f]+: a4bb1411 ld1roh {z17\.h}, p5/z, \[x0, x27\]
> - *[0-9a-f]+: a4a00000 ld1roh {z0\.h}, p0/z, \[x0, x0\]
> - *[0-9a-f]+: a53b1411 ld1row {z17\.s}, p5/z, \[x0, x27\]
> - *[0-9a-f]+: a5200000 ld1row {z0\.s}, p0/z, \[x0, x0\]
> - *[0-9a-f]+: a5bb1411 ld1rod {z17\.d}, p5/z, \[x0, x27\]
> - *[0-9a-f]+: a5a00000 ld1rod {z0\.d}, p0/z, \[x0, x0\]
> + *[0-9a-f]+: a4bb1411 ld1roh {z17\.h}, p5/z, \[x0, x27, lsl #1\]
> + *[0-9a-f]+: a4a00000 ld1roh {z0\.h}, p0/z, \[x0, x0, lsl #1\]
> + *[0-9a-f]+: a53b1411 ld1row {z17\.s}, p5/z, \[x0, x27, lsl #2\]
> + *[0-9a-f]+: a5200000 ld1row {z0\.s}, p0/z, \[x0, x0, lsl #2\]
> + *[0-9a-f]+: a5bb1411 ld1rod {z17\.d}, p5/z, \[x0, x27, lsl #3\]
> + *[0-9a-f]+: a5a00000 ld1rod {z0\.d}, p0/z, \[x0, x0, lsl #3\]
>   *[0-9a-f]+: a42037f1 ld1rob {z17\.b}, p5/z, \[sp\]
>   *[0-9a-f]+: a42723e0 ld1rob {z0\.b}, p0/z, \[sp, #224\]
>   *[0-9a-f]+: a42823e0 ld1rob {z0\.b}, p0/z, \[sp, #-256\]
> --- a/gas/testsuite/gas/aarch64/f64mm.s
> +++ b/gas/testsuite/gas/aarch64/f64mm.s
> @@ -13,21 +13,21 @@ fmmla z0.d,  z0.d,  z0.d
>
>  ld1rob { z17.b }, p5/z, [sp, x27]
>  ld1rob { z0.b }, p0/z, [sp, x0]
> -ld1roh { z17.h }, p5/z, [sp, x27]
> -ld1roh { z0.h }, p0/z, [sp, x0]
> -ld1row { z17.s }, p5/z, [sp, x27]
> -ld1row { z0.s }, p0/z, [sp, x0]
> -ld1rod { z17.d }, p5/z, [sp, x27]
> -ld1rod { z0.d }, p0/z, [sp, x0]
> +ld1roh { z17.h }, p5/z, [sp, x27, lsl #1]
> +ld1roh { z0.h }, p0/z, [sp, x0, lsl #1]
> +ld1row { z17.s }, p5/z, [sp, x27, lsl #2]
> +ld1row { z0.s }, p0/z, [sp, x0, lsl #2]
> +ld1rod { z17.d }, p5/z, [sp, x27, lsl #3]
> +ld1rod { z0.d }, p0/z, [sp, x0, lsl #3]
>
>  ld1rob { z17.b }, p5/z, [x0, x27]
>  ld1rob { z0.b }, p0/z, [x0, x0]
> -ld1roh { z17.h }, p5/z, [x0, x27]
> -ld1roh { z0.h }, p0/z, [x0, x0]
> -ld1row { z17.s }, p5/z, [x0, x27]
> -ld1row { z0.s }, p0/z, [x0, x0]
> -ld1rod { z17.d }, p5/z, [x0, x27]
> -ld1rod { z0.d }, p0/z, [x0, x0]
> +ld1roh { z17.h }, p5/z, [x0, x27, lsl #1]
> +ld1roh { z0.h }, p0/z, [x0, x0, lsl #1]
> +ld1row { z17.s }, p5/z, [x0, x27, lsl #2]
> +ld1row { z0.s }, p0/z, [x0, x0, lsl #2]
> +ld1rod { z17.d }, p5/z, [x0, x27, lsl #3]
> +ld1rod { z0.d }, p0/z, [x0, x0, lsl #3]
>
>  ld1rob { z17.b }, p5/z, [sp, #0]
>  ld1rob { z0.b }, p0/z, [sp, #224]
> --- a/opcodes/aarch64-tbl.h
> +++ b/opcodes/aarch64-tbl.h
> @@ -5074,10 +5074,10 @@ struct aarch64_opcode aarch64_opcode_tab
>    INT8MATMUL_SVE_INSNC ("sudot",  0x44a01c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
>    F32MATMUL_SVE_INSNC ("fmmla",   0x64a0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_S, 0, C_SCAN_MOVPRFX, 0),
>    F64MATMUL_SVE_INSNC ("fmmla",   0x64e0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_D, 0, C_SCAN_MOVPRFX, 0),
> -  F64MATMUL_SVE_INSN ("ld1rob",  0xa4200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_BZU, F_OD(1), 0),
> -  F64MATMUL_SVE_INSN ("ld1roh",  0xa4a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_HZU, F_OD(1), 0),
> -  F64MATMUL_SVE_INSN ("ld1row",  0xa5200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_SZU, F_OD(1), 0),
> -  F64MATMUL_SVE_INSN ("ld1rod",  0xa5a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_DZU, F_OD(1), 0),
> +  F64MATMUL_SVE_INSN ("ld1rob",  0xa4200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_BZU, F_OD(1), 0),
> +  F64MATMUL_SVE_INSN ("ld1roh",  0xa4a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL1), OP_SVE_HZU, F_OD(1), 0),
> +  F64MATMUL_SVE_INSN ("ld1row",  0xa5200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL2), OP_SVE_SZU, F_OD(1), 0),
> +  F64MATMUL_SVE_INSN ("ld1rod",  0xa5a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL3), OP_SVE_DZU, F_OD(1), 0),
>    F64MATMUL_SVE_INSN ("ld1rob",  0xa4202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_BZU, F_OD(1), 0),
>    F64MATMUL_SVE_INSN ("ld1roh",  0xa4a02000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_HZU, F_OD(1), 0),
>    F64MATMUL_SVE_INSN ("ld1row",  0xa5202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_SZU, F_OD(1), 0),

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 0/4] Arm64: corrections to recent F64MM / I8MM additions

Nick Clifton
In reply to this post by Jan Beulich-2
Hi Jan,

> Judging from the specification, there seem to be a number of issues.
> However, I may well be wrong with some or all of them - both the
> documentation or my reading of it may as well be incorrect.
>
> 1: correct 64-bit element fmmla encoding
> 2: correct uzp{1,2} mnemonics
> 3: correct {su,us}dot SIMD encodings
> 4: correct address index operands for LD1RO{H,W,D}

Approved (all of them) - please apply.

Cheers
  Nick