[PATCH] x86-64: fix ZMM register state tracking

classic Classic list List threaded Threaded
24 messages Options
12
Reply | Threaded
Open this post in threaded view
|

[PATCH] x86-64: fix ZMM register state tracking

Jan Beulich-2
The three AVX512 state components are entirely independent - one being
in its "init state" has no implication whatsoever on either of the other
two. Fully separate X86_XSTATE_ZMM_H and X86_XSTATE_ZMM handling, to
prevent upper halves of the upper 16 ZMM registers to display as if they
were zero (when they aren't) after e.g. VZEROALL/VZEROUPPER.

gdb/
2018-09-05  Jan Beulich  <[hidden email]>

        * i387-tdep.c (i387_supply_xsave): Split handling of
        X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
        (i387_collect_xsave): Likewise.

--- a/gdb/i387-tdep.c
+++ b/gdb/i387-tdep.c
@@ -923,7 +923,8 @@ i387_supply_xsave (struct regcache *regc
   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
   struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
   const gdb_byte *regs = (const gdb_byte *) xsave;
-  int i;
+  int i, zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
+    + std::min (tdep->num_zmm_regs, 16);
   ULONGEST clear_bv;
   static const gdb_byte zero[I386_MAX_REGISTER_SIZE] = { 0 };
   enum
@@ -1002,7 +1003,8 @@ i387_supply_xsave (struct regcache *regc
       return;
 
     case avx512_zmm_h:
-      if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
+      if ((clear_bv & (regnum < zmm_endlo_regnum ? X86_XSTATE_ZMM_H
+ : X86_XSTATE_ZMM)))
  regcache->raw_supply (regnum, zero);
       else
  regcache->raw_supply (regnum,
@@ -1080,21 +1082,17 @@ i387_supply_xsave (struct regcache *regc
     }
  }
 
-      /* Handle the upper ZMM registers.  */
-      if ((tdep->xcr0 & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
+      /* Handle the upper halves of the low 8/16 ZMM registers.  */
+      if ((tdep->xcr0 & X86_XSTATE_ZMM_H))
  {
-  if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
+  if ((clear_bv & X86_XSTATE_ZMM_H))
     {
-      for (i = I387_ZMM0H_REGNUM (tdep);
-   i < I387_ZMMENDH_REGNUM (tdep);
-   i++)
+      for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
  regcache->raw_supply (i, zero);
     }
   else
     {
-      for (i = I387_ZMM0H_REGNUM (tdep);
-   i < I387_ZMMENDH_REGNUM (tdep);
-   i++)
+      for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
  regcache->raw_supply (i,
       XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i));
     }
@@ -1119,11 +1117,13 @@ i387_supply_xsave (struct regcache *regc
     }
  }
 
-      /* Handle the YMM_AVX512 registers.  */
+      /* Handle the upper 16 ZMM/YMM/XMM registers (if any).  */
       if ((tdep->xcr0 & X86_XSTATE_ZMM))
  {
   if ((clear_bv & X86_XSTATE_ZMM))
     {
+      for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
+ regcache->raw_supply (i, zero);
       for (i = I387_YMM16H_REGNUM (tdep);
    i < I387_YMMH_AVX512_END_REGNUM (tdep);
    i++)
@@ -1135,6 +1135,9 @@ i387_supply_xsave (struct regcache *regc
     }
   else
     {
+      for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
+ regcache->raw_supply (i,
+      XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i));
       for (i = I387_YMM16H_REGNUM (tdep);
    i < I387_YMMH_AVX512_END_REGNUM (tdep);
    i++)
@@ -1340,7 +1343,8 @@ i387_collect_xsave (const struct regcach
   gdb_byte *p, *regs = (gdb_byte *) xsave;
   gdb_byte raw[I386_MAX_REGISTER_SIZE];
   ULONGEST initial_xstate_bv, clear_bv, xstate_bv = 0;
-  unsigned int i;
+  unsigned int i, zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
+     + std::min (tdep->num_zmm_regs, 16);
   enum
     {
       x87_ctrl_or_mxcsr = 0x1,
@@ -1441,9 +1445,8 @@ i387_collect_xsave (const struct regcach
      i < I387_MPXEND_REGNUM (tdep); i++)
   memset (XSAVE_MPX_ADDR (tdep, regs, i), 0, 8);
 
-      if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
- for (i = I387_ZMM0H_REGNUM (tdep);
-     i < I387_ZMMENDH_REGNUM (tdep); i++)
+      if ((clear_bv & X86_XSTATE_ZMM_H))
+ for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
   memset (XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i), 0, 32);
 
       if ((clear_bv & X86_XSTATE_K))
@@ -1453,6 +1456,8 @@ i387_collect_xsave (const struct regcach
 
       if ((clear_bv & X86_XSTATE_ZMM))
  {
+  for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
+    memset (XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i), 0, 32);
   for (i = I387_YMM16H_REGNUM (tdep);
        i < I387_YMMH_AVX512_END_REGNUM (tdep); i++)
     memset (XSAVE_YMM_AVX512_ADDR (tdep, regs, i), 0, 16);



Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] x86-64: fix ZMM register state tracking

Simon Marchi-2
Hi Jan,

Would it be possible to update or create a test to exercise that?
arch-specific tests are in testsuite/gdb.arch.

I have some comments about the form.  Tim, could you or somebody
else from Intel take a look at the functional side of the changes?

On 2018-09-05 02:22 PM, Jan Beulich wrote:

> The three AVX512 state components are entirely independent - one being
> in its "init state" has no implication whatsoever on either of the other
> two. Fully separate X86_XSTATE_ZMM_H and X86_XSTATE_ZMM handling, to
> prevent upper halves of the upper 16 ZMM registers to display as if they
> were zero (when they aren't) after e.g. VZEROALL/VZEROUPPER.
>
> gdb/
> 2018-09-05  Jan Beulich  <[hidden email]>
>
> * i387-tdep.c (i387_supply_xsave): Split handling of
> X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
> (i387_collect_xsave): Likewise.
>
> --- a/gdb/i387-tdep.c
> +++ b/gdb/i387-tdep.c
> @@ -923,7 +923,8 @@ i387_supply_xsave (struct regcache *regc
>    enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
>    struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
>    const gdb_byte *regs = (const gdb_byte *) xsave;
> -  int i;
> +  int i, zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
> +    + std::min (tdep->num_zmm_regs, 16);

The GNU standard requires to parenthesis when breaking lines:

  int i, zmm_endlo_regnum = (I387_ZMM0H_REGNUM (tdep)
                             + std::min (tdep->num_zmm_regs, 16));

Simon
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] x86-64: fix ZMM register state tracking

Jan Beulich-2
>>> Simon Marchi <[hidden email]> 09/08/18 1:13 AM >>>
>Would it be possible to update or create a test to exercise that?
>arch-specific tests are in testsuite/gdb.arch.

I'm sure it would be possible, but while I was happy to invest the time to
fix the actual bug (because it affects work I'm doing), I'm afraid I don't have
the time to learn how gdb test cases are to be constructed (I'm familiar
only with the binutils / gas side of things).


>On 2018-09-05 02:22 PM, Jan Beulich wrote:
>> --- a/gdb/i387-tdep.c
>> +++ b/gdb/i387-tdep.c
>> @@ -923,7 +923,8 @@ i387_supply_xsave (struct regcache *regc
>>    enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
>>    struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
>>    const gdb_byte *regs = (const gdb_byte *) xsave;
>> -  int i;
>> +  int i, zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
>> +    + std::min (tdep->num_zmm_regs, 16);
>
>The GNU standard requires to parenthesis when breaking lines:
>
>int i, zmm_endlo_regnum = (I387_ZMM0H_REGNUM (tdep)
>+ std::min (tdep->num_zmm_regs, 16));

This is easy enough to fix (perhaps even without the need to send a v2,
but just before/while committing?); I don't think this is a rule being
enforced on the binutils side, so I simply wasn't aware.

Jan



Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] x86-64: fix ZMM register state tracking

Simon Marchi
On 2018-09-10 07:25, Jan Beulich wrote:

>>>> Simon Marchi <[hidden email]> 09/08/18 1:13 AM >>>
>> Would it be possible to update or create a test to exercise that?
>> arch-specific tests are in testsuite/gdb.arch.
>
> I'm sure it would be possible, but while I was happy to invest the time
> to
> fix the actual bug (because it affects work I'm doing), I'm afraid I
> don't have
> the time to learn how gdb test cases are to be constructed (I'm
> familiar
> only with the binutils / gas side of things).

I understand.  If you can provide:

  - a minimal source file (C + assembly in this case, I suppose)
  - GDB commands to reproduce the bug
  - actual and expected output

I can take care of turning it in a GDB test case.

>> On 2018-09-05 02:22 PM, Jan Beulich wrote:
>>> --- a/gdb/i387-tdep.c
>>> +++ b/gdb/i387-tdep.c
>>> @@ -923,7 +923,8 @@ i387_supply_xsave (struct regcache *regc
>>>    enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
>>>    struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
>>>    const gdb_byte *regs = (const gdb_byte *) xsave;
>>> -  int i;
>>> +  int i, zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
>>> +    + std::min (tdep->num_zmm_regs, 16);
>>
>> The GNU standard requires to parenthesis when breaking lines:
>>
>> int i, zmm_endlo_regnum = (I387_ZMM0H_REGNUM (tdep)
>> + std::min (tdep->num_zmm_regs, 16));
>
> This is easy enough to fix (perhaps even without the need to send a v2,
> but just before/while committing?); I don't think this is a rule being
> enforced on the binutils side, so I simply wasn't aware.

Yeah, no problem, I can do it.  It's suggested in the GNU coding
standards, so I think it would apply to binutils too:

   https://www.gnu.org/prep/standards/html_node/Formatting.html

Search for "Adding a set of parentheses".

Simon
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] x86-64: fix ZMM register state tracking

Pedro Alves-7
In reply to this post by Jan Beulich-2
On 09/05/2018 02:22 PM, Jan Beulich wrote:

> The three AVX512 state components are entirely independent - one being
> in its "init state" has no implication whatsoever on either of the other
> two. Fully separate X86_XSTATE_ZMM_H and X86_XSTATE_ZMM handling, to
> prevent upper halves of the upper 16 ZMM registers to display as if they
> were zero (when they aren't) after e.g. VZEROALL/VZEROUPPER.
>
> gdb/
> 2018-09-05  Jan Beulich  <[hidden email]>
>
> * i387-tdep.c (i387_supply_xsave): Split handling of
> X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
> (i387_collect_xsave): Likewise.

Does gdb/gdbserver/i387-fp.c need similar treatment?

Thanks,
Pedro Alves
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] x86-64: fix ZMM register state tracking

Jan Beulich-2
>>> On 11.09.18 at 12:34, <[hidden email]> wrote:
> On 09/05/2018 02:22 PM, Jan Beulich wrote:
>> The three AVX512 state components are entirely independent - one being
>> in its "init state" has no implication whatsoever on either of the other
>> two. Fully separate X86_XSTATE_ZMM_H and X86_XSTATE_ZMM handling, to
>> prevent upper halves of the upper 16 ZMM registers to display as if they
>> were zero (when they aren't) after e.g. VZEROALL/VZEROUPPER.
>>
>> gdb/
>> 2018-09-05  Jan Beulich  <[hidden email]>
>>
>> * i387-tdep.c (i387_supply_xsave): Split handling of
>> X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
>> (i387_collect_xsave): Likewise.
>
> Does gdb/gdbserver/i387-fp.c need similar treatment?

Not afaics - there's no place where both flags would be tested at
the same time (other than was the case here before this patch).

Jan


Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] x86-64: fix ZMM register state tracking

Jan Beulich-2
In reply to this post by Simon Marchi
>>> On 10.09.18 at 15:01, <[hidden email]> wrote:

> On 2018-09-10 07:25, Jan Beulich wrote:
>>>>> Simon Marchi <[hidden email]> 09/08/18 1:13 AM >>>
>>> Would it be possible to update or create a test to exercise that?
>>> arch-specific tests are in testsuite/gdb.arch.
>>
>> I'm sure it would be possible, but while I was happy to invest the time
>> to
>> fix the actual bug (because it affects work I'm doing), I'm afraid I
>> don't have
>> the time to learn how gdb test cases are to be constructed (I'm
>> familiar
>> only with the binutils / gas side of things).
>
> I understand.  If you can provide:
>
>   - a minimal source file (C + assembly in this case, I suppose)
>   - GDB commands to reproduce the bug
>   - actual and expected output
Attached. vzero.s is the source file used (no C file needed). gdb.log
is a transcript of a session with a broken gdb (the one installed on
the system), while gdb.txt is a transcript for the fixed one that I've
built myself.

> I can take care of turning it in a GDB test case.

Thanks.

Jan



vzero.s (196 bytes) Download Attachment
gdb.log (2K) Download Attachment
gdb.txt (1K) Download Attachment
Reply | Threaded
Open this post in threaded view
|

RE: [PATCH] x86-64: fix ZMM register state tracking

Metzger, Markus T
In reply to this post by Jan Beulich-2
Hello Jan,

> The three AVX512 state components are entirely independent - one being in its "init
> state" has no implication whatsoever on either of the other two. Fully separate
> X86_XSTATE_ZMM_H and X86_XSTATE_ZMM handling, to prevent upper halves of
> the upper 16 ZMM registers to display as if they were zero (when they aren't) after
> e.g. VZEROALL/VZEROUPPER.
>
> gdb/
> 2018-09-05  Jan Beulich  <[hidden email]>
>
> * i387-tdep.c (i387_supply_xsave): Split handling of
> X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
> (i387_collect_xsave): Likewise.
>
> --- a/gdb/i387-tdep.c
> +++ b/gdb/i387-tdep.c
> @@ -923,7 +923,8 @@ i387_supply_xsave (struct regcache *regc
>    enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
>    struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
>    const gdb_byte *regs = (const gdb_byte *) xsave;
> -  int i;
> +  int i, zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
> +    + std::min (tdep->num_zmm_regs, 16);

It would be nice to comment on this magic 16 and the min operation.
It's how XSAVE organizes things but it isn't entirely intuitive.


>    ULONGEST clear_bv;
>    static const gdb_byte zero[I386_MAX_REGISTER_SIZE] = { 0 };
>    enum
> @@ -1002,7 +1003,8 @@ i387_supply_xsave (struct regcache *regc
>        return;
>
>      case avx512_zmm_h:
> -      if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> +      if ((clear_bv & (regnum < zmm_endlo_regnum ? X86_XSTATE_ZMM_H
> + : X86_XSTATE_ZMM)))

A comment that XSAVE stores the lower 16 registers in a different place
than the higher 16 registers and also guards them by different XCR0 bits
would be nice.

We hid the different places behind those XSAVE_AVX512_ZMM_H_ADDR
macros but there's nothing similar for the guard bits.  Maybe add macros
for the guard check, as well?


>   regcache->raw_supply (regnum, zero);
>        else
>   regcache->raw_supply (regnum,
> @@ -1080,21 +1082,17 @@ i387_supply_xsave (struct regcache *regc
>      }
>   }
>
> -      /* Handle the upper ZMM registers.  */
> -      if ((tdep->xcr0 & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> +      /* Handle the upper halves of the low 8/16 ZMM registers.  */
> +      if ((tdep->xcr0 & X86_XSTATE_ZMM_H))
>   {
> -  if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> +  if ((clear_bv & X86_XSTATE_ZMM_H))
>      {
> -      for (i = I387_ZMM0H_REGNUM (tdep);
> -   i < I387_ZMMENDH_REGNUM (tdep);
> -   i++)
> +      for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
>   regcache->raw_supply (i, zero);
>      }
>    else
>      {
> -      for (i = I387_ZMM0H_REGNUM (tdep);
> -   i < I387_ZMMENDH_REGNUM (tdep);
> -   i++)
> +      for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
>   regcache->raw_supply (i,
>        XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i));
>      }
> @@ -1119,11 +1117,13 @@ i387_supply_xsave (struct regcache *regc
>      }
>   }
>
> -      /* Handle the YMM_AVX512 registers.  */
> +      /* Handle the upper 16 ZMM/YMM/XMM registers (if any).  */
>        if ((tdep->xcr0 & X86_XSTATE_ZMM))
>   {
>    if ((clear_bv & X86_XSTATE_ZMM))
>      {
> +      for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
> + regcache->raw_supply (i, zero);
>        for (i = I387_YMM16H_REGNUM (tdep);
>     i < I387_YMMH_AVX512_END_REGNUM (tdep);
>     i++)
> @@ -1135,6 +1135,9 @@ i387_supply_xsave (struct regcache *regc
>      }
>    else
>      {
> +      for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
> + regcache->raw_supply (i,
> +      XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i));

This covers the upper halves of zmm16 to zmm31.  Looking at the function it looks
like the lower halves are covered in separate cases avx512_ymmh_avx512 and
avx512_xmmh_avx512.  Maybe reflect this in the comment?  It currently suggests
that it handles the entire upper 16 registers.


>        for (i = I387_YMM16H_REGNUM (tdep);
>     i < I387_YMMH_AVX512_END_REGNUM (tdep);
>     i++)
> @@ -1340,7 +1343,8 @@ i387_collect_xsave (const struct regcach
>    gdb_byte *p, *regs = (gdb_byte *) xsave;
>    gdb_byte raw[I386_MAX_REGISTER_SIZE];
>    ULONGEST initial_xstate_bv, clear_bv, xstate_bv = 0;
> -  unsigned int i;
> +  unsigned int i, zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
> +     + std::min (tdep->num_zmm_regs, 16);
>    enum
>      {
>        x87_ctrl_or_mxcsr = 0x1,
> @@ -1441,9 +1445,8 @@ i387_collect_xsave (const struct regcach
>       i < I387_MPXEND_REGNUM (tdep); i++)
>    memset (XSAVE_MPX_ADDR (tdep, regs, i), 0, 8);
>
> -      if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> - for (i = I387_ZMM0H_REGNUM (tdep);
> -     i < I387_ZMMENDH_REGNUM (tdep); i++)
> +      if ((clear_bv & X86_XSTATE_ZMM_H))
> + for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
>    memset (XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i), 0, 32);
>
>        if ((clear_bv & X86_XSTATE_K))
> @@ -1453,6 +1456,8 @@ i387_collect_xsave (const struct regcach
>
>        if ((clear_bv & X86_XSTATE_ZMM))
>   {
> +  for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
> +    memset (XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i), 0, 32);
>    for (i = I387_YMM16H_REGNUM (tdep);
>         i < I387_YMMH_AVX512_END_REGNUM (tdep); i++)
>      memset (XSAVE_YMM_AVX512_ADDR (tdep, regs, i), 0, 16);

Looks OK to me.

Regards,
Markus.

Intel Deutschland GmbH
Registered Address: Am Campeon 10-12, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de
Managing Directors: Christin Eisenschmid, Christian Lamprechter
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] x86-64: fix ZMM register state tracking

Simon Marchi-2
In reply to this post by Jan Beulich-2
On 2018-09-18 09:37 AM, Jan Beulich wrote:

>>>> On 10.09.18 at 15:01, <[hidden email]> wrote:
>> On 2018-09-10 07:25, Jan Beulich wrote:
>>>>>> Simon Marchi <[hidden email]> 09/08/18 1:13 AM >>>
>>>> Would it be possible to update or create a test to exercise that?
>>>> arch-specific tests are in testsuite/gdb.arch.
>>>
>>> I'm sure it would be possible, but while I was happy to invest the time
>>> to
>>> fix the actual bug (because it affects work I'm doing), I'm afraid I
>>> don't have
>>> the time to learn how gdb test cases are to be constructed (I'm
>>> familiar
>>> only with the binutils / gas side of things).
>>
>> I understand.  If you can provide:
>>
>>   - a minimal source file (C + assembly in this case, I suppose)
>>   - GDB commands to reproduce the bug
>>   - actual and expected output
>
> Attached. vzero.s is the source file used (no C file needed). gdb.log
> is a transcript of a session with a broken gdb (the one installed on
> the system), while gdb.txt is a transcript for the fixed one that I've
> built myself.
>
>> I can take care of turning it in a GDB test case.
>
> Thanks.
>
> Jan
>
>

Hi Jan,

Thanks for the instructions.  There is already a test covering AVX512
instructions, so I figured I would add it there.  However, I don't
have a processor that supports AVX512, so I'm unable to run the test.

Here's a patch, can you try to confirm that the test fails without the
fix and passes with the fix?  I probably screwed up somewhere, but it
should be pretty close.

You can run the test with

  $ make check TESTS="gdb.arch/i386-avx512.exp"

in the gdb/ build directory.  The gdb/testsuite/gdb.log file is useful
to look at in case something fails.

Feel free to integrate this in your eventual v2 if there is one, and
feel free to add a comment in the test to say what we are testing here,
as I am not too sure how to describe it.

Thanks!

Simon


From a167d21f452f6f3f19eac6623fa872fde1162128 Mon Sep 17 00:00:00 2001
From: Simon Marchi <[hidden email]>
Date: Mon, 24 Sep 2018 23:21:19 -0400
Subject: [PATCH] AVX512 test

---
 gdb/testsuite/gdb.arch/i386-avx512.c   | 5 +++++
 gdb/testsuite/gdb.arch/i386-avx512.exp | 8 ++++++++
 2 files changed, 13 insertions(+)

diff --git a/gdb/testsuite/gdb.arch/i386-avx512.c b/gdb/testsuite/gdb.arch/i386-avx512.c
index 9349f09d62e..537adb3ab86 100644
--- a/gdb/testsuite/gdb.arch/i386-avx512.c
+++ b/gdb/testsuite/gdb.arch/i386-avx512.c
@@ -249,6 +249,11 @@ main (int argc, char **argv)
  move back to array and check values.  */
       move_zmm_data_to_memory ();
       asm ("nop"); /* sixth breakpoint here  */
+
+      asm ("vpternlogd $0xff, %zmm0, %zmm0, %zmm0\n"
+   "vpternlogd $0xff, %zmm0, %zmm0, %zmm16\n"
+   "vzeroupper\n");
+      asm ("nop"); /* seventh breakpoint here  */
     }

   return 0;
diff --git a/gdb/testsuite/gdb.arch/i386-avx512.exp b/gdb/testsuite/gdb.arch/i386-avx512.exp
index d806d5f9a94..de2f62c3e1f 100644
--- a/gdb/testsuite/gdb.arch/i386-avx512.exp
+++ b/gdb/testsuite/gdb.arch/i386-avx512.exp
@@ -174,3 +174,11 @@ for { set r 0 } { $r < $nr_regs } { incr r } {
         ".. = \\{f = \\{[expr $r + 30], [expr $r.125 + 30], [expr $r.25 + 20], [expr $r.375 + 20], [expr $r.5 + 10], [expr $r.625 + 10], [expr $r.75 + 10], [expr $r.875 + 10]\\}\\}.*" \
         "check contents of zmm_data\[$r\] after writing XMM regs"
 }
+
+gdb_test "break [gdb_get_line_number "seventh breakpoint here"]" \
+    "Breakpoint .* at .*i386-avx512.c.*" \
+    "set seventh breakpoint in main"
+gdb_continue_to_breakpoint "continue to seventh breakpoint in main"
+for { set r 16 } { $r < $nr_regs } { incr r } {
+    gdb_test "print \$zmm${r}.v16_int32" "{-1 <repeats 16 times>}"
+}
--
2.19.0


Reply | Threaded
Open this post in threaded view
|

RE: [PATCH] x86-64: fix ZMM register state tracking

Jan Beulich-2
In reply to this post by Metzger, Markus T
>>> On 24.09.18 at 19:19, <[hidden email]> wrote:
>> --- a/gdb/i387-tdep.c
>> +++ b/gdb/i387-tdep.c
>> @@ -923,7 +923,8 @@ i387_supply_xsave (struct regcache *regc
>>    enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
>>    struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
>>    const gdb_byte *regs = (const gdb_byte *) xsave;
>> -  int i;
>> +  int i, zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
>> +    + std::min (tdep->num_zmm_regs, 16);
>
> It would be nice to comment on this magic 16 and the min operation.
> It's how XSAVE organizes things but it isn't entirely intuitive.

Okay, I can accept the desire for a (new) comment here, albeit it pretty
much goes along the lines of what I say further down.

>> @@ -1002,7 +1003,8 @@ i387_supply_xsave (struct regcache *regc
>>        return;
>>
>>      case avx512_zmm_h:
>> -      if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
>> +      if ((clear_bv & (regnum < zmm_endlo_regnum ? X86_XSTATE_ZMM_H
>> + : X86_XSTATE_ZMM)))
>
> A comment that XSAVE stores the lower 16 registers in a different place
> than the higher 16 registers and also guards them by different XCR0 bits
> would be nice.

Such a comment would have been appropriate already prior to my patch.
I have to admit that I'm not inclined to invest time in improving
pre-existing (lack of) commentary, the more that how to exactly word
this might be controversial. If that's really a requirement, then I'd
abandon the patch (perhaps for someone else to pick up), maintaining it
locally until the bug has been fixed upstream by whatever means. This
is even more so that the extra time I'll have to put into playing with
Simon's test case is also nothing I really have time for (but I'll try to
find the time nevertheless).

> We hid the different places behind those XSAVE_AVX512_ZMM_H_ADDR
> macros but there's nothing similar for the guard bits.  Maybe add macros
> for the guard check, as well?

Same here - nothing that this patch really changes.

>> @@ -1135,6 +1135,9 @@ i387_supply_xsave (struct regcache *regc
>>      }
>>    else
>>      {
>> +      for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
>> + regcache->raw_supply (i,
>> +      XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i));
>
> This covers the upper halves of zmm16 to zmm31.  Looking at the function it looks
> like the lower halves are covered in separate cases avx512_ymmh_avx512 and
> avx512_xmmh_avx512.  Maybe reflect this in the comment?  It currently suggests
> that it handles the entire upper 16 registers.

Same here - the split among code regions has been there before (also
for e.g. the YMM register lower and upper halves).

Irrespective of my mostly negative responses thanks for your
comments, Jan


Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] x86-64: fix ZMM register state tracking

Jan Beulich-2
In reply to this post by Simon Marchi-2
>>> On 25.09.18 at 05:28, <[hidden email]> wrote:
> On 2018-09-18 09:37 AM, Jan Beulich wrote:
> Thanks for the instructions.  There is already a test covering AVX512
> instructions, so I figured I would add it there.  However, I don't
> have a processor that supports AVX512, so I'm unable to run the test.
>
> Here's a patch, can you try to confirm that the test fails without the
> fix and passes with the fix?  I probably screwed up somewhere, but it
> should be pretty close.

There are two issues here: First of all, unrelated to this patch, the
construct around line 95 in i386-avx512.exp should look like

if [is_amd64_regs_target] {
    set nr_regs 32
} else {
    set nr_regs 8
}

Of course this also affects other tests in here, but without this correction
the loop you add does nothing at all.

And then that very loop and the i386-avx512.c addition are not in sync,
and I'm not sure which way you meant it to be: Either in the C file all 16
upper ZMM registers need to be set identically (not just ZMM16), or
there should be no loop.

Furthermore I think the C code addition and hence the test will need to
be x86-64-specific, as registers ZMM8 and higher are inaccessible in
32-bit mode.

So what I can confirm at this point is that with the fix in place there's
one less new failure from the test than with the fix no in place.

Jan


Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] x86-64: fix ZMM register state tracking

Simon Marchi-2
On 2018-09-25 11:04 AM, Jan Beulich wrote:

>>>> On 25.09.18 at 05:28, <[hidden email]> wrote:
>> On 2018-09-18 09:37 AM, Jan Beulich wrote:
>> Thanks for the instructions.  There is already a test covering AVX512
>> instructions, so I figured I would add it there.  However, I don't
>> have a processor that supports AVX512, so I'm unable to run the test.
>>
>> Here's a patch, can you try to confirm that the test fails without the
>> fix and passes with the fix?  I probably screwed up somewhere, but it
>> should be pretty close.
>
> There are two issues here: First of all, unrelated to this patch, the
> construct around line 95 in i386-avx512.exp should look like
>
> if [is_amd64_regs_target] {
>     set nr_regs 32
> } else {
>     set nr_regs 8
> }
>
> Of course this also affects other tests in here, but without this correction
> the loop you add does nothing at all.

Thanks, this has now been fixed in master.

> And then that very loop and the i386-avx512.c addition are not in sync,
> and I'm not sure which way you meant it to be: Either in the C file all 16
> upper ZMM registers need to be set identically (not just ZMM16), or
> there should be no loop.

Woops.  Testing only zmm0 and zmm16 will be enough I think.

> Furthermore I think the C code addition and hence the test will need to
> be x86-64-specific, as registers ZMM8 and higher are inaccessible in
> 32-bit mode.

Good point.

Here's the revised version with this fixed.  I am not sure about the output
for zmm0 though.


From cd9f3e298a3a516298d3fea15ba80b3eaa33cc7c Mon Sep 17 00:00:00 2001
From: Simon Marchi <[hidden email]>
Date: Mon, 24 Sep 2018 23:28:28 -0400
Subject: [PATCH] AVX512 test

---
 gdb/testsuite/gdb.arch/i386-avx512.c   |  7 +++++++
 gdb/testsuite/gdb.arch/i386-avx512.exp | 10 ++++++++++
 2 files changed, 17 insertions(+)

diff --git a/gdb/testsuite/gdb.arch/i386-avx512.c b/gdb/testsuite/gdb.arch/i386-avx512.c
index 9349f09d62e..7d088ed0343 100644
--- a/gdb/testsuite/gdb.arch/i386-avx512.c
+++ b/gdb/testsuite/gdb.arch/i386-avx512.c
@@ -249,6 +249,13 @@ main (int argc, char **argv)
  move back to array and check values.  */
       move_zmm_data_to_memory ();
       asm ("nop"); /* sixth breakpoint here  */
+
+      asm ("vpternlogd $0xff, %zmm0, %zmm0, %zmm0");
+#ifdef __x86_64__s
+      asm ("vpternlogd $0xff, %zmm0, %zmm0, %zmm16");
+#endif
+      asm ("vzeroupper");
+      asm ("nop"); /* seventh breakpoint here  */
     }

   return 0;
diff --git a/gdb/testsuite/gdb.arch/i386-avx512.exp b/gdb/testsuite/gdb.arch/i386-avx512.exp
index cd15e05fd03..43fde12f257 100644
--- a/gdb/testsuite/gdb.arch/i386-avx512.exp
+++ b/gdb/testsuite/gdb.arch/i386-avx512.exp
@@ -174,3 +174,13 @@ for { set r 0 } { $r < $nr_regs } { incr r } {
         ".. = \\{f = \\{[expr $r + 30], [expr $r.125 + 30], [expr $r.25 + 20], [expr $r.375 + 20], [expr $r.5 + 10], [expr $r.625 + 10], [expr $r.75 + 10], [expr $r.875 + 10]\\}\\}.*" \
         "check contents of zmm_data\[$r\] after writing XMM regs"
 }
+
+gdb_test "break [gdb_get_line_number "seventh breakpoint here"]" \
+    "Breakpoint .* at .*i386-avx512.c.*" \
+    "set seventh breakpoint in main"
+gdb_continue_to_breakpoint "continue to seventh breakpoint in main"
+gdb_test "print \$zmm0.v16_int32" "= {-1, -1, -1, -1, 0 <repeats 12 times>}"
+
+if { $nr_regs >= 16 } {
+    gdb_test "print \$zmm16.v16_int32" "= {-1 <repeats 16 times>}"
+}
--
2.19.0



Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] x86-64: fix ZMM register state tracking

Jan Beulich-2
>>> Simon Marchi <[hidden email]> 10/02/18 9:20 PM >>>
>On 2018-09-25 11:04 AM, Jan Beulich wrote:
>>>>> On 25.09.18 at 05:28, <[hidden email]> wrote:
>>> On 2018-09-18 09:37 AM, Jan Beulich wrote:
>>> Thanks for the instructions.  There is already a test covering AVX512
>>> instructions, so I figured I would add it there.  However, I don't
>>> have a processor that supports AVX512, so I'm unable to run the test.
>>>
>>> Here's a patch, can you try to confirm that the test fails without the
>>> fix and passes with the fix?  I probably screwed up somewhere, but it
>>> should be pretty close.
>>
>> There are two issues here: First of all, unrelated to this patch, the
>> construct around line 95 in i386-avx512.exp should look like
>>
>> if [is_amd64_regs_target] {
>>     set nr_regs 32
>> } else {
>>     set nr_regs 8
>> }
>>
>> Of course this also affects other tests in here, but without this correction
>> the loop you add does nothing at all.
>
>Thanks, this has now been fixed in master.

Ah, good to know.


>Here's the revised version with this fixed.  I am not sure about the output
>for zmm0 though.

I'll give this a go and adjust if need be, but it'll likely take me a couple of
days to get to it. I take it that ...


>--- a/gdb/testsuite/gdb.arch/i386-avx512.c
>+++ b/gdb/testsuite/gdb.arch/i386-avx512.c
>@@ -249,6 +249,13 @@ main (int argc, char **argv)
>move back to array and check values.  */
>move_zmm_data_to_memory ();
>asm ("nop"); /* sixth breakpoint here  */
>+
>+      asm ("vpternlogd $0xff, %zmm0, %zmm0, %zmm0");
>+#ifdef __x86_64__s

... the trailing s here simply is a typo.

Jan




Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] x86-64: fix ZMM register state tracking

Simon Marchi
On 2018-10-03 10:30, Jan Beulich wrote:
>> Here's the revised version with this fixed.  I am not sure about the
>> output
>> for zmm0 though.
>
> I'll give this a go and adjust if need be, but it'll likely take me a
> couple of
> days to get to it. I take it that ...

There's no rush, thanks for helping.

>> --- a/gdb/testsuite/gdb.arch/i386-avx512.c
>> +++ b/gdb/testsuite/gdb.arch/i386-avx512.c
>> @@ -249,6 +249,13 @@ main (int argc, char **argv)
>> move back to array and check values.  */
>> move_zmm_data_to_memory ();
>> asm ("nop"); /* sixth breakpoint here  */
>> +
>> +      asm ("vpternlogd $0xff, %zmm0, %zmm0, %zmm0");
>> +#ifdef __x86_64__s
>
> ... the trailing s here simply is a typo.

Arrrg, indeed.

Simon
Reply | Threaded
Open this post in threaded view
|

[PATCH v2] x86-64: fix ZMM register state tracking

Jan Beulich-2
In reply to this post by Jan Beulich-2
The three AVX512 state components are entirely independent - one being
in its "init state" has no implication whatsoever on either of the other
two. Fully separate X86_XSTATE_ZMM_H and X86_XSTATE_ZMM handling, to
prevent upper halves of the upper 16 ZMM registers to display as if they
were zero (when they aren't) after e.g. VZEROALL/VZEROUPPER.

gdb/
2018-10-10  Jan Beulich  <[hidden email]>

        * i387-tdep.c (i387_supply_xsave): Split handling of
        X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
        (i387_collect_xsave): Likewise.

gdb/testsuite/
2018-10-10  Simon Marchi <[hidden email]>

        * testsuite/gdb.arch/i386-avx512.c,
        testsuite/gdb.arch/i386-avx512.exp: Add 7th test.

---
v2: Attach comments to zmm_endlo_regnum declarations. Add testcase
    provided by Simon.

--- a/gdb/i387-tdep.c
+++ b/gdb/i387-tdep.c
@@ -924,6 +924,12 @@ i387_supply_xsave (struct regcache *regc
   struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
   const gdb_byte *regs = (const gdb_byte *) xsave;
   int i;
+  /* In 64-bit mode the split between "low" and "high" ZMM registers is at
+     ZMM16.  Outside of 64-bit mode there are no "high" ZMM registers at all.
+     Precalculate the number to be used for the split point, with the all
+     registers in the "low" portion outside of 64-bit mode.  */
+  unsigned int zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
+  + std::min (tdep->num_zmm_regs, 16);
   ULONGEST clear_bv;
   static const gdb_byte zero[I386_MAX_REGISTER_SIZE] = { 0 };
   enum
@@ -1002,7 +1008,8 @@ i387_supply_xsave (struct regcache *regc
       return;
 
     case avx512_zmm_h:
-      if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
+      if ((clear_bv & (regnum < zmm_endlo_regnum ? X86_XSTATE_ZMM_H
+ : X86_XSTATE_ZMM)))
  regcache->raw_supply (regnum, zero);
       else
  regcache->raw_supply (regnum,
@@ -1080,21 +1087,17 @@ i387_supply_xsave (struct regcache *regc
     }
  }
 
-      /* Handle the upper ZMM registers.  */
-      if ((tdep->xcr0 & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
+      /* Handle the upper halves of the low 8/16 ZMM registers.  */
+      if ((tdep->xcr0 & X86_XSTATE_ZMM_H))
  {
-  if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
+  if ((clear_bv & X86_XSTATE_ZMM_H))
     {
-      for (i = I387_ZMM0H_REGNUM (tdep);
-   i < I387_ZMMENDH_REGNUM (tdep);
-   i++)
+      for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
  regcache->raw_supply (i, zero);
     }
   else
     {
-      for (i = I387_ZMM0H_REGNUM (tdep);
-   i < I387_ZMMENDH_REGNUM (tdep);
-   i++)
+      for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
  regcache->raw_supply (i,
       XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i));
     }
@@ -1119,11 +1122,13 @@ i387_supply_xsave (struct regcache *regc
     }
  }
 
-      /* Handle the YMM_AVX512 registers.  */
+      /* Handle the upper 16 ZMM/YMM/XMM registers (if any).  */
       if ((tdep->xcr0 & X86_XSTATE_ZMM))
  {
   if ((clear_bv & X86_XSTATE_ZMM))
     {
+      for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
+ regcache->raw_supply (i, zero);
       for (i = I387_YMM16H_REGNUM (tdep);
    i < I387_YMMH_AVX512_END_REGNUM (tdep);
    i++)
@@ -1135,6 +1140,9 @@ i387_supply_xsave (struct regcache *regc
     }
   else
     {
+      for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
+ regcache->raw_supply (i,
+      XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i));
       for (i = I387_YMM16H_REGNUM (tdep);
    i < I387_YMMH_AVX512_END_REGNUM (tdep);
    i++)
@@ -1341,6 +1349,9 @@ i387_collect_xsave (const struct regcach
   gdb_byte raw[I386_MAX_REGISTER_SIZE];
   ULONGEST initial_xstate_bv, clear_bv, xstate_bv = 0;
   unsigned int i;
+  /* See the comment in i387_supply_xsave().  */
+  unsigned int zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
+  + std::min (tdep->num_zmm_regs, 16);
   enum
     {
       x87_ctrl_or_mxcsr = 0x1,
@@ -1441,9 +1452,8 @@ i387_collect_xsave (const struct regcach
      i < I387_MPXEND_REGNUM (tdep); i++)
   memset (XSAVE_MPX_ADDR (tdep, regs, i), 0, 8);
 
-      if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
- for (i = I387_ZMM0H_REGNUM (tdep);
-     i < I387_ZMMENDH_REGNUM (tdep); i++)
+      if ((clear_bv & X86_XSTATE_ZMM_H))
+ for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
   memset (XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i), 0, 32);
 
       if ((clear_bv & X86_XSTATE_K))
@@ -1453,6 +1463,8 @@ i387_collect_xsave (const struct regcach
 
       if ((clear_bv & X86_XSTATE_ZMM))
  {
+  for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
+    memset (XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i), 0, 32);
   for (i = I387_YMM16H_REGNUM (tdep);
        i < I387_YMMH_AVX512_END_REGNUM (tdep); i++)
     memset (XSAVE_YMM_AVX512_ADDR (tdep, regs, i), 0, 16);
--- a/gdb/testsuite/gdb.arch/i386-avx512.c
+++ b/gdb/testsuite/gdb.arch/i386-avx512.c
@@ -249,6 +249,13 @@ main (int argc, char **argv)
  move back to array and check values.  */
       move_zmm_data_to_memory ();
       asm ("nop"); /* sixth breakpoint here  */
+
+      asm ("vpternlogd $0xff, %zmm0, %zmm0, %zmm0");
+#ifdef __x86_64__
+      asm ("vpternlogd $0xff, %zmm0, %zmm0, %zmm16");
+#endif
+      asm ("vzeroupper");
+      asm ("nop"); /* seventh breakpoint here  */
     }
 
   return 0;
--- a/gdb/testsuite/gdb.arch/i386-avx512.exp
+++ b/gdb/testsuite/gdb.arch/i386-avx512.exp
@@ -174,3 +174,13 @@ for { set r 0 } { $r < $nr_regs } { incr
         ".. = \\{f = \\{[expr $r + 30], [expr $r.125 + 30], [expr $r.25 + 20], [expr $r.375 + 20], [expr $r.5 + 10], [expr $r.625 + 10], [expr $r.75 + 10], [expr $r.875 + 10]\\}\\}.*" \
         "check contents of zmm_data\[$r\] after writing XMM regs"
 }
+
+gdb_test "break [gdb_get_line_number "seventh breakpoint here"]" \
+    "Breakpoint .* at .*i386-avx512.c.*" \
+    "set seventh breakpoint in main"
+gdb_continue_to_breakpoint "continue to seventh breakpoint in main"
+gdb_test "print \$zmm0.v16_int32" "= {-1, -1, -1, -1, 0 <repeats 12 times>}"
+
+if { $nr_regs >= 16 } {
+    gdb_test "print \$zmm16.v16_int32" "= {-1 <repeats 16 times>}"
+}



Reply | Threaded
Open this post in threaded view
|

Ping: [PATCH v2] x86-64: fix ZMM register state tracking

Jan Beulich-2
In reply to this post by Jan Beulich-2
>>> On 10.10.18 at 17:12,  wrote:
> The three AVX512 state components are entirely independent - one being
> in its "init state" has no implication whatsoever on either of the other
> two. Fully separate X86_XSTATE_ZMM_H and X86_XSTATE_ZMM handling, to
> prevent upper halves of the upper 16 ZMM registers to display as if they
> were zero (when they aren't) after e.g. VZEROALL/VZEROUPPER.
>
> gdb/
> 2018-10-10  Jan Beulich  <[hidden email]>
>
> * i387-tdep.c (i387_supply_xsave): Split handling of
> X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
> (i387_collect_xsave): Likewise.
>
> gdb/testsuite/
> 2018-10-10  Simon Marchi <[hidden email]>
>
> * testsuite/gdb.arch/i386-avx512.c,
> testsuite/gdb.arch/i386-avx512.exp: Add 7th test.
>
> ---
> v2: Attach comments to zmm_endlo_regnum declarations. Add testcase
>     provided by Simon.
>
> --- a/gdb/i387-tdep.c
> +++ b/gdb/i387-tdep.c
> @@ -924,6 +924,12 @@ i387_supply_xsave (struct regcache *regc
>    struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
>    const gdb_byte *regs = (const gdb_byte *) xsave;
>    int i;
> +  /* In 64-bit mode the split between "low" and "high" ZMM registers is at
> +     ZMM16.  Outside of 64-bit mode there are no "high" ZMM registers at
> all.
> +     Precalculate the number to be used for the split point, with the all
> +     registers in the "low" portion outside of 64-bit mode.  */
> +  unsigned int zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
> +  + std::min (tdep->num_zmm_regs, 16);
>    ULONGEST clear_bv;
>    static const gdb_byte zero[I386_MAX_REGISTER_SIZE] = { 0 };
>    enum
> @@ -1002,7 +1008,8 @@ i387_supply_xsave (struct regcache *regc
>        return;
>  
>      case avx512_zmm_h:
> -      if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> +      if ((clear_bv & (regnum < zmm_endlo_regnum ? X86_XSTATE_ZMM_H
> + : X86_XSTATE_ZMM)))
>   regcache->raw_supply (regnum, zero);
>        else
>   regcache->raw_supply (regnum,
> @@ -1080,21 +1087,17 @@ i387_supply_xsave (struct regcache *regc
>      }
>   }
>  
> -      /* Handle the upper ZMM registers.  */
> -      if ((tdep->xcr0 & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> +      /* Handle the upper halves of the low 8/16 ZMM registers.  */
> +      if ((tdep->xcr0 & X86_XSTATE_ZMM_H))
>   {
> -  if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> +  if ((clear_bv & X86_XSTATE_ZMM_H))
>      {
> -      for (i = I387_ZMM0H_REGNUM (tdep);
> -   i < I387_ZMMENDH_REGNUM (tdep);
> -   i++)
> +      for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
>   regcache->raw_supply (i, zero);
>      }
>    else
>      {
> -      for (i = I387_ZMM0H_REGNUM (tdep);
> -   i < I387_ZMMENDH_REGNUM (tdep);
> -   i++)
> +      for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
>   regcache->raw_supply (i,
>        XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i));
>      }
> @@ -1119,11 +1122,13 @@ i387_supply_xsave (struct regcache *regc
>      }
>   }
>  
> -      /* Handle the YMM_AVX512 registers.  */
> +      /* Handle the upper 16 ZMM/YMM/XMM registers (if any).  */
>        if ((tdep->xcr0 & X86_XSTATE_ZMM))
>   {
>    if ((clear_bv & X86_XSTATE_ZMM))
>      {
> +      for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
> + regcache->raw_supply (i, zero);
>        for (i = I387_YMM16H_REGNUM (tdep);
>     i < I387_YMMH_AVX512_END_REGNUM (tdep);
>     i++)
> @@ -1135,6 +1140,9 @@ i387_supply_xsave (struct regcache *regc
>      }
>    else
>      {
> +      for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
> + regcache->raw_supply (i,
> +      XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i));
>        for (i = I387_YMM16H_REGNUM (tdep);
>     i < I387_YMMH_AVX512_END_REGNUM (tdep);
>     i++)
> @@ -1341,6 +1349,9 @@ i387_collect_xsave (const struct regcach
>    gdb_byte raw[I386_MAX_REGISTER_SIZE];
>    ULONGEST initial_xstate_bv, clear_bv, xstate_bv = 0;
>    unsigned int i;
> +  /* See the comment in i387_supply_xsave().  */
> +  unsigned int zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
> +  + std::min (tdep->num_zmm_regs, 16);
>    enum
>      {
>        x87_ctrl_or_mxcsr = 0x1,
> @@ -1441,9 +1452,8 @@ i387_collect_xsave (const struct regcach
>       i < I387_MPXEND_REGNUM (tdep); i++)
>    memset (XSAVE_MPX_ADDR (tdep, regs, i), 0, 8);
>  
> -      if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> - for (i = I387_ZMM0H_REGNUM (tdep);
> -     i < I387_ZMMENDH_REGNUM (tdep); i++)
> +      if ((clear_bv & X86_XSTATE_ZMM_H))
> + for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
>    memset (XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i), 0, 32);
>  
>        if ((clear_bv & X86_XSTATE_K))
> @@ -1453,6 +1463,8 @@ i387_collect_xsave (const struct regcach
>  
>        if ((clear_bv & X86_XSTATE_ZMM))
>   {
> +  for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
> +    memset (XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i), 0, 32);
>    for (i = I387_YMM16H_REGNUM (tdep);
>         i < I387_YMMH_AVX512_END_REGNUM (tdep); i++)
>      memset (XSAVE_YMM_AVX512_ADDR (tdep, regs, i), 0, 16);
> --- a/gdb/testsuite/gdb.arch/i386-avx512.c
> +++ b/gdb/testsuite/gdb.arch/i386-avx512.c
> @@ -249,6 +249,13 @@ main (int argc, char **argv)
>   move back to array and check values.  */
>        move_zmm_data_to_memory ();
>        asm ("nop"); /* sixth breakpoint here  */
> +
> +      asm ("vpternlogd $0xff, %zmm0, %zmm0, %zmm0");
> +#ifdef __x86_64__
> +      asm ("vpternlogd $0xff, %zmm0, %zmm0, %zmm16");
> +#endif
> +      asm ("vzeroupper");
> +      asm ("nop"); /* seventh breakpoint here  */
>      }
>  
>    return 0;
> --- a/gdb/testsuite/gdb.arch/i386-avx512.exp
> +++ b/gdb/testsuite/gdb.arch/i386-avx512.exp
> @@ -174,3 +174,13 @@ for { set r 0 } { $r < $nr_regs } { incr
>          ".. = \\{f = \\{[expr $r + 30], [expr $r.125 + 30], [expr $r.25 +
> 20], [expr $r.375 + 20], [expr $r.5 + 10], [expr $r.625 + 10], [expr $r.75 +
> 10], [expr $r.875 + 10]\\}\\}.*" \
>          "check contents of zmm_data\[$r\] after writing XMM regs"
>  }
> +
> +gdb_test "break [gdb_get_line_number "seventh breakpoint here"]" \
> +    "Breakpoint .* at .*i386-avx512.c.*" \
> +    "set seventh breakpoint in main"
> +gdb_continue_to_breakpoint "continue to seventh breakpoint in main"
> +gdb_test "print \$zmm0.v16_int32" "= {-1, -1, -1, -1, 0 <repeats 12
> times>}"
> +
> +if { $nr_regs >= 16 } {
> +    gdb_test "print \$zmm16.v16_int32" "= {-1 <repeats 16 times>}"
> +}
>
>
>
>



Reply | Threaded
Open this post in threaded view
|

Re: Ping: [PATCH v2] x86-64: fix ZMM register state tracking

Simon Marchi
On 2018-10-29 06:31, Jan Beulich wrote:

>>>> On 10.10.18 at 17:12,  wrote:
>> The three AVX512 state components are entirely independent - one being
>> in its "init state" has no implication whatsoever on either of the
>> other
>> two. Fully separate X86_XSTATE_ZMM_H and X86_XSTATE_ZMM handling, to
>> prevent upper halves of the upper 16 ZMM registers to display as if
>> they
>> were zero (when they aren't) after e.g. VZEROALL/VZEROUPPER.
>>
>> gdb/
>> 2018-10-10  Jan Beulich  <[hidden email]>
>>
>> * i387-tdep.c (i387_supply_xsave): Split handling of
>> X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
>> (i387_collect_xsave): Likewise.
>>
>> gdb/testsuite/
>> 2018-10-10  Simon Marchi <[hidden email]>
>>
>> * testsuite/gdb.arch/i386-avx512.c,
>> testsuite/gdb.arch/i386-avx512.exp: Add 7th test.
>>
>> ---
>> v2: Attach comments to zmm_endlo_regnum declarations. Add testcase
>>     provided by Simon.

The testcase obviously LGTM.  I will let Markus approve the other
changes.

Simon
Reply | Threaded
Open this post in threaded view
|

RE: Ping: [PATCH v2] x86-64: fix ZMM register state tracking

Metzger, Markus T
> On 2018-10-29 06:31, Jan Beulich wrote:
> >>>> On 10.10.18 at 17:12,  wrote:
> >> The three AVX512 state components are entirely independent - one
> >> being in its "init state" has no implication whatsoever on either of
> >> the other two. Fully separate X86_XSTATE_ZMM_H and X86_XSTATE_ZMM
> >> handling, to prevent upper halves of the upper 16 ZMM registers to
> >> display as if they were zero (when they aren't) after e.g.
> >> VZEROALL/VZEROUPPER.
> >>
> >> gdb/
> >> 2018-10-10  Jan Beulich  <[hidden email]>
> >>
> >> * i387-tdep.c (i387_supply_xsave): Split handling of
> >> X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
> >> (i387_collect_xsave): Likewise.
> >>
> >> gdb/testsuite/
> >> 2018-10-10  Simon Marchi <[hidden email]>
> >>
> >> * testsuite/gdb.arch/i386-avx512.c,
> >> testsuite/gdb.arch/i386-avx512.exp: Add 7th test.
> >>
> >> ---
> >> v2: Attach comments to zmm_endlo_regnum declarations. Add testcase
> >>     provided by Simon.
>
> The testcase obviously LGTM.  I will let Markus approve the other changes.

The code already looked good to me in v1.  Thanks for adding comments.

Markus.
Intel Deutschland GmbH
Registered Address: Am Campeon 10-12, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de
Managing Directors: Christin Eisenschmid, Christian Lamprechter
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928

Reply | Threaded
Open this post in threaded view
|

RE: Ping: [PATCH v2] x86-64: fix ZMM register state tracking

Jan Beulich-2
>>> On 07.11.18 at 10:07, <[hidden email]> wrote:
>>  On 2018-10-29 06:31, Jan Beulich wrote:
>> >>>> On 10.10.18 at 17:12,  wrote:
>> >> The three AVX512 state components are entirely independent - one
>> >> being in its "init state" has no implication whatsoever on either of
>> >> the other two. Fully separate X86_XSTATE_ZMM_H and X86_XSTATE_ZMM
>> >> handling, to prevent upper halves of the upper 16 ZMM registers to
>> >> display as if they were zero (when they aren't) after e.g.
>> >> VZEROALL/VZEROUPPER.
>> >>
>> >> gdb/
>> >> 2018-10-10  Jan Beulich  <[hidden email]>
>> >>
>> >> * i387-tdep.c (i387_supply_xsave): Split handling of
>> >> X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
>> >> (i387_collect_xsave): Likewise.
>> >>
>> >> gdb/testsuite/
>> >> 2018-10-10  Simon Marchi <[hidden email]>
>> >>
>> >> * testsuite/gdb.arch/i386-avx512.c,
>> >> testsuite/gdb.arch/i386-avx512.exp: Add 7th test.
>> >>
>> >> ---
>> >> v2: Attach comments to zmm_endlo_regnum declarations. Add testcase
>> >>     provided by Simon.
>>
>> The testcase obviously LGTM.  I will let Markus approve the other changes.
>
> The code already looked good to me in v1.  Thanks for adding comments.

So can I translate this into an ack for me to commit the change?
Or else, who would be the one to give the go-ahead?

Jan


Reply | Threaded
Open this post in threaded view
|

RE: Ping: [PATCH v2] x86-64: fix ZMM register state tracking

Metzger, Markus T
> >>> On 07.11.18 at 10:07, <[hidden email]> wrote:
> >>  On 2018-10-29 06:31, Jan Beulich wrote:
> >> >>>> On 10.10.18 at 17:12,  wrote:
> >> >> The three AVX512 state components are entirely independent - one
> >> >> being in its "init state" has no implication whatsoever on either
> >> >> of the other two. Fully separate X86_XSTATE_ZMM_H and
> >> >> X86_XSTATE_ZMM handling, to prevent upper halves of the upper 16
> >> >> ZMM registers to display as if they were zero (when they aren't) after e.g.
> >> >> VZEROALL/VZEROUPPER.
> >> >>
> >> >> gdb/
> >> >> 2018-10-10  Jan Beulich  <[hidden email]>
> >> >>
> >> >> * i387-tdep.c (i387_supply_xsave): Split handling of
> >> >> X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
> >> >> (i387_collect_xsave): Likewise.
> >> >>
> >> >> gdb/testsuite/
> >> >> 2018-10-10  Simon Marchi <[hidden email]>
> >> >>
> >> >> * testsuite/gdb.arch/i386-avx512.c,
> >> >> testsuite/gdb.arch/i386-avx512.exp: Add 7th test.
> >> >>
> >> >> ---
> >> >> v2: Attach comments to zmm_endlo_regnum declarations. Add testcase
> >> >>     provided by Simon.
> >>
> >> The testcase obviously LGTM.  I will let Markus approve the other changes.
> >
> > The code already looked good to me in v1.  Thanks for adding comments.
>
> So can I translate this into an ack for me to commit the change?
> Or else, who would be the one to give the go-ahead?

Simon can approve your patch.

IIRC Pedro had a question regarding gdbserver.  From a first look, it seems to get the
feature bits right but does not distinguish 32-bit and 64-bit mode regarding the number
of available registers.

Did you run the new test you added also in remote configuration?

Markus.

Intel Deutschland GmbH
Registered Address: Am Campeon 10-12, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de
Managing Directors: Christin Eisenschmid, Christian Lamprechter
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928

12