Re: [Patch] multibyte encodings in strings

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

Re: [Patch] multibyte encodings in strings

m4tze
bump

Should there be a problem with the patch format or the way of submission, please let me know.

Mathias

> Dear binutils team,
>
> I am sending you a patch that fixes an issue in the 'strings' binary.
> The issue concerns finding multibyte-encoded strings that start at odd offsets.
>
> To reproduce the issue, run the following command:
>
>     echo "aa53007400720069006e0067003100aa53007400720069006e0067003200bb" | xxd -p -r | strings -el
>
> Only the second string will be found.
>
> The attached patch fixes this problem by stepping forward only a single byte,
> rather than a full character width, when an invalid character is found.
>
> Do not hesitate to contact me in case of questions or feedback.
>
> Happy to contribute,
>
> Mathias
>
> diff --git a/binutils/strings.c b/binutils/strings.c
> index 74545dbbdc..5bd31213c0 100644
> --- a/binutils/strings.c
> +++ b/binutils/strings.c
> @@ -540,9 +540,41 @@ print_strings (const char *filename, FILE *stream, file_ptr address,
>           free (buf);
>           return;
>         }
> -     if (! STRING_ISGRAPHIC (c))
> -       /* Found a non-graphic.  Try again starting with next char.  */
> +     if (! STRING_ISGRAPHIC (c)){
> +       /* Found a non-graphic. Try again starting with next char. */
> +        if (encoding_bytes > 1){
> +          /* In case of multibyte encodings rewind using magic buffer. */
> +          if (magiccount == 0){
> +            /* If no magic buffer exists: use memory of c */
> +            switch (encoding)
> +            {
> +              default:
> +                break;
> +              case 'b':
> +                c = c & 0xff;
> +                magiccount += 1;
> +                break;
> +              case 'l':
> +              case 'L':
> +                c = c >> 8;
> +                magiccount += (encoding_bytes -1);
> +                break;
> +              case 'B':
> +                c = (( c & 0xff0000) >> 16) | ( c & 0xff00)
> +                    | (( c & 0xff) << 16);
> +                magiccount += 3;
> +                break;
> +            }
> +            magic = (char *)&c;
> +          }
> +          else {
> +             /* If magic buffer exists: rewind. */
> +             magic = magic - (encoding_bytes -1);
> +          }
> +          address = address - (encoding_bytes -1);
> +        }
>         goto tryline;
> +      }
>       buf[i] = c;
>     }
>  
> @@ -621,8 +653,40 @@ print_strings (const char *filename, FILE *stream, file_ptr address,
>       c = get_char (stream, &address, &magiccount, &magic);
>       if (c == EOF)
>         break;
> -     if (! STRING_ISGRAPHIC (c))
> +     if (! STRING_ISGRAPHIC (c)){
> +        if (encoding_bytes > 1){
> +        /*In case of multibyte encodings rewind using magic buffer. */
> +          if (magiccount == 0){
> +            /* If no magic buffer exists: use memory of c */
> +            switch (encoding)
> +            {
> +              default:
> +                break;
> +              case 'b':
> +                c = c & 0xff;
> +                magiccount += 1;
> +                break;
> +              case 'l':
> +              case 'L':
> +                c = c >> 8;
> +                magiccount += (encoding_bytes -1);
> +                break;
> +              case 'B':
> +                c = (( c & 0xff0000) >> 16) | ( c & 0xff00)
> +                    | (( c & 0xff) << 16);
> +                magiccount += 3;
> +                break;
> +            }
> +            magic = (char *)&c;
> +          }
> +          else {
> +             /* If magic buffer exists: rewind. */
> +             magic = magic - (encoding_bytes -1);
> +          }
> +          address = address - (encoding_bytes -1);
> +        }
>         break;
> +      }
>       putchar (c);
>     }