[PATCH] Use malloca instead alloca

classic Classic list List threaded Threaded
37 messages Options
12
Reply | Threaded
Open this post in threaded view
|

[PATCH] Use malloca instead alloca

Ondřej Bílka
As I described in http://sourceware.org/ml/libc-alpha/2012-12/msg00459.html 
I looked into using malloca.

It has two advantages, first is to avoid allocation large arrays into
stack, second is simplifying code by reducing number of #ifdef's

Initialy I examined gnulib one but it uses hash tables and is hard to
inline so I wrote faster variant.

As example I added used it in argp/argp-help.c .
I did not do more general alloca detection that glibc uses.

Also it could be possible GNU extension for 2.18?

---
 argp/argp-help.c |   23 ++++--------
 malloc/malloca.h |  102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+), 16 deletions(-)
 create mode 100644 malloc/malloca.h

diff --git a/argp/argp-help.c b/argp/argp-help.c
index 80c2b7d..f0f74b4 100644
--- a/argp/argp-help.c
+++ b/argp/argp-help.c
@@ -25,20 +25,7 @@
 #include <config.h>
 #endif
 
-/* AIX requires this to be the first thing in the file.  */
-#ifndef __GNUC__
-# if HAVE_ALLOCA_H || defined _LIBC
-#  include <alloca.h>
-# else
-#  ifdef _AIX
-#pragma alloca
-#  else
-#   ifndef alloca /* predefined by HP cc +Olibcalls */
-char *alloca ();
-#   endif
-#  endif
-# endif
-#endif
+#include <malloc/malloca.h>
 
 #include <stdbool.h>
 #include <stddef.h>
@@ -1310,11 +1297,12 @@ usage_long_opt (const struct argp_option *opt,
 static void
 hol_usage (struct hol *hol, argp_fmtstream_t stream)
 {
+  char *short_no_arg_opts = NULL;
   if (hol->num_entries > 0)
     {
       unsigned nentries;
       struct hol_entry *entry;
-      char *short_no_arg_opts = alloca (strlen (hol->short_options) + 1);
+      short_no_arg_opts = malloca (strlen (hol->short_options) + 1);
       char *snao_end = short_no_arg_opts;
 
       /* First we put a list of short options without arguments.  */
@@ -1343,6 +1331,7 @@ hol_usage (struct hol *hol, argp_fmtstream_t stream)
  hol_entry_long_iterate (entry, usage_long_opt,
  entry->argp->argp_domain, stream);
     }
+  freea(short_no_arg_opts);
 }
 
 /* Make a HOL containing all levels of options in ARGP.  CLUSTER is the
@@ -1547,6 +1536,7 @@ static void
 _help (const struct argp *argp, const struct argp_state *state, FILE *stream,
        unsigned flags, char *name)
 {
+  char *pattern_levels = NULL;
   int anything = 0; /* Whether we've output anything.  */
   struct hol *hol = 0;
   argp_fmtstream_t fs;
@@ -1585,7 +1575,7 @@ _help (const struct argp *argp, const struct argp_state *state, FILE *stream,
     {
       int first_pattern = 1, more_patterns;
       size_t num_pattern_levels = argp_args_levels (argp);
-      char *pattern_levels = alloca (num_pattern_levels);
+      pattern_levels = malloca (num_pattern_levels);
 
       memset (pattern_levels, 0, num_pattern_levels);
 
@@ -1680,6 +1670,7 @@ Try `%s --help' or `%s --usage' for more information.\n"),
   if (hol)
     hol_free (hol);
 
+  freea (pattern_levels);
   __argp_fmtstream_free (fs);
 }
 
diff --git a/malloc/malloca.h b/malloc/malloca.h
new file mode 100644
index 0000000..fbae6e8
--- /dev/null
+++ b/malloc/malloca.h
@@ -0,0 +1,102 @@
+/* Safe automatic memory allocation.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef _MALLOCA_H
+#define _MALLOCA_H
+
+#include <alloca.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define ALLOCA_MC 0x2439a2431bca4812L
+#define MALLOC_MC 0x1bca48122439a243L
+
+/* AIX requires this to be the first thing in the file.  */
+#ifndef __GNUC__
+# if HAVE_ALLOCA_H || defined _LIBC
+#  include <alloca.h>
+# else
+#  ifdef _AIX
+#pragma alloca
+#  else
+#   ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+#   endif
+#  endif
+# endif
+#endif
+
+
+  /* malloca(N) is a safe variant of alloca(N).  It allocates N bytes of
+     memory allocated on the stack or heap for large requests.
+     It must be freed using freea() before
+     the function returns.  Upon failure, it returns NULL.  */
+#if 1
+#define malloca(n) ({\
+  void *__r__ = NULL;\
+  if (n < 4096 - 8)\
+    {\
+      __r__ = alloca (n + sizeof (uint64_t));\
+      if (__r__)\
+        {\
+          *((uint64_t *)__r__)=ALLOCA_MC;\
+          __r__ += sizeof (uint64_t);\
+        }\
+    }\
+  if (!__r__)\
+    {\
+      __r__ = malloc (n + sizeof (uint64_t));\
+      if (__r__)\
+        {\
+          *((uint64_t *)__r__)=MALLOC_MC;\
+          __r__ += sizeof (uint64_t);\
+        }\
+    }\
+  __r__;\
+})
+
+#define freea(r) {\
+void *__r__ = r;\
+if (__r__)\
+  {\
+    __r__ -= sizeof (uint64_t);\
+    if  (*((uint64_t *)__r__) == MALLOC_MC)\
+      free (__r__);\
+    else if (*((uint64_t *)__r__) != ALLOCA_MC) abort_freea();\
+  }\
+}
+
+static void abort_freea()
+{
+  fprintf(stderr, "double freea or corruption\n");
+  abort();
+}
+#else
+#define malloca malloc
+#define freea   free
+#endif
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MALLOCA_H */
--
1.7.4.4

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Paul Eggert
On 12/28/2012 05:29 AM, Ondřej Bílka wrote:
> Initialy I examined gnulib one but it uses hash tables and is hard to
> inline so I wrote faster variant.

Where is the faster variant?  Should/could it be merged into gnulib?
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Ondřej Bílka
On Fri, Dec 28, 2012 at 09:08:39AM -0800, Paul Eggert wrote:
> On 12/28/2012 05:29 AM, Ondřej Bílka wrote:
> >Initialy I examined gnulib one but it uses hash tables and is hard to
> >inline so I wrote faster variant.
>
> Where is the faster variant?

Below. I fixed few details since first submission to libc-alpha.

> Should/could it be merged into gnulib?

Should. I am not expert on portability issues so improving portability
would be appreciated.


/* Safe automatic memory allocation.
   Copyright (C) 2012 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */

#ifndef _MALLOCA_H
#define _MALLOCA_H

#include <alloca.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif



#define _ALLOCA_MC 0x2439a2431bca4812L
#define _MALLOC_MC 0x1bca48122439a243L

/* AIX requires this to be the first thing in the file.  */
#ifndef __GNUC__
# if HAVE_ALLOCA_H || defined _LIBC
#  include <alloca.h>
# else
#  ifdef _AIX
#pragma alloca
#  else
#   ifndef alloca /* predefined by HP cc +Olibcalls */
char *alloca ();
#   endif
#  endif
# endif
#endif

  /* malloca(N) is a safe variant of alloca(N).  It allocates N bytes of
     memory allocated on the stack or heap for large requests.
     It must be freed using freea() before
     the function returns.  Upon failure, it returns NULL.  */

#if 1
#define malloca(n) ({\
  void *__r__ = NULL;\
  if (n < 4096 - 8)\
    {\
      __r__ = alloca (n + sizeof (uint64_t));\
      if (__r__)\
        {\
          *((uint64_t *)__r__) = _ALLOCA_MC;\
          __r__ += sizeof (uint64_t);\
        }\
    }\
  if (!__r__)\
    {\
      __r__ = malloc (n + sizeof (uint64_t));\
      if (__r__)\
        {\
          *((uint64_t *)__r__) = _MALLOC_MC;\
          __r__ += sizeof (uint64_t);\
        }\
    }\
  __r__;\
})


/* If desired we could detect more corruption by
   adding constant to end of alloca'd array.*/

#define freea(r) {\
void *__r__ = r;\
if (__r__)\
  {\
    __r__ -= sizeof (uint64_t);\
    if  (    *((uint64_t *)__r__) == _MALLOC_MC)\
      free (__r__);\
    else if (*((uint64_t *)__r__) != _ALLOCA_MC)\
      __abort_freea();\
  }\
}

static void __abort_freea()
{
  fprintf(stderr, "double freea or corruption\n");
  abort();
}
#else
#define malloca malloc
#define freea   free
#endif
#ifdef __cplusplus
}
#endif

#endif /* _MALLOCA_H */
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Rich Felker
On Fri, Dec 28, 2012 at 06:38:23PM +0100, Ondřej Bílka wrote:
>   /* malloca(N) is a safe variant of alloca(N).  It allocates N bytes of
>      memory allocated on the stack or heap for large requests.
>      It must be freed using freea() before
>      the function returns.  Upon failure, it returns NULL.  */
>
> #if 1
> #define malloca(n) ({\
>   void *__r__ = NULL;\
>   if (n < 4096 - 8)\

This comparison is performed without promoting n to size_t. Although
in most correct usages it should not matter, I think this should be
fixed; things like malloca(-1) should fail (or allocate 4gb-1
successfully) rather than succeeding and then causing memory
corruption. Note that n is also being evaluated more than once, so
just storing it in a variable of type size_t first would avoid this
issue too.

Rich
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Ondřej Bílka
On Fri, Dec 28, 2012 at 03:17:30PM -0500, Rich Felker wrote:

> On Fri, Dec 28, 2012 at 06:38:23PM +0100, Ondřej Bílka wrote:
> >   /* malloca(N) is a safe variant of alloca(N).  It allocates N bytes of
> >      memory allocated on the stack or heap for large requests.
> >      It must be freed using freea() before
> >      the function returns.  Upon failure, it returns NULL.  */
> >
> > #if 1
> > #define malloca(n) ({\
> >   void *__r__ = NULL;\
> >   if (n < 4096 - 8)\
>
> This comparison is performed without promoting n to size_t. Although
> in most correct usages it should not matter, I think this should be
> fixed; things like malloca(-1) should fail (or allocate 4gb-1
> successfully) rather than succeeding and then causing memory
> corruption. Note that n is also being evaluated more than once, so
> just storing it in a variable of type size_t first would avoid this
> issue too.
>
> Rich

Fixed, here is updated version.


/* Safe automatic memory allocation.
   Copyright (C) 2012 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */

#ifndef _MALLOCA_H
#define _MALLOCA_H

/* AIX requires this to be the first thing in the file.  */
#ifndef __GNUC__
# if HAVE_ALLOCA_H || defined _LIBC
#  include <alloca.h>
# else
#  ifdef _AIX
#pragma alloca
#  else
#   ifndef alloca /* predefined by HP cc +Olibcalls */
char *alloca ();
#   endif
#  endif
# endif
#endif

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

#define _ALLOCA_MC 0x2439a2431bca4812L
#define _MALLOC_MC 0x1bca48122439a243L


  /* malloca(N) is a safe variant of alloca(N).  It allocates N bytes of
     memory allocated on the stack or heap for large requests.
     It must be freed using freea() before
     the function returns.  Upon failure, it returns NULL.  */

#if 1
#define malloca(n) ({\
  size_t  __n__ = n;\
  void  * __r__ = NULL;\
  if (__n__ <= 4096)\
    {\
      __r__ = alloca (__n__ + sizeof (uint64_t));\
      if (__r__)\
        {\
          *((uint64_t *)__r__) = _ALLOCA_MC;\
          __r__ += sizeof (uint64_t);\
        }\
    }\
  if (!__r__)\
    {\
      __r__ = malloc (__n__ + sizeof (uint64_t));\
      if (__r__)\
        {\
          *((uint64_t *)__r__) = _MALLOC_MC;\
          __r__ += sizeof (uint64_t);\
        }\
    }\
  __r__;\
})

/* If desired we could detect more corruption by
   adding constant to end of alloca'd array. */

#define freea(r) {\
void *__r__ = r;\
if (__r__)\
  {\
    __r__ -= sizeof (uint64_t);\
    if  (    *((uint64_t *)__r__) == _MALLOC_MC)\
      free (__r__);\
    else if (*((uint64_t *)__r__) != _ALLOCA_MC)\
      __abort_freea();\
  }\
}

static void __abort_freea()
{
  fprintf(stderr, "double freea or corruption\n");
  abort();
}
#else
#define malloca malloc
#define freea   free
#endif
#ifdef __cplusplus
}
#endif

#endif /* _MALLOCA_H */
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Petr Baudis
  Hi!

  I applaud any effort that absolves us of the current unholy pointer
flags we need to keep track of now.

On Sat, Dec 29, 2012 at 11:33:15AM +0100, Ondřej Bílka wrote:

> /* Safe automatic memory allocation.
>    Copyright (C) 2012 Free Software Foundation, Inc.
>
>    This program is free software; you can redistribute it and/or modify
>    it under the terms of the GNU General Public License as published by
>    the Free Software Foundation; either version 2, or (at your option)
>    any later version.
>
>    This program is distributed in the hope that it will be useful,
>    but WITHOUT ANY WARRANTY; without even the implied warranty of
>    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>    GNU General Public License for more details.
>
>    You should have received a copy of the GNU General Public License
>    along with this program; if not, see <http://www.gnu.org/licenses/>.  */
>
> #ifndef _MALLOCA_H
> #define _MALLOCA_H
>
> /* AIX requires this to be the first thing in the file.  */
> #ifndef __GNUC__
> # if HAVE_ALLOCA_H || defined _LIBC
> #  include <alloca.h>
> # else
> #  ifdef _AIX
> #pragma alloca
> #  else
> #   ifndef alloca /* predefined by HP cc +Olibcalls */
> char *alloca ();
> #   endif

Does this matter if we go ahead with using ({...}) just a few lines
below?

> #  endif
> # endif
> #endif
>
> #include <stddef.h>
> #include <stdlib.h>
> #include <stdio.h>
> #include <stdint.h>
>
> #ifdef __cplusplus
> extern "C" {
> #endif
>
> #define _ALLOCA_MC 0x2439a2431bca4812L
> #define _MALLOC_MC 0x1bca48122439a243L

  Does the 'L' have any effect? I'm not sure if it isn't actually doing
any harm on 32-bit platforms... If anything, I'd use 'ULL', but maybe no
suffix should be necessary.

  I assume uint64_t is used to preserve alignment? This question doesn't
appear as trivial to me, as there are some significant considerations
behind MALLOC_ALIGNMENT; malloca() returns less aligned pointers than
malloc() on 64-bit platforms, which maybe matters just for exotic cases
like long double which probably won't be an issue in glibc, but this
should be documented somewhere.

>   /* malloca(N) is a safe variant of alloca(N).  It allocates N bytes of
>      memory allocated on the stack or heap for large requests.
>      It must be freed using freea() before
>      the function returns.  Upon failure, it returns NULL.  */
>
> #if 1

#if 1?

> #define malloca(n) ({\
>   size_t  __n__ = n;\
>   void  * __r__ = NULL;\
>   if (__n__ <= 4096)\
>     {\
>       __r__ = alloca (__n__ + sizeof (uint64_t));\
>       if (__r__)\
>         {\
>           *((uint64_t *)__r__) = _ALLOCA_MC;\
>           __r__ += sizeof (uint64_t);\
>         }\
>     }\
>   if (!__r__)\
>     {\
>       __r__ = malloc (__n__ + sizeof (uint64_t));\
>       if (__r__)\
>         {\
>           *((uint64_t *)__r__) = _MALLOC_MC;\
>           __r__ += sizeof (uint64_t);\
>         }\
>     }\
>   __r__;\
> })

So it is still unsafe to call malloca() in a loop. This is also
something that should be documented. Or can't we do better? I think even
having an alternative 2-parameter call would be worth considering so
that users that need this can use a local variable to keep track of
stack usage, as per include/alloca.h.

> /* If desired we could detect more corruption by
>    adding constant to end of alloca'd array. */
>
> #define freea(r) {\
> void *__r__ = r;\
> if (__r__)\
>   {\
>     __r__ -= sizeof (uint64_t);\
>     if  (    *((uint64_t *)__r__) == _MALLOC_MC)\
>       free (__r__);\
>     else if (*((uint64_t *)__r__) != _ALLOCA_MC)\
>       __abort_freea();\
>   }\
> }

I personally find it much more readable to have " \" instead of "\" at
line ends; maybe GNU style even requires having the \s aligned at col 78.

You really should wrap the freea() block in do { ... } while (0)

                                Petr "Pasky" Baudis
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Andreas Schwab-2
In reply to this post by Rich Felker
Ondřej Bílka <[hidden email]> writes:

> #define malloca(n) ({\
>   size_t  __n__ = n;\

Macro arguments must be properly parenthesised.

Andreas.

--
Andreas Schwab, [hidden email]
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Ondřej Bílka
In reply to this post by Petr Baudis
On Sat, Dec 29, 2012 at 03:29:36PM +0100, Petr Baudis wrote:

>   Hi!
>
>   I applaud any effort that absolves us of the current unholy pointer
> flags we need to keep track of now.
>
> On Sat, Dec 29, 2012 at 11:33:15AM +0100, Ondřej Bílka wrote:
> > /* Safe automatic memory allocation.
> >    Copyright (C) 2012 Free Software Foundation, Inc.
> >
> >    This program is free software; you can redistribute it and/or modify
> >    it under the terms of the GNU General Public License as published by
> >    the Free Software Foundation; either version 2, or (at your option)
> >    any later version.
> >
> >    This program is distributed in the hope that it will be useful,
> >    but WITHOUT ANY WARRANTY; without even the implied warranty of
> >    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> >    GNU General Public License for more details.
> >
> >    You should have received a copy of the GNU General Public License
> >    along with this program; if not, see <http://www.gnu.org/licenses/>.  */
> >
> > #ifndef _MALLOCA_H
> > #define _MALLOCA_H
> >
> > /* AIX requires this to be the first thing in the file.  */
> > #ifndef __GNUC__
> > # if HAVE_ALLOCA_H || defined _LIBC
> > #  include <alloca.h>
> > # else
> > #  ifdef _AIX
> > #pragma alloca
> > #  else
> > #   ifndef alloca /* predefined by HP cc +Olibcalls */
> > char *alloca ();
> > #   endif
>
> Does this matter if we go ahead with using ({...}) just a few lines
> below?

I kept this from argp/argp-help.c and was not sure if its necessary.

>
> > #  endif
> > # endif
> > #endif
> >
> > #include <stddef.h>
> > #include <stdlib.h>
> > #include <stdio.h>
> > #include <stdint.h>
> >
> > #ifdef __cplusplus
> > extern "C" {
> > #endif
> >
> > #define _ALLOCA_MC 0x2439a2431bca4812L
> > #define _MALLOC_MC 0x1bca48122439a243L
>
>   Does the 'L' have any effect? I'm not sure if it isn't actually doing
> any harm on 32-bit platforms... If anything, I'd use 'ULL', but maybe no
> suffix should be necessary.
OK
>
>   I assume uint64_t is used to preserve alignment? This question doesn't
> appear as trivial to me, as there are some significant considerations
> behind MALLOC_ALIGNMENT; malloca() returns less aligned pointers than
> malloc() on 64-bit platforms, which maybe matters just for exotic cases
> like long double which probably won't be an issue in glibc, but this
> should be documented somewhere.
>
I forgot that alloca alignment is 16byte. Remaining bytes could be
pointer to guard at end. I wait with this as it would make code
uglier than already is.

> >   /* malloca(N) is a safe variant of alloca(N).  It allocates N bytes of
> >      memory allocated on the stack or heap for large requests.
> >      It must be freed using freea() before
> >      the function returns.  Upon failure, it returns NULL.  */
> >
> > #if 1
>
> #if 1?
Planned add alloca support detection.

>
> > #define malloca(n) ({\
> >   size_t  __n__ = n;\
> >   void  * __r__ = NULL;\
> >   if (__n__ <= 4096)\
> >     {\
> >       __r__ = alloca (__n__ + sizeof (uint64_t));\
> >       if (__r__)\
> >         {\
> >           *((uint64_t *)__r__) = _ALLOCA_MC;\
> >           __r__ += sizeof (uint64_t);\
> >         }\
> >     }\
> >   if (!__r__)\
> >     {\
> >       __r__ = malloc (__n__ + sizeof (uint64_t));\
> >       if (__r__)\
> >         {\
> >           *((uint64_t *)__r__) = _MALLOC_MC;\
> >           __r__ += sizeof (uint64_t);\
> >         }\
> >     }\
> >   __r__;\
> > })
>
> So it is still unsafe to call malloca() in a loop. This is also
> something that should be documented. Or can't we do better? I think even
> having an alternative 2-parameter call would be worth considering so
> that users that need this can use a local variable to keep track of
> stack usage, as per include/alloca.h.
I do not know how to portably do that.

For platform specific way best solution is to read where bottom of stack is
and allocate only when at least say 32768 bytes are left.

>
> > /* If desired we could detect more corruption by
> >    adding constant to end of alloca'd array. */
> >
> > #define freea(r) {\
> > void *__r__ = r;\
> > if (__r__)\
> >   {\
> >     __r__ -= sizeof (uint64_t);\
> >     if  (    *((uint64_t *)__r__) == _MALLOC_MC)\
> >       free (__r__);\
> >     else if (*((uint64_t *)__r__) != _ALLOCA_MC)\
> >       __abort_freea();\
> >   }\
> > }
>
> You really should wrap the freea() block in do { ... } while (0)
Or possibly as I dont manipulate stack here static inline function.


> I personally find it much more readable to have " \" instead of "\" at
> line ends; maybe GNU style even requires having the \s aligned at col 78.
ok
>
>
> Petr "Pasky" Baudis

--

terrorist activities
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Paul Eggert
In reply to this post by Ondřej Bílka
Thanks for looking into this.  Three comments.

First, as far as correctness goes, one cannot simply replace
{ P = alloca (N); use (*P); } with
{ P = malloca (N); use (*P); freea (P); },
as was done in the examples in
<http://sourceware.org/ml/libc-alpha/2012-12/msg00469.html>.
This is because malloca might return NULL.  Wouldn't you prefer a
different API, where the allocator cannot possibly return NULL
because it reliably aborts if storage is not available?
(This is what gnulib's xmalloca module does.)

Second, if you do want the malloca API, then it seems to me that,
in the typical case, the gnulib implementation should
perform better than the proposed malloca (with _ALLOCA_MC).
The _ALLOCA_MC approach expands to more machine instructions than
the gnulib approach does, which surely hurts instruction caching.
And in the typical case, where alloca is used, the _ALLOCA_MC
version has more work to do (to store _ALLOCA_MC)
than the gnulib code does (no need to store a magic word).
Have you measured the performance of the _ALLOCA_MC approach,
compared to the gnulib approach, on realistic workloads?

Third, the _ALLOCA_MC version mishandles very large sizes, e.g.,
its malloca (SIZE_MAX) goes greatly awry.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Ondřej Bílka
On Sat, Dec 29, 2012 at 09:38:04AM -0800, Paul Eggert wrote:

> Thanks for looking into this.  Three comments.
>
> First, as far as correctness goes, one cannot simply replace
> { P = alloca (N); use (*P); } with
> { P = malloca (N); use (*P); freea (P); },
> as was done in the examples in
> <http://sourceware.org/ml/libc-alpha/2012-12/msg00469.html>.
> This is because malloca might return NULL.  Wouldn't you prefer a
> different API, where the allocator cannot possibly return NULL
> because it reliably aborts if storage is not available?
> (This is what gnulib's xmalloca module does.)
Here I can as alloca caused segfault on oom condition and null pointer
access has equivalent behaviour.

And on linux it will always succeed and be killed by oom later.

>
> Second, if you do want the malloca API, then it seems to me that,
> in the typical case, the gnulib implementation should
> perform better than the proposed malloca (with _ALLOCA_MC).
> The _ALLOCA_MC approach expands to more machine instructions than
> the gnulib approach does, which surely hurts instruction caching.
> And in the typical case, where alloca is used, the _ALLOCA_MC
> version has more work to do (to store _ALLOCA_MC)
> than the gnulib code does (no need to store a magic word).
> Have you measured the performance of the _ALLOCA_MC approach,
> compared to the gnulib approach, on realistic workloads?
Of course. In this benchmark
http://kam.mff.cuni.cz/~ondra/malloca_benchmark.tar.bz2
with my implementation is 20% faster than gnulib one.
>
> Third, the _ALLOCA_MC version mishandles very large sizes, e.g.,
> its malloca (SIZE_MAX) goes greatly awry.

--

CD-ROM server needs recalibration
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Paul Eggert
On 12/29/2012 11:18 AM, Ondřej Bílka wrote:

> alloca caused segfault on oom condition and null pointer
> access has equivalent behaviour.

alloca doesn't always cause a SEGV on out of memory.
That's part of the problem that we're trying to cure.
If alloca is given too large a number, it might SEGV,
and it might not.  malloca should not have this problem:
it should reliably fail when out of memory.

Unfortunately when out of memory the proposed use of
malloca does not reliably SEGV.  Here's a trivial example:

   size_t n = ... some big number ...;
   char *p = malloca (n);
   strcpy (p + n - 10, "x");
   freea (p);

This might not SEGV when malloca returns NULL, depending
on the architecture; for example, if n happens to be
100000010 and (char *) 100000000 happens to be a valid address.

> And on linux it will always succeed and be killed by oom later.

Sorry, I'm not following, or perhaps I'm misunderstanding you.
malloc does not always succeed on GNU/Linux; it sometimes
returns NULL.  malloc (SIZE_MAX) is a trivial example of this.

> Of course. In this benchmark
> http://kam.mff.cuni.cz/~ondra/malloca_benchmark.tar.bz2
> with my implementation is 20% faster than gnulib one.

First, we need a correct implementation before we can start
doing benchmark comparisons, as fixing the problems will slow
things down, I expect.  It's not just the SEGV problem
mentioned above; it's also the problem with very large
allocation requests that I mentioned earlier.

Second, that benchmark invokes malloca on a constant.  But
actual code rarely does this:

      char *p = alloca (100);

as what would be the point?  It's more portable to do this:

      char buf[100];
      char *p = buf;

and one doesn't need either alloca or malloca in this case.
A more-realistic benchmark would invoke malloca with a
non-constant, as that's how alloca is typically used in
practice.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Ondřej Bílka
On Sat, Dec 29, 2012 at 06:21:55PM -0800, Paul Eggert wrote:

> On 12/29/2012 11:18 AM, Ondřej Bílka wrote:
>
> > alloca caused segfault on oom condition and null pointer
> > access has equivalent behaviour.
>
> alloca doesn't always cause a SEGV on out of memory.
> That's part of the problem that we're trying to cure.
> If alloca is given too large a number, it might SEGV,
> and it might not.  malloca should not have this problem:
> it should reliably fail when out of memory.
>
> Unfortunately when out of memory the proposed use of
> malloca does not reliably SEGV.  Here's a trivial example:
>
On my example I checked it causes null access. But you are rigth it
cannot be done in general.
>
> > And on linux it will always succeed and be killed by oom later.
>
> Sorry, I'm not following, or perhaps I'm misunderstanding you.
> malloc does not always succeed on GNU/Linux; it sometimes
> returns NULL.  malloc (SIZE_MAX) is a trivial example of this.

It was a example why oom condition cannot be detected reliably.

>
> > with my implementation is 20% faster than gnulib one.
>
> First, we need a correct implementation before we can start
> doing benchmark comparisons, as fixing the problems will slow
> things down, I expect.  It's not just the SEGV problem
> mentioned above; it's also the problem with very large
> allocation requests that I mentioned earlier.
OK
In http://kam.mff.cuni.cz/~ondra/malloca_benchmark.tar.bz2
I changed signal from SEGV to ABRT and sizes. No measurable
performance drop.

>
> Second, that benchmark invokes malloca on a constant.  But
> actual code rarely does this:
>
>       char *p = alloca (100);
>
> as what would be the point?  It's more portable to do this:
>
>       char buf[100];
>       char *p = buf;
>
> and one doesn't need either alloca or malloca in this case.
> A more-realistic benchmark would invoke malloca with a
> non-constant, as that's how alloca is typically used in
> practice.
Now passing size by variable and this does not change performance ratio.

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Ondřej Bílka
In reply to this post by Ondřej Bílka
On Sat, Dec 29, 2012 at 05:23:17PM +0100, Ondřej Bílka wrote:

> On Sat, Dec 29, 2012 at 03:29:36PM +0100, Petr Baudis wrote:
> >
> > So it is still unsafe to call malloca() in a loop. This is also
> > something that should be documented. Or can't we do better? I think even
> > having an alternative 2-parameter call would be worth considering so
> > that users that need this can use a local variable to keep track of
> > stack usage, as per include/alloca.h.
> I do not know how to portably do that.
>
> For platform specific way best solution is to read where bottom of stack is
> and allocate only when at least say 32768 bytes are left.
> >
And when knowing stack boundaries I could also recognize stack pointer
by single comparison.

It needs to define _STACK_TOP,_STACK_CUP, _STACK_SIZE macros for stack parameters.
I can find them by pthread_attr_getstack but this call is slow.

/* Safe automatic memory allocation.
   Copyright (C) 2012 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */

#ifndef _MALLOCA_H
#define _MALLOCA_H

#include <alloca.h>
#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

  /* malloca(N) is a safe variant of alloca(N).  It allocates N bytes of
     memory allocated on the stack while allocation keeps at least 32768
     bytes of stack space and heap otherwise. When memory cannot be
     allocated from heap malloca aborts.
     It must be freed using freea() before the function returns. */

#if _STACK_GROWS_DOWN
#define _ALLOCA_FITS(n) _STACK_CUR - (n) - 32768 > _STACK_TOP
  /* We use & instead && because gcc simplification made by gcc.*/
#define _STACK_PTR(p) ( _STACK_TOP < (p) & (p) < _STACK_TOP + _STACK_SIZE )
#else
#if _STACK_GROWS_UP
#define _ALLOCA_FITS(n) _STACK_CUR + (n) + 32768 < _STACK_TOP
#define _STACK_PTR(p) ( _STACK_TOP - _STACK_SIZE < (p) & (p) < _STACK_TOP )
#else
#define _ALLOCA_FITS(n) 0
#define _STACK_PTR(x)   0
#endif

#define malloca(n) ({                             \
  size_t  __n__ = (n);                            \
  void  * __r__ = NULL;                           \
  if (ALLOCA_FITS (__n__))                        \
    __r__ = alloca (__n__);                       \
  else                                            \
    {                                             \
      __r__ = malloc (__n__);                     \
      if (!__r__ )                                \
            abort();                              \
    }                                             \
  __r__;                                          \
})

static inline freea(void * __r)
{
  if (__r && !_STACK_PTR(__r))
    free (__r);
}

#endif
#ifdef __cplusplus
}
#endif


#endif /* _MALLOCA_H */
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Andreas Schwab-2
Ondřej Bílka <[hidden email]> writes:

> static inline freea(void * __r)
              void

Andreas.

--
Andreas Schwab, [hidden email]
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

KOSAKI Motohiro-2
In reply to this post by Ondřej Bílka
> And on linux it will always succeed and be killed by oom later.

I suspect you forgot /proc/sys/vm/overcommit_memory=2 (i.e. account
always mode).
Moreover, the default mode (/proc/sys/vm/overcommit_memory=1, ie
account guess mode) also may return failure if you try obvious big
size allocation.

thank you.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

KOSAKI Motohiro-2
In reply to this post by Ondřej Bílka
>> For platform specific way best solution is to read where bottom of stack is
>> and allocate only when at least say 32768 bytes are left.
>> >
> And when knowing stack boundaries I could also recognize stack pointer
> by single comparison.
>
> It needs to define _STACK_TOP,_STACK_CUP, _STACK_SIZE macros for stack parameters.
> I can find them by pthread_attr_getstack but this call is slow.

I think this approach has two issues.

1) On LInux, RLIMIT_STACK doesn't affect to main thread. so there is
no reliable stack limit detection way. Think mmap vs stack expansion
race case for example.
2) setcontext/getcontext family provide to implement userland thread
(it is sometimes called fiber or green thread). so
pthread_attr_getstack is not suitable for getting generic  stack
knowledge.

I suppose original hardcoded 4096 boundary is best.

Thank you.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Rich Felker
On Sun, Dec 30, 2012 at 12:48:50PM -0500, KOSAKI Motohiro wrote:

> >> For platform specific way best solution is to read where bottom of stack is
> >> and allocate only when at least say 32768 bytes are left.
> >> >
> > And when knowing stack boundaries I could also recognize stack pointer
> > by single comparison.
> >
> > It needs to define _STACK_TOP,_STACK_CUP, _STACK_SIZE macros for stack parameters.
> > I can find them by pthread_attr_getstack but this call is slow.
>
> I think this approach has two issues.
>
> 1) On LInux, RLIMIT_STACK doesn't affect to main thread. so there is
> no reliable stack limit detection way. Think mmap vs stack expansion
> race case for example.

I'm not sure this is correct, or perhaps I don't understand. The
entire virtual memory range covered by RLIMIT_STACK is reserved for
stack growth, but only the first 128k (this number seems to be
hard-coded in the kernel..?) is initially reserved in the
commit-charge sense.

> 2) setcontext/getcontext family provide to implement userland thread
> (it is sometimes called fiber or green thread). so
> pthread_attr_getstack is not suitable for getting generic  stack
> knowledge.

Also, signal handlers can be run on alternate stacks with sigaltstack,
and it's possible to call non-async-signal-safe functions in libc from
such signal handlers as long as you can ensure they did not interrupt
unsafe functions. A trivial way to do this is raise().

I agree it's wrong to try to measure the available stack space. Even
if it could be done reliably, you don't want to consume most of the
remaining stack space with alloca when the function is not a leaf
function, and other functions it calls still might need significant
stack space. It's really just bad policy for any function to use more
than a small, bounded amount of stack space.

Rich
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

KOSAKI Motohiro-2
On Sun, Dec 30, 2012 at 1:45 PM, Rich Felker <[hidden email]> wrote:

> On Sun, Dec 30, 2012 at 12:48:50PM -0500, KOSAKI Motohiro wrote:
>> >> For platform specific way best solution is to read where bottom of stack is
>> >> and allocate only when at least say 32768 bytes are left.
>> >> >
>> > And when knowing stack boundaries I could also recognize stack pointer
>> > by single comparison.
>> >
>> > It needs to define _STACK_TOP,_STACK_CUP, _STACK_SIZE macros for stack parameters.
>> > I can find them by pthread_attr_getstack but this call is slow.
>>
>> I think this approach has two issues.
>>
>> 1) On LInux, RLIMIT_STACK doesn't affect to main thread. so there is
>> no reliable stack limit detection way. Think mmap vs stack expansion
>> race case for example.
>
> I'm not sure this is correct, or perhaps I don't understand. The
> entire virtual memory range covered by RLIMIT_STACK is reserved for
> stack growth, but only the first 128k (this number seems to be
> hard-coded in the kernel..?) is initially reserved in the
> commit-charge sense.

Damn. I wrote incorrect. I'm very sorry. I'd like to try clarify. On
linux main thread,
it doesn't reserve RLIMIT_STACK size virtual memory. but automatically expand
a stack when needed and then makes SEGV if exceed RLIMIT_STACK.

Then, the trouble is happen when virtual memory is very tight. mmap()
may allocate
a virtual memory very near place of ESP and it prevent to expand to
stack. So, stack
access can make SEGV even if stack size < RLIMIT_STACK.

Thank you.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Rich Felker
On Sun, Dec 30, 2012 at 02:18:11PM -0500, KOSAKI Motohiro wrote:

> On Sun, Dec 30, 2012 at 1:45 PM, Rich Felker <[hidden email]> wrote:
> > On Sun, Dec 30, 2012 at 12:48:50PM -0500, KOSAKI Motohiro wrote:
> >> >> For platform specific way best solution is to read where bottom of stack is
> >> >> and allocate only when at least say 32768 bytes are left.
> >> >> >
> >> > And when knowing stack boundaries I could also recognize stack pointer
> >> > by single comparison.
> >> >
> >> > It needs to define _STACK_TOP,_STACK_CUP, _STACK_SIZE macros for stack parameters.
> >> > I can find them by pthread_attr_getstack but this call is slow.
> >>
> >> I think this approach has two issues.
> >>
> >> 1) On LInux, RLIMIT_STACK doesn't affect to main thread. so there is
> >> no reliable stack limit detection way. Think mmap vs stack expansion
> >> race case for example.
> >
> > I'm not sure this is correct, or perhaps I don't understand. The
> > entire virtual memory range covered by RLIMIT_STACK is reserved for
> > stack growth, but only the first 128k (this number seems to be
> > hard-coded in the kernel..?) is initially reserved in the
> > commit-charge sense.
>
> Damn. I wrote incorrect. I'm very sorry. I'd like to try clarify. On
> linux main thread,
> it doesn't reserve RLIMIT_STACK size virtual memory. but automatically expand
> a stack when needed and then makes SEGV if exceed RLIMIT_STACK.
>
> Then, the trouble is happen when virtual memory is very tight. mmap()
> may allocate
> a virtual memory very near place of ESP and it prevent to expand to
> stack. So, stack
> access can make SEGV even if stack size < RLIMIT_STACK.

Could you provide a citation for this? I'm pretty sure the kernel
ensures this does not happen by never allocating maps in the
RLIMIT_STACK range. On the other hand, I have seen buggy
vendor-patched kernels which would allocate maps in the stack range
even when there is no memory pressure.

Rich
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Use malloca instead alloca

Ondřej Bílka
In reply to this post by Rich Felker
On Sun, Dec 30, 2012 at 01:45:55PM -0500, Rich Felker wrote:

> On Sun, Dec 30, 2012 at 12:48:50PM -0500, KOSAKI Motohiro wrote:
> > >> For platform specific way best solution is to read where bottom of stack is
> > >> and allocate only when at least say 32768 bytes are left.
> > >> >
> > > And when knowing stack boundaries I could also recognize stack pointer
> > > by single comparison.
> > >
> > > It needs to define _STACK_TOP,_STACK_CUP, _STACK_SIZE macros for stack parameters.
> > > I can find them by pthread_attr_getstack but this call is slow.
> >
> > I think this approach has two issues.
> >
> > 1) On LInux, RLIMIT_STACK doesn't affect to main thread. so there is
> > no reliable stack limit detection way. Think mmap vs stack expansion
> > race case for example.
>
> I'm not sure this is correct, or perhaps I don't understand. The
> entire virtual memory range covered by RLIMIT_STACK is reserved for
> stack growth, but only the first 128k (this number seems to be
> hard-coded in the kernel..?) is initially reserved in the
> commit-charge sense.

On Sun, Dec 30, 2012 at 02:18:11PM -0500, KOSAKI Motohiro wrote:

> Damn. I wrote incorrect. I'm very sorry. I'd like to try clarify. On
> linux main thread,
> it doesn't reserve RLIMIT_STACK size virtual memory. but automatically expand
> a stack when needed and then makes SEGV if exceed RLIMIT_STACK.
>
> Then, the trouble is happen when virtual memory is very tight. mmap()
> may allocate
> a virtual memory very near place of ESP and it prevent to expand to
> stack. So, stack
> access can make SEGV even if stack size < RLIMIT_STACK.

As value allocated on stack is value not allocated on heap this would
delay this condition. If 4mb were allocated on stack then with
fragmentation ratio 2 you could allocate additional 2mb until this
happen.
>
> > 2) setcontext/getcontext family provide to implement userland thread
> > (it is sometimes called fiber or green thread). so
> > pthread_attr_getstack is not suitable for getting generic  stack
> > knowledge.
I was writing platform specific which could include adding counters in
nptl/allocatestack.c, pthread_attr_getstack was meant as starting point.

Anyway I realized that this is not needed for detection, it suffices to
compare with %esp and %ebp or analogs on other architectures.

>
> Also, signal handlers can be run on alternate stacks with sigaltstack,
> and it's possible to call non-async-signal-safe functions in libc from
> such signal handlers as long as you can ensure they did not interrupt
> unsafe functions. A trivial way to do this is raise().
>
> I agree it's wrong to try to measure the available stack space. Even
> if it could be done reliably, you don't want to consume most of the
> remaining stack space with alloca when the function is not a leaf
> function, and other functions it calls still might need significant
> stack space. It's really just bad policy for any function to use more
> than a small, bounded amount of stack space.
This is not big problem. We could change bound from size-32768 to size/2.
Or additionaly keep counter of memory used my malloca and
allocate until size/2 is hit.

Anyway original question was how handle malloca in loop, and some counter
is necessary.
>
> Rich

12