[PATCH 0/2] Remove signal blocking from dlopen

classic Classic list List threaded Threaded
10 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH 0/2] Remove signal blocking from dlopen

Florian Weimer-5
These patches are on top of the previously posted patch, “dlopen: Fix
issues related to NODELETE handling and relocations”.

Tested (with the glibc testsuite) on x86_64-linux-gnu, i686-linux-gnu,
powerpc64le-linux-gnu.

Florian Weimer (2):
  dlopen: Rework handling of pending NODELETE status
  dlopen: Do not block signals

 elf/dl-close.c         |  7 +++--
 elf/dl-lookup.c        | 58 ++++++++++++++++++++++++-----------------
 elf/dl-open.c          | 59 +++++++++++++++++-------------------------
 elf/get-dynamic-info.h |  2 +-
 include/link.h         | 31 ++++++++--------------
 5 files changed, 73 insertions(+), 84 deletions(-)

--
2.23.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 1/2] dlopen: Rework handling of pending NODELETE status

Florian Weimer-5
To avoid a read-modify-write cycle on the l_nodelete field, this
commit introduces two flags for active NODELETE status (irrevocable)
and pending NODELETE status (revocable until activate_nodelete) is
invoked.  As a result, NODELETE processing in dlopen does not
introduce further reasons why lazy binding from signal handlers
is unsafe during dlopen, and a subsequent commit can remove signal
blocking from dlopen.
---
 elf/dl-close.c         |  7 +++--
 elf/dl-lookup.c        | 58 +++++++++++++++++++++++++-----------------
 elf/dl-open.c          | 22 +++++++++-------
 elf/get-dynamic-info.h |  2 +-
 include/link.h         | 31 ++++++++--------------
 5 files changed, 62 insertions(+), 58 deletions(-)

diff --git a/elf/dl-close.c b/elf/dl-close.c
index e35a62daf6..df1df6fb29 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -197,7 +197,7 @@ _dl_close_worker (struct link_map *map, bool force)
       /* Check whether this object is still used.  */
       if (l->l_type == lt_loaded
   && l->l_direct_opencount == 0
-  && l->l_nodelete != link_map_nodelete_active
+  && !l->l_nodelete_active
   /* See CONCURRENCY NOTES in cxa_thread_atexit_impl.c to know why
      acquire is sufficient and correct.  */
   && atomic_load_acquire (&l->l_tls_dtor_count) == 0
@@ -279,8 +279,7 @@ _dl_close_worker (struct link_map *map, bool force)
 
       if (!used[i])
  {
-  assert (imap->l_type == lt_loaded
-  && imap->l_nodelete != link_map_nodelete_active);
+  assert (imap->l_type == lt_loaded && !imap->l_nodelete_active);
 
   /* Call its termination function.  Do not do it for
      half-cooked objects.  Temporarily disable exception
@@ -830,7 +829,7 @@ _dl_close (void *_map)
      before we took the lock. There is no way to detect this (see below)
      so we proceed assuming this isn't the case.  First see whether we
      can remove the object at all.  */
-  if (__glibc_unlikely (map->l_nodelete == link_map_nodelete_active))
+  if (__glibc_unlikely (map->l_nodelete_active))
     {
       /* Nope.  Do nothing.  */
       __rtld_lock_unlock_recursive (GL(dl_load_lock));
diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
index 99846918c3..759b45a2c9 100644
--- a/elf/dl-lookup.c
+++ b/elf/dl-lookup.c
@@ -187,6 +187,28 @@ enter_unique_sym (struct unique_sym *table, size_t size,
   table[idx].map = map;
 }
 
+/* Mark MAP as NODELETE according to the lookup mode in FLAGS.  During
+   initial relocation, NODELETE state is pending only.  */
+static void
+mark_nodelete (struct link_map *map, int flags)
+{
+  if (flags & DL_LOOKUP_FOR_RELOCATE)
+    map->l_nodelete_pending = true;
+  else
+    map->l_nodelete_active = true;
+}
+
+/* Return true if MAP is marked as NODELETE according to the lookup
+   mode in FLAGS> */
+static bool
+is_nodelete (struct link_map *map, int flags)
+{
+  /* Non-pending NODELETE always counts.  Pending NODELETE only counts
+     during initial relocation processing.  */
+  return map->l_nodelete_active
+    || ((flags & DL_LOOKUP_FOR_RELOCATE) && map->l_nodelete_pending);
+}
+
 /* Utility function for do_lookup_x. Lookup an STB_GNU_UNIQUE symbol
    in the unique symbol table, creating a new entry if necessary.
    Return the matching symbol in RESULT.  */
@@ -311,8 +333,7 @@ do_lookup_unique (const char *undef_name, uint_fast32_t new_hash,
       enter_unique_sym (entries, size,
                         new_hash, strtab + sym->st_name, sym, map);
 
-      if (map->l_type == lt_loaded
-  && map->l_nodelete == link_map_nodelete_inactive)
+      if (map->l_type == lt_loaded && !is_nodelete (map, flags))
  {
   /* Make sure we don't unload this object by
      setting the appropriate flag.  */
@@ -320,10 +341,7 @@ do_lookup_unique (const char *undef_name, uint_fast32_t new_hash,
     _dl_debug_printf ("\
 marking %s [%lu] as NODELETE due to unique symbol\n",
       map->l_name, map->l_ns);
-  if (flags & DL_LOOKUP_FOR_RELOCATE)
-    map->l_nodelete = link_map_nodelete_pending;
-  else
-    map->l_nodelete = link_map_nodelete_active;
+  mark_nodelete (map, flags);
  }
     }
   ++tab->n_elements;
@@ -586,7 +604,7 @@ add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
      dependencies may pick an dependency which can be dlclose'd, but
      such IFUNC resolvers are undefined anyway.  */
   assert (map->l_type == lt_loaded);
-  if (map->l_nodelete != link_map_nodelete_inactive)
+  if (is_nodelete (map, flags))
     return 0;
 
   struct link_map_reldeps *l_reldeps
@@ -694,17 +712,16 @@ add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
 
       /* Redo the NODELETE check, as when dl_load_lock wasn't held
  yet this could have changed.  */
-      if (map->l_nodelete != link_map_nodelete_inactive)
+      if (is_nodelete (map, flags))
  goto out;
 
       /* If the object with the undefined reference cannot be removed ever
  just make sure the same is true for the object which contains the
  definition.  */
-      if (undef_map->l_type != lt_loaded
-  || (undef_map->l_nodelete != link_map_nodelete_inactive))
+      if (undef_map->l_type != lt_loaded || is_nodelete (map, flags))
  {
   if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_BINDINGS)
-      && map->l_nodelete == link_map_nodelete_inactive)
+      && !is_nodelete (map, flags))
     {
       if (undef_map->l_name[0] == '\0')
  _dl_debug_printf ("\
@@ -716,11 +733,7 @@ marking %s [%lu] as NODELETE due to reference to %s [%lu]\n",
   map->l_name, map->l_ns,
   undef_map->l_name, undef_map->l_ns);
     }
-
-  if (flags & DL_LOOKUP_FOR_RELOCATE)
-    map->l_nodelete = link_map_nodelete_pending;
-  else
-    map->l_nodelete = link_map_nodelete_active;
+  mark_nodelete (map, flags);
   goto out;
  }
 
@@ -746,17 +759,14 @@ marking %s [%lu] as NODELETE due to reference to %s [%lu]\n",
  cannot be unloaded.  This is semantically the correct
  behavior.  */
       if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_BINDINGS)
-  && map->l_nodelete == link_map_nodelete_inactive)
+  && !is_nodelete (map, flags))
  _dl_debug_printf ("\
 marking %s [%lu] as NODELETE due to memory allocation failure\n",
   map->l_name, map->l_ns);
-      if (flags & DL_LOOKUP_FOR_RELOCATE)
- /* In case of non-lazy binding, we could actually
-   report the memory allocation error, but for now, we
-   use the conservative approximation as well. */
- map->l_nodelete = link_map_nodelete_pending;
-      else
- map->l_nodelete = link_map_nodelete_active;
+      /* In case of non-lazy binding, we could actually report
+ the memory allocation error, but for now, we use the
+ conservative approximation as well.  */
+      mark_nodelete (map, flags);
       goto out;
     }
   else
diff --git a/elf/dl-open.c b/elf/dl-open.c
index 56f213323c..c23341be58 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -440,13 +440,17 @@ activate_nodelete (struct link_map *new)
      NODELETE status for objects outside the local scope.  */
   for (struct link_map *l = GL (dl_ns)[new->l_ns]._ns_loaded; l != NULL;
        l = l->l_next)
-    if (l->l_nodelete == link_map_nodelete_pending)
+    if (l->l_nodelete_pending)
       {
  if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_FILES))
   _dl_debug_printf ("activating NODELETE for %s [%lu]\n",
     l->l_name, l->l_ns);
 
- l->l_nodelete = link_map_nodelete_active;
+ l->l_nodelete_active = true;
+
+ /* This is just a debugging aid, to indicate that
+   activate_nodelete has run for this map.  */
+ l->l_nodelete_pending = false;
       }
 }
 
@@ -549,10 +553,10 @@ dl_open_worker (void *a)
       if (__glibc_unlikely (mode & RTLD_NODELETE))
  {
   if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_FILES)
-      && new->l_nodelete == link_map_nodelete_inactive)
+      && !new->l_nodelete_active)
     _dl_debug_printf ("marking %s [%lu] as NODELETE\n",
       new->l_name, new->l_ns);
-  new->l_nodelete = link_map_nodelete_active;
+  new->l_nodelete_active = true;
  }
 
       /* Finalize the addition to the global scope.  */
@@ -568,7 +572,7 @@ dl_open_worker (void *a)
   /* Schedule NODELETE marking for the directly loaded object if
      requested.  */
   if (__glibc_unlikely (mode & RTLD_NODELETE))
-    new->l_nodelete = link_map_nodelete_pending;
+    new->l_nodelete_pending = true;
 
   /* Load that object's dependencies.  */
   _dl_map_object_deps (new, NULL, 0, 0,
@@ -683,7 +687,7 @@ dl_open_worker (void *a)
       _dl_start_profile ();
 
       /* Prevent unloading the object.  */
-      GL(dl_profile_map)->l_nodelete = link_map_nodelete_active;
+      GL(dl_profile_map)->l_nodelete_active = true;
     }
  }
       else
@@ -882,9 +886,9 @@ no more namespaces available for dlmopen()"));
      happens inside dl_open_worker.  */
   __libc_signal_restore_set (&args.original_signal_mask);
 
-  /* All link_map_nodelete_pending objects should have been
-     deleted at this point, which is why it is not necessary
-     to reset the flag here.  */
+  /* All l_nodelete_pending objects should have been deleted
+     at this point, which is why it is not necessary to reset
+     the flag here.  */
  }
       else
  __libc_signal_restore_set (&args.original_signal_mask);
diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h
index 075683d729..ea4e304bef 100644
--- a/elf/get-dynamic-info.h
+++ b/elf/get-dynamic-info.h
@@ -164,7 +164,7 @@ elf_get_dynamic_info (struct link_map *l, ElfW(Dyn) *temp)
     {
       l->l_flags_1 = info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val;
       if (l->l_flags_1 & DF_1_NODELETE)
- l->l_nodelete = link_map_nodelete_pending;
+ l->l_nodelete_pending = true;
 
       /* Only DT_1_SUPPORTED_MASK bits are supported, and we would like
  to assert this, but we can't. Users have been setting
diff --git a/include/link.h b/include/link.h
index 2e771e433a..2acc836380 100644
--- a/include/link.h
+++ b/include/link.h
@@ -79,22 +79,6 @@ struct r_search_path_struct
     int malloced;
   };
 
-/* Type used by the l_nodelete member.  */
-enum link_map_nodelete
-{
- /* This link map can be deallocated.  */
- link_map_nodelete_inactive = 0, /* Zero-initialized in _dl_new_object.  */
-
- /* This link map cannot be deallocated.  */
- link_map_nodelete_active,
-
- /* This link map cannot be deallocated after dlopen has succeded.
-    dlopen turns this into link_map_nodelete_active.  dlclose treats
-    this intermediate state as link_map_nodelete_active.  */
- link_map_nodelete_pending,
-};
-
-
 /* Structure describing a loaded shared object.  The `l_next' and `l_prev'
    members form a chain of all the shared objects loaded at startup.
 
@@ -218,10 +202,17 @@ struct link_map
        freed, ie. not allocated with
        the dummy malloc in ld.so.  */
 
-    /* Actually of type enum link_map_nodelete.  Separate byte due to
-       a read in add_dependency in elf/dl-lookup.c outside the loader
-       lock.  Only valid for l_type == lt_loaded.  */
-    unsigned char l_nodelete;
+    /* NODELETE status of the map.  Only valid for maps of type
+       lt_loaded.  Lazy binding sets l_nodelete_active directly,
+       potentially from signal handlers.  Initial loading of an
+       DF_1_NODELETE object set l_nodelete_pending.  Relocation may
+       set l_nodelete_pending as well.  l_nodelete_pending maps are
+       promoted to l_nodelete_active status in the final stages of
+       dlopen, prior to calling ELF constructors.  dlclose only
+       refuses to unload l_nodelete_active maps, the pending status is
+       ignored.  */
+    bool l_nodelete_active;
+    bool l_nodelete_pending;
 
 #include <link_map.h>
 
--
2.23.0


Reply | Threaded
Open this post in threaded view
|

[PATCH 2/2] dlopen: Do not block signals

Florian Weimer-5
In reply to this post by Florian Weimer-5
Blocking signals causes issues with certain anti-malware solutions
which rely on an unblocked SIGSYS signal for system calls they
intercept.

This reverts commit a2e8aa0d9ea648068d8be52dd7b15f1b6a008e23
("Block signals during the initial part of dlopen") and adds
comments related to async signal safety to active_nodelete and
its caller.

Note that this does not make lazy binding async-signal-safe with regards
to dlopen.  It merely avoids introducing new async-signal-safety hazards
as part of the NODELETE changes.
---
 elf/dl-open.c | 37 +++++++++++--------------------------
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/elf/dl-open.c b/elf/dl-open.c
index c23341be58..5a1c5b5326 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -34,7 +34,6 @@
 #include <atomic.h>
 #include <libc-internal.h>
 #include <array_length.h>
-#include <internal-signals.h>
 
 #include <dl-dst.h>
 #include <dl-prop.h>
@@ -53,10 +52,6 @@ struct dl_open_args
   /* Namespace ID.  */
   Lmid_t nsid;
 
-  /* Original signal mask.  Used for unblocking signal handlers before
-     running ELF constructors.  */
-  sigset_t original_signal_mask;
-
   /* Original value of _ns_global_scope_pending_adds.  Set by
      dl_open_worker.  Only valid if nsid is a real namespace
      (non-negative).  */
@@ -446,6 +441,9 @@ activate_nodelete (struct link_map *new)
   _dl_debug_printf ("activating NODELETE for %s [%lu]\n",
     l->l_name, l->l_ns);
 
+ /* The flag can already be true at this point, e.g. a signal
+   handler may have triggered lazy binding and set NODELETE
+   status immediately.  */
  l->l_nodelete_active = true;
 
  /* This is just a debugging aid, to indicate that
@@ -520,16 +518,12 @@ dl_open_worker (void *a)
   if (new == NULL)
     {
       assert (mode & RTLD_NOLOAD);
-      __libc_signal_restore_set (&args->original_signal_mask);
       return;
     }
 
   if (__glibc_unlikely (mode & __RTLD_SPROF))
-    {
-      /* This happens only if we load a DSO for 'sprof'.  */
-      __libc_signal_restore_set (&args->original_signal_mask);
-      return;
-    }
+    /* This happens only if we load a DSO for 'sprof'.  */
+    return;
 
   /* This object is directly loaded.  */
   ++new->l_direct_opencount;
@@ -565,7 +559,6 @@ dl_open_worker (void *a)
 
       assert (_dl_debug_initialize (0, args->nsid)->r_state == RT_CONSISTENT);
 
-      __libc_signal_restore_set (&args->original_signal_mask);
       return;
     }
 
@@ -712,6 +705,12 @@ dl_open_worker (void *a)
      All memory allocations for new objects must have happened
      before.  */
 
+  /* Finalize the NODELETE status first.  This comes before
+     update_scopes, so that lazy binding will not see pending NODELETE
+     state for newly loaded objects.  There is a compiler barrier in
+     update_scopes which ensures that the changes from
+     activate_nodelete are visible before new objects show up in the
+     local scope.  */
   activate_nodelete (new);
 
   /* Second stage after resize_scopes: Actually perform the scope
@@ -745,10 +744,6 @@ dl_open_worker (void *a)
   if (mode & RTLD_GLOBAL)
     add_to_global_resize (new);
 
-  /* Unblock signals.  Data structures are now consistent, and
-     application code may run.  */
-  __libc_signal_restore_set (&args->original_signal_mask);
-
   /* Run the initializer functions of new objects.  Temporarily
      disable the exception handler, so that lazy binding failures are
      fatal.  */
@@ -838,10 +833,6 @@ no more namespaces available for dlmopen()"));
   args.argv = argv;
   args.env = env;
 
-  /* Recursive lazy binding during manipulation of the dynamic loader
-     structures may result in incorrect behavior.  */
-  __libc_signal_block_all (&args.original_signal_mask);
-
   struct dl_exception exception;
   int errcode = _dl_catch_exception (&exception, dl_open_worker, &args);
 
@@ -882,16 +873,10 @@ no more namespaces available for dlmopen()"));
 
   _dl_close_worker (args.map, true);
 
-  /* Restore the signal mask.  In the success case, this
-     happens inside dl_open_worker.  */
-  __libc_signal_restore_set (&args.original_signal_mask);
-
   /* All l_nodelete_pending objects should have been deleted
      at this point, which is why it is not necessary to reset
      the flag here.  */
  }
-      else
- __libc_signal_restore_set (&args.original_signal_mask);
 
       assert (_dl_debug_initialize (0, args.nsid)->r_state == RT_CONSISTENT);
 
--
2.23.0

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 0/2] Remove signal blocking from dlopen

Florian Weimer-5
In reply to this post by Florian Weimer-5
* Florian Weimer:

> These patches are on top of the previously posted patch, “dlopen: Fix
> issues related to NODELETE handling and relocations”.
>
> Tested (with the glibc testsuite) on x86_64-linux-gnu, i686-linux-gnu,
> powerpc64le-linux-gnu.
>
> Florian Weimer (2):
>   dlopen: Rework handling of pending NODELETE status
>   dlopen: Do not block signals
>
>  elf/dl-close.c         |  7 +++--
>  elf/dl-lookup.c        | 58 ++++++++++++++++++++++++-----------------
>  elf/dl-open.c          | 59 +++++++++++++++++-------------------------
>  elf/get-dynamic-info.h |  2 +-
>  include/link.h         | 31 ++++++++--------------
>  5 files changed, 73 insertions(+), 84 deletions(-)

Ping.  These patches need review.

  <https://sourceware.org/ml/libc-alpha/2019-12/msg00176.html>
  <https://sourceware.org/ml/libc-alpha/2019-12/msg00177.html>

Thanks,
Florian

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] dlopen: Rework handling of pending NODELETE status

Adhemerval Zanella-2
In reply to this post by Florian Weimer-5


On 05/12/2019 12:19, Florian Weimer wrote:
> To avoid a read-modify-write cycle on the l_nodelete field, this
> commit introduces two flags for active NODELETE status (irrevocable)
> and pending NODELETE status (revocable until activate_nodelete) is
> invoked.  As a result, NODELETE processing in dlopen does not
> introduce further reasons why lazy binding from signal handlers
> is unsafe during dlopen, and a subsequent commit can remove signal
> blocking from dlopen.

The changes to use the new two flags seems right, but I don't fully grasp
the avoid modification this patch tries to achieve (read-modify-write).
Could you explain why would be required without field split?

> ---
>  elf/dl-close.c         |  7 +++--
>  elf/dl-lookup.c        | 58 +++++++++++++++++++++++++-----------------
>  elf/dl-open.c          | 22 +++++++++-------
>  elf/get-dynamic-info.h |  2 +-
>  include/link.h         | 31 ++++++++--------------
>  5 files changed, 62 insertions(+), 58 deletions(-)
>
> diff --git a/elf/dl-close.c b/elf/dl-close.c
> index e35a62daf6..df1df6fb29 100644
> --- a/elf/dl-close.c
> +++ b/elf/dl-close.c
> @@ -197,7 +197,7 @@ _dl_close_worker (struct link_map *map, bool force)
>        /* Check whether this object is still used.  */
>        if (l->l_type == lt_loaded
>    && l->l_direct_opencount == 0
> -  && l->l_nodelete != link_map_nodelete_active
> +  && !l->l_nodelete_active
>    /* See CONCURRENCY NOTES in cxa_thread_atexit_impl.c to know why
>       acquire is sufficient and correct.  */
>    && atomic_load_acquire (&l->l_tls_dtor_count) == 0

Ok.

> @@ -279,8 +279,7 @@ _dl_close_worker (struct link_map *map, bool force)
>  
>        if (!used[i])
>   {
> -  assert (imap->l_type == lt_loaded
> -  && imap->l_nodelete != link_map_nodelete_active);
> +  assert (imap->l_type == lt_loaded && !imap->l_nodelete_active);
>  
>    /* Call its termination function.  Do not do it for
>       half-cooked objects.  Temporarily disable exception

Ok.

> @@ -830,7 +829,7 @@ _dl_close (void *_map)
>       before we took the lock. There is no way to detect this (see below)
>       so we proceed assuming this isn't the case.  First see whether we
>       can remove the object at all.  */
> -  if (__glibc_unlikely (map->l_nodelete == link_map_nodelete_active))
> +  if (__glibc_unlikely (map->l_nodelete_active))
>      {
>        /* Nope.  Do nothing.  */
>        __rtld_lock_unlock_recursive (GL(dl_load_lock));

Ok.

> diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
> index 99846918c3..759b45a2c9 100644
> --- a/elf/dl-lookup.c
> +++ b/elf/dl-lookup.c
> @@ -187,6 +187,28 @@ enter_unique_sym (struct unique_sym *table, size_t size,
>    table[idx].map = map;
>  }
>  
> +/* Mark MAP as NODELETE according to the lookup mode in FLAGS.  During
> +   initial relocation, NODELETE state is pending only.  */
> +static void
> +mark_nodelete (struct link_map *map, int flags)
> +{
> +  if (flags & DL_LOOKUP_FOR_RELOCATE)
> +    map->l_nodelete_pending = true;
> +  else
> +    map->l_nodelete_active = true;
> +}
> +
> +/* Return true if MAP is marked as NODELETE according to the lookup
> +   mode in FLAGS> */
> +static bool
> +is_nodelete (struct link_map *map, int flags)
> +{
> +  /* Non-pending NODELETE always counts.  Pending NODELETE only counts
> +     during initial relocation processing.  */
> +  return map->l_nodelete_active
> +    || ((flags & DL_LOOKUP_FOR_RELOCATE) && map->l_nodelete_pending);
> +}
> +
>  /* Utility function for do_lookup_x. Lookup an STB_GNU_UNIQUE symbol
>     in the unique symbol table, creating a new entry if necessary.
>     Return the matching symbol in RESULT.  */
> @@ -311,8 +333,7 @@ do_lookup_unique (const char *undef_name, uint_fast32_t new_hash,
>        enter_unique_sym (entries, size,
>                          new_hash, strtab + sym->st_name, sym, map);
>  
> -      if (map->l_type == lt_loaded
> -  && map->l_nodelete == link_map_nodelete_inactive)
> +      if (map->l_type == lt_loaded && !is_nodelete (map, flags))

Ok, so it adds a check for DL_LOOKUP_FOR_RELOCATE and l_nodelete_pending
as well.

>   {
>    /* Make sure we don't unload this object by
>       setting the appropriate flag.  */
> @@ -320,10 +341,7 @@ do_lookup_unique (const char *undef_name, uint_fast32_t new_hash,
>      _dl_debug_printf ("\
>  marking %s [%lu] as NODELETE due to unique symbol\n",
>        map->l_name, map->l_ns);
> -  if (flags & DL_LOOKUP_FOR_RELOCATE)
> -    map->l_nodelete = link_map_nodelete_pending;
> -  else
> -    map->l_nodelete = link_map_nodelete_active;
> +  mark_nodelete (map, flags);
>   }
>      }

Ok.

>    ++tab->n_elements;
> @@ -586,7 +604,7 @@ add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
>       dependencies may pick an dependency which can be dlclose'd, but
>       such IFUNC resolvers are undefined anyway.  */
>    assert (map->l_type == lt_loaded);
> -  if (map->l_nodelete != link_map_nodelete_inactive)
> +  if (is_nodelete (map, flags))
>      return 0;
>  
>    struct link_map_reldeps *l_reldeps

Ok.

> @@ -694,17 +712,16 @@ add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
>  
>        /* Redo the NODELETE check, as when dl_load_lock wasn't held
>   yet this could have changed.  */
> -      if (map->l_nodelete != link_map_nodelete_inactive)
> +      if (is_nodelete (map, flags))
>   goto out;
>  

Ok.

>        /* If the object with the undefined reference cannot be removed ever
>   just make sure the same is true for the object which contains the
>   definition.  */
> -      if (undef_map->l_type != lt_loaded
> -  || (undef_map->l_nodelete != link_map_nodelete_inactive))
> +      if (undef_map->l_type != lt_loaded || is_nodelete (map, flags))
>   {

Ok.

>    if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_BINDINGS)
> -      && map->l_nodelete == link_map_nodelete_inactive)
> +      && !is_nodelete (map, flags))
>      {

Ok.

>        if (undef_map->l_name[0] == '\0')
>   _dl_debug_printf ("\
> @@ -716,11 +733,7 @@ marking %s [%lu] as NODELETE due to reference to %s [%lu]\n",
>    map->l_name, map->l_ns,
>    undef_map->l_name, undef_map->l_ns);
>      }
> -
> -  if (flags & DL_LOOKUP_FOR_RELOCATE)
> -    map->l_nodelete = link_map_nodelete_pending;
> -  else
> -    map->l_nodelete = link_map_nodelete_active;
> +  mark_nodelete (map, flags);
>    goto out;
>   }
>  

Ok.

> @@ -746,17 +759,14 @@ marking %s [%lu] as NODELETE due to reference to %s [%lu]\n",
>   cannot be unloaded.  This is semantically the correct
>   behavior.  */
>        if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_BINDINGS)
> -  && map->l_nodelete == link_map_nodelete_inactive)
> +  && !is_nodelete (map, flags))
>   _dl_debug_printf ("\

Ok.

>  marking %s [%lu] as NODELETE due to memory allocation failure\n",
>    map->l_name, map->l_ns);
> -      if (flags & DL_LOOKUP_FOR_RELOCATE)
> - /* In case of non-lazy binding, we could actually
> -   report the memory allocation error, but for now, we
> -   use the conservative approximation as well. */
> - map->l_nodelete = link_map_nodelete_pending;
> -      else
> - map->l_nodelete = link_map_nodelete_active;
> +      /* In case of non-lazy binding, we could actually report
> + the memory allocation error, but for now, we use the
> + conservative approximation as well.  */
> +      mark_nodelete (map, flags);
>        goto out;
>      }
>    else

Ok.

> diff --git a/elf/dl-open.c b/elf/dl-open.c
> index 56f213323c..c23341be58 100644
> --- a/elf/dl-open.c
> +++ b/elf/dl-open.c
> @@ -440,13 +440,17 @@ activate_nodelete (struct link_map *new)
>       NODELETE status for objects outside the local scope.  */
>    for (struct link_map *l = GL (dl_ns)[new->l_ns]._ns_loaded; l != NULL;
>         l = l->l_next)
> -    if (l->l_nodelete == link_map_nodelete_pending)
> +    if (l->l_nodelete_pending)
>        {
>   if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_FILES))
>    _dl_debug_printf ("activating NODELETE for %s [%lu]\n",
>      l->l_name, l->l_ns);
>  
> - l->l_nodelete = link_map_nodelete_active;
> + l->l_nodelete_active = true;
> +
> + /* This is just a debugging aid, to indicate that
> +   activate_nodelete has run for this map.  */
> + l->l_nodelete_pending = false;
>        }
>  }
>  

Ok.

> @@ -549,10 +553,10 @@ dl_open_worker (void *a)
>        if (__glibc_unlikely (mode & RTLD_NODELETE))
>   {
>    if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_FILES)
> -      && new->l_nodelete == link_map_nodelete_inactive)
> +      && !new->l_nodelete_active)
>      _dl_debug_printf ("marking %s [%lu] as NODELETE\n",
>        new->l_name, new->l_ns);
> -  new->l_nodelete = link_map_nodelete_active;
> +  new->l_nodelete_active = true;
>   }
>  

Ok.

>        /* Finalize the addition to the global scope.  */
> @@ -568,7 +572,7 @@ dl_open_worker (void *a)
>    /* Schedule NODELETE marking for the directly loaded object if
>       requested.  */
>    if (__glibc_unlikely (mode & RTLD_NODELETE))
> -    new->l_nodelete = link_map_nodelete_pending;
> +    new->l_nodelete_pending = true;
>  
>    /* Load that object's dependencies.  */
>    _dl_map_object_deps (new, NULL, 0, 0,

Ok.

> @@ -683,7 +687,7 @@ dl_open_worker (void *a)
>        _dl_start_profile ();
>  
>        /* Prevent unloading the object.  */
> -      GL(dl_profile_map)->l_nodelete = link_map_nodelete_active;
> +      GL(dl_profile_map)->l_nodelete_active = true;
>      }
>   }
>        else

Ok.

> @@ -882,9 +886,9 @@ no more namespaces available for dlmopen()"));
>       happens inside dl_open_worker.  */
>    __libc_signal_restore_set (&args.original_signal_mask);
>  
> -  /* All link_map_nodelete_pending objects should have been
> -     deleted at this point, which is why it is not necessary
> -     to reset the flag here.  */
> +  /* All l_nodelete_pending objects should have been deleted
> +     at this point, which is why it is not necessary to reset
> +     the flag here.  */
>   }
>        else
>   __libc_signal_restore_set (&args.original_signal_mask);

Ok.

> diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h
> index 075683d729..ea4e304bef 100644
> --- a/elf/get-dynamic-info.h
> +++ b/elf/get-dynamic-info.h
> @@ -164,7 +164,7 @@ elf_get_dynamic_info (struct link_map *l, ElfW(Dyn) *temp)
>      {
>        l->l_flags_1 = info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val;
>        if (l->l_flags_1 & DF_1_NODELETE)
> - l->l_nodelete = link_map_nodelete_pending;
> + l->l_nodelete_pending = true;
>  
>        /* Only DT_1_SUPPORTED_MASK bits are supported, and we would like
>   to assert this, but we can't. Users have been setting

Ok.

> diff --git a/include/link.h b/include/link.h
> index 2e771e433a..2acc836380 100644
> --- a/include/link.h
> +++ b/include/link.h
> @@ -79,22 +79,6 @@ struct r_search_path_struct
>      int malloced;
>    };
>  
> -/* Type used by the l_nodelete member.  */
> -enum link_map_nodelete
> -{
> - /* This link map can be deallocated.  */
> - link_map_nodelete_inactive = 0, /* Zero-initialized in _dl_new_object.  */
> -
> - /* This link map cannot be deallocated.  */
> - link_map_nodelete_active,
> -
> - /* This link map cannot be deallocated after dlopen has succeded.
> -    dlopen turns this into link_map_nodelete_active.  dlclose treats
> -    this intermediate state as link_map_nodelete_active.  */
> - link_map_nodelete_pending,
> -};
> -
> -
>  /* Structure describing a loaded shared object.  The `l_next' and `l_prev'
>     members form a chain of all the shared objects loaded at startup.
>  

Ok.

> @@ -218,10 +202,17 @@ struct link_map
>         freed, ie. not allocated with
>         the dummy malloc in ld.so.  */
>  
> -    /* Actually of type enum link_map_nodelete.  Separate byte due to
> -       a read in add_dependency in elf/dl-lookup.c outside the loader
> -       lock.  Only valid for l_type == lt_loaded.  */
> -    unsigned char l_nodelete;
> +    /* NODELETE status of the map.  Only valid for maps of type
> +       lt_loaded.  Lazy binding sets l_nodelete_active directly,
> +       potentially from signal handlers.  Initial loading of an
> +       DF_1_NODELETE object set l_nodelete_pending.  Relocation may
> +       set l_nodelete_pending as well.  l_nodelete_pending maps are
> +       promoted to l_nodelete_active status in the final stages of
> +       dlopen, prior to calling ELF constructors.  dlclose only
> +       refuses to unload l_nodelete_active maps, the pending status is
> +       ignored.  */
> +    bool l_nodelete_active;
> +    bool l_nodelete_pending;
>  
>  #include <link_map.h>
>  
>

Ok.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] dlopen: Rework handling of pending NODELETE status

Florian Weimer-5
* Adhemerval Zanella:

> On 05/12/2019 12:19, Florian Weimer wrote:
>> To avoid a read-modify-write cycle on the l_nodelete field, this
>> commit introduces two flags for active NODELETE status (irrevocable)
>> and pending NODELETE status (revocable until activate_nodelete) is
>> invoked.  As a result, NODELETE processing in dlopen does not
>> introduce further reasons why lazy binding from signal handlers
>> is unsafe during dlopen, and a subsequent commit can remove signal
>> blocking from dlopen.
>
> The changes to use the new two flags seems right, but I don't fully grasp
> the avoid modification this patch tries to achieve (read-modify-write).
> Could you explain why would be required without field split?

I think an example of that is in add_dependency in elf/dl-lookup.c.  In
the old code, we check for any kind of NODELETE status and bail out:

      /* Redo the NODELETE check, as when dl_load_lock wasn't held
         yet this could have changed.  */
      if (map->l_nodelete != link_map_nodelete_inactive)
        goto out;

And then set pending status (during relocation):

          if (flags & DL_LOOKUP_FOR_RELOCATE)
            map->l_nodelete = link_map_nodelete_pending;
          else
            map->l_nodelete = link_map_nodelete_active;

If a signal arrives during relocation and the signal handler, through
lazy binding, adds a global scope dependency on the same map, it will
set map->l_nodelete to link_map_nodelete_active.  This will be
overwritten with link_map_nodelete_pending by the dlopen relocation
code.

Does this explain my concerns?

Thanks,
Florian

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] dlopen: Rework handling of pending NODELETE status

Adhemerval Zanella-2


On 12/12/2019 10:51, Florian Weimer wrote:

> * Adhemerval Zanella:
>
>> On 05/12/2019 12:19, Florian Weimer wrote:
>>> To avoid a read-modify-write cycle on the l_nodelete field, this
>>> commit introduces two flags for active NODELETE status (irrevocable)
>>> and pending NODELETE status (revocable until activate_nodelete) is
>>> invoked.  As a result, NODELETE processing in dlopen does not
>>> introduce further reasons why lazy binding from signal handlers
>>> is unsafe during dlopen, and a subsequent commit can remove signal
>>> blocking from dlopen.
>>
>> The changes to use the new two flags seems right, but I don't fully grasp
>> the avoid modification this patch tries to achieve (read-modify-write).
>> Could you explain why would be required without field split?
>
> I think an example of that is in add_dependency in elf/dl-lookup.c.  In
> the old code, we check for any kind of NODELETE status and bail out:
>
>       /* Redo the NODELETE check, as when dl_load_lock wasn't held
> yet this could have changed.  */
>       if (map->l_nodelete != link_map_nodelete_inactive)
> goto out;
>
> And then set pending status (during relocation):
>
>  if (flags & DL_LOOKUP_FOR_RELOCATE)
>    map->l_nodelete = link_map_nodelete_pending;
>  else
>    map->l_nodelete = link_map_nodelete_active;
>
> If a signal arrives during relocation and the signal handler, through
> lazy binding, adds a global scope dependency on the same map, it will
> set map->l_nodelete to link_map_nodelete_active.  This will be
> overwritten with link_map_nodelete_pending by the dlopen relocation
> code.
>
> Does this explain my concerns?

Right, it is more clear. Could you add this comment on commit message
as well?

Reviewed-by: Adhemerval Zanella <[hidden email]>
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/2] dlopen: Do not block signals

Adhemerval Zanella-2
In reply to this post by Florian Weimer-5


On 05/12/2019 12:20, Florian Weimer wrote:

> Blocking signals causes issues with certain anti-malware solutions
> which rely on an unblocked SIGSYS signal for system calls they
> intercept.
>
> This reverts commit a2e8aa0d9ea648068d8be52dd7b15f1b6a008e23
> ("Block signals during the initial part of dlopen") and adds
> comments related to async signal safety to active_nodelete and
> its caller.
>
> Note that this does not make lazy binding async-signal-safe with regards
> to dlopen.  It merely avoids introducing new async-signal-safety hazards
> as part of the NODELETE changes.

LGTM, thanks.

Reviewed-by: Adhemerval Zanella <[hidden email]>

> ---
>  elf/dl-open.c | 37 +++++++++++--------------------------
>  1 file changed, 11 insertions(+), 26 deletions(-)
>
> diff --git a/elf/dl-open.c b/elf/dl-open.c
> index c23341be58..5a1c5b5326 100644
> --- a/elf/dl-open.c
> +++ b/elf/dl-open.c
> @@ -34,7 +34,6 @@
>  #include <atomic.h>
>  #include <libc-internal.h>
>  #include <array_length.h>
> -#include <internal-signals.h>
>  
>  #include <dl-dst.h>
>  #include <dl-prop.h>
> @@ -53,10 +52,6 @@ struct dl_open_args
>    /* Namespace ID.  */
>    Lmid_t nsid;
>  
> -  /* Original signal mask.  Used for unblocking signal handlers before
> -     running ELF constructors.  */
> -  sigset_t original_signal_mask;
> -
>    /* Original value of _ns_global_scope_pending_adds.  Set by
>       dl_open_worker.  Only valid if nsid is a real namespace
>       (non-negative).  */

Ok.

> @@ -446,6 +441,9 @@ activate_nodelete (struct link_map *new)
>    _dl_debug_printf ("activating NODELETE for %s [%lu]\n",
>      l->l_name, l->l_ns);
>  
> + /* The flag can already be true at this point, e.g. a signal
> +   handler may have triggered lazy binding and set NODELETE
> +   status immediately.  */
>   l->l_nodelete_active = true;
>  
>   /* This is just a debugging aid, to indicate that

Ok.

> @@ -520,16 +518,12 @@ dl_open_worker (void *a)
>    if (new == NULL)
>      {
>        assert (mode & RTLD_NOLOAD);
> -      __libc_signal_restore_set (&args->original_signal_mask);
>        return;
>      }
>  
>    if (__glibc_unlikely (mode & __RTLD_SPROF))
> -    {
> -      /* This happens only if we load a DSO for 'sprof'.  */
> -      __libc_signal_restore_set (&args->original_signal_mask);
> -      return;
> -    }
> +    /* This happens only if we load a DSO for 'sprof'.  */
> +    return;
>  
>    /* This object is directly loaded.  */
>    ++new->l_direct_opencount;

Ok.

> @@ -565,7 +559,6 @@ dl_open_worker (void *a)
>  
>        assert (_dl_debug_initialize (0, args->nsid)->r_state == RT_CONSISTENT);
>  
> -      __libc_signal_restore_set (&args->original_signal_mask);
>        return;
>      }
>  

Ok.

> @@ -712,6 +705,12 @@ dl_open_worker (void *a)
>       All memory allocations for new objects must have happened
>       before.  */
>  
> +  /* Finalize the NODELETE status first.  This comes before
> +     update_scopes, so that lazy binding will not see pending NODELETE
> +     state for newly loaded objects.  There is a compiler barrier in
> +     update_scopes which ensures that the changes from
> +     activate_nodelete are visible before new objects show up in the
> +     local scope.  */
>    activate_nodelete (new);
>  
>    /* Second stage after resize_scopes: Actually perform the scope

Ok.

> @@ -745,10 +744,6 @@ dl_open_worker (void *a)
>    if (mode & RTLD_GLOBAL)
>      add_to_global_resize (new);
>  
> -  /* Unblock signals.  Data structures are now consistent, and
> -     application code may run.  */
> -  __libc_signal_restore_set (&args->original_signal_mask);
> -
>    /* Run the initializer functions of new objects.  Temporarily
>       disable the exception handler, so that lazy binding failures are
>       fatal.  */

Ok.

> @@ -838,10 +833,6 @@ no more namespaces available for dlmopen()"));
>    args.argv = argv;
>    args.env = env;
>  
> -  /* Recursive lazy binding during manipulation of the dynamic loader
> -     structures may result in incorrect behavior.  */
> -  __libc_signal_block_all (&args.original_signal_mask);
> -
>    struct dl_exception exception;
>    int errcode = _dl_catch_exception (&exception, dl_open_worker, &args);
>  

Ok.

> @@ -882,16 +873,10 @@ no more namespaces available for dlmopen()"));
>  
>    _dl_close_worker (args.map, true);
>  
> -  /* Restore the signal mask.  In the success case, this
> -     happens inside dl_open_worker.  */
> -  __libc_signal_restore_set (&args.original_signal_mask);
> -
>    /* All l_nodelete_pending objects should have been deleted
>       at this point, which is why it is not necessary to reset
>       the flag here.  */
>   }
> -      else
> - __libc_signal_restore_set (&args.original_signal_mask);
>  
>        assert (_dl_debug_initialize (0, args.nsid)->r_state == RT_CONSISTENT);
>  
>

Ok.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] dlopen: Rework handling of pending NODELETE status

Carlos O'Donell-5
In reply to this post by Florian Weimer-5
On 12/5/19 10:19 AM, Florian Weimer wrote:
> To avoid a read-modify-write cycle on the l_nodelete field, this
> commit introduces two flags for active NODELETE status (irrevocable)
> and pending NODELETE status (revocable until activate_nodelete) is
> invoked.  As a result, NODELETE processing in dlopen does not
> introduce further reasons why lazy binding from signal handlers
> is unsafe during dlopen, and a subsequent commit can remove signal
> blocking from dlopen.
> ---

OK for master if you fix the minor comment typo, and expand the comment
as indicated by Adhemerval.

Reviewed-by: Carlos O'Donell <[hidden email]>


>  elf/dl-close.c         |  7 +++--
>  elf/dl-lookup.c        | 58 +++++++++++++++++++++++++-----------------
>  elf/dl-open.c          | 22 +++++++++-------
>  elf/get-dynamic-info.h |  2 +-
>  include/link.h         | 31 ++++++++--------------
>  5 files changed, 62 insertions(+), 58 deletions(-)
>
> diff --git a/elf/dl-close.c b/elf/dl-close.c
> index e35a62daf6..df1df6fb29 100644
> --- a/elf/dl-close.c
> +++ b/elf/dl-close.c
> @@ -197,7 +197,7 @@ _dl_close_worker (struct link_map *map, bool force)
>        /* Check whether this object is still used.  */
>        if (l->l_type == lt_loaded
>    && l->l_direct_opencount == 0
> -  && l->l_nodelete != link_map_nodelete_active
> +  && !l->l_nodelete_active

OK.

>    /* See CONCURRENCY NOTES in cxa_thread_atexit_impl.c to know why
>       acquire is sufficient and correct.  */
>    && atomic_load_acquire (&l->l_tls_dtor_count) == 0
> @@ -279,8 +279,7 @@ _dl_close_worker (struct link_map *map, bool force)
>  
>        if (!used[i])
>   {
> -  assert (imap->l_type == lt_loaded
> -  && imap->l_nodelete != link_map_nodelete_active);
> +  assert (imap->l_type == lt_loaded && !imap->l_nodelete_active);

OK.

>  
>    /* Call its termination function.  Do not do it for
>       half-cooked objects.  Temporarily disable exception
> @@ -830,7 +829,7 @@ _dl_close (void *_map)
>       before we took the lock. There is no way to detect this (see below)
>       so we proceed assuming this isn't the case.  First see whether we
>       can remove the object at all.  */
> -  if (__glibc_unlikely (map->l_nodelete == link_map_nodelete_active))
> +  if (__glibc_unlikely (map->l_nodelete_active))

OK.

>      {
>        /* Nope.  Do nothing.  */
>        __rtld_lock_unlock_recursive (GL(dl_load_lock));
> diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
> index 99846918c3..759b45a2c9 100644
> --- a/elf/dl-lookup.c
> +++ b/elf/dl-lookup.c
> @@ -187,6 +187,28 @@ enter_unique_sym (struct unique_sym *table, size_t size,
>    table[idx].map = map;
>  }
>  
> +/* Mark MAP as NODELETE according to the lookup mode in FLAGS.  During
> +   initial relocation, NODELETE state is pending only.  */
> +static void
> +mark_nodelete (struct link_map *map, int flags)
> +{
> +  if (flags & DL_LOOKUP_FOR_RELOCATE)
> +    map->l_nodelete_pending = true;
> +  else
> +    map->l_nodelete_active = true;

OK.

> +}
> +
> +/* Return true if MAP is marked as NODELETE according to the lookup
> +   mode in FLAGS> */

s/>/.  /g

> +static bool
> +is_nodelete (struct link_map *map, int flags)
> +{
> +  /* Non-pending NODELETE always counts.  Pending NODELETE only counts
> +     during initial relocation processing.  */
> +  return map->l_nodelete_active
> +    || ((flags & DL_LOOKUP_FOR_RELOCATE) && map->l_nodelete_pending);
> +}

OK.

> +
>  /* Utility function for do_lookup_x. Lookup an STB_GNU_UNIQUE symbol
>     in the unique symbol table, creating a new entry if necessary.
>     Return the matching symbol in RESULT.  */
> @@ -311,8 +333,7 @@ do_lookup_unique (const char *undef_name, uint_fast32_t new_hash,
>        enter_unique_sym (entries, size,
>                          new_hash, strtab + sym->st_name, sym, map);
>  
> -      if (map->l_type == lt_loaded
> -  && map->l_nodelete == link_map_nodelete_inactive)
> +      if (map->l_type == lt_loaded && !is_nodelete (map, flags))

OK.

>   {
>    /* Make sure we don't unload this object by
>       setting the appropriate flag.  */
> @@ -320,10 +341,7 @@ do_lookup_unique (const char *undef_name, uint_fast32_t new_hash,
>      _dl_debug_printf ("\
>  marking %s [%lu] as NODELETE due to unique symbol\n",
>        map->l_name, map->l_ns);
> -  if (flags & DL_LOOKUP_FOR_RELOCATE)
> -    map->l_nodelete = link_map_nodelete_pending;
> -  else
> -    map->l_nodelete = link_map_nodelete_active;
> +  mark_nodelete (map, flags);

OK.

>   }
>      }
>    ++tab->n_elements;
> @@ -586,7 +604,7 @@ add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
>       dependencies may pick an dependency which can be dlclose'd, but
>       such IFUNC resolvers are undefined anyway.  */
>    assert (map->l_type == lt_loaded);
> -  if (map->l_nodelete != link_map_nodelete_inactive)
> +  if (is_nodelete (map, flags))

OK.

>      return 0;
>  
>    struct link_map_reldeps *l_reldeps
> @@ -694,17 +712,16 @@ add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
>  
>        /* Redo the NODELETE check, as when dl_load_lock wasn't held
>   yet this could have changed.  */
> -      if (map->l_nodelete != link_map_nodelete_inactive)
> +      if (is_nodelete (map, flags))

OK.

>   goto out;
>  
>        /* If the object with the undefined reference cannot be removed ever
>   just make sure the same is true for the object which contains the
>   definition.  */
> -      if (undef_map->l_type != lt_loaded
> -  || (undef_map->l_nodelete != link_map_nodelete_inactive))
> +      if (undef_map->l_type != lt_loaded || is_nodelete (map, flags))

OK.

>   {
>    if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_BINDINGS)
> -      && map->l_nodelete == link_map_nodelete_inactive)
> +      && !is_nodelete (map, flags))

OK.

>      {
>        if (undef_map->l_name[0] == '\0')
>   _dl_debug_printf ("\
> @@ -716,11 +733,7 @@ marking %s [%lu] as NODELETE due to reference to %s [%lu]\n",
>    map->l_name, map->l_ns,
>    undef_map->l_name, undef_map->l_ns);
>      }
> -
> -  if (flags & DL_LOOKUP_FOR_RELOCATE)
> -    map->l_nodelete = link_map_nodelete_pending;
> -  else
> -    map->l_nodelete = link_map_nodelete_active;
> +  mark_nodelete (map, flags);

OK.

>    goto out;
>   }
>  
> @@ -746,17 +759,14 @@ marking %s [%lu] as NODELETE due to reference to %s [%lu]\n",
>   cannot be unloaded.  This is semantically the correct
>   behavior.  */
>        if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_BINDINGS)
> -  && map->l_nodelete == link_map_nodelete_inactive)
> +  && !is_nodelete (map, flags))

OK.

>   _dl_debug_printf ("\
>  marking %s [%lu] as NODELETE due to memory allocation failure\n",
>    map->l_name, map->l_ns);
> -      if (flags & DL_LOOKUP_FOR_RELOCATE)
> - /* In case of non-lazy binding, we could actually
> -   report the memory allocation error, but for now, we
> -   use the conservative approximation as well. */
> - map->l_nodelete = link_map_nodelete_pending;
> -      else
> - map->l_nodelete = link_map_nodelete_active;
> +      /* In case of non-lazy binding, we could actually report
> + the memory allocation error, but for now, we use the
> + conservative approximation as well.  */
> +      mark_nodelete (map, flags);

OK.

>        goto out;
>      }
>    else
> diff --git a/elf/dl-open.c b/elf/dl-open.c
> index 56f213323c..c23341be58 100644
> --- a/elf/dl-open.c
> +++ b/elf/dl-open.c
> @@ -440,13 +440,17 @@ activate_nodelete (struct link_map *new)
>       NODELETE status for objects outside the local scope.  */
>    for (struct link_map *l = GL (dl_ns)[new->l_ns]._ns_loaded; l != NULL;
>         l = l->l_next)
> -    if (l->l_nodelete == link_map_nodelete_pending)
> +    if (l->l_nodelete_pending)

OK.

>        {
>   if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_FILES))
>    _dl_debug_printf ("activating NODELETE for %s [%lu]\n",
>      l->l_name, l->l_ns);
>  
> - l->l_nodelete = link_map_nodelete_active;
> + l->l_nodelete_active = true;
> +
> + /* This is just a debugging aid, to indicate that
> +   activate_nodelete has run for this map.  */
> + l->l_nodelete_pending = false;

OK.

>        }
>  }
>  
> @@ -549,10 +553,10 @@ dl_open_worker (void *a)
>        if (__glibc_unlikely (mode & RTLD_NODELETE))
>   {
>    if (__glibc_unlikely (GLRO (dl_debug_mask) & DL_DEBUG_FILES)
> -      && new->l_nodelete == link_map_nodelete_inactive)
> +      && !new->l_nodelete_active)

OK.

>      _dl_debug_printf ("marking %s [%lu] as NODELETE\n",
>        new->l_name, new->l_ns);
> -  new->l_nodelete = link_map_nodelete_active;
> +  new->l_nodelete_active = true;

OK.

>   }
>  
>        /* Finalize the addition to the global scope.  */
> @@ -568,7 +572,7 @@ dl_open_worker (void *a)
>    /* Schedule NODELETE marking for the directly loaded object if
>       requested.  */
>    if (__glibc_unlikely (mode & RTLD_NODELETE))
> -    new->l_nodelete = link_map_nodelete_pending;
> +    new->l_nodelete_pending = true;

OK.

>  
>    /* Load that object's dependencies.  */
>    _dl_map_object_deps (new, NULL, 0, 0,
> @@ -683,7 +687,7 @@ dl_open_worker (void *a)
>        _dl_start_profile ();
>  
>        /* Prevent unloading the object.  */
> -      GL(dl_profile_map)->l_nodelete = link_map_nodelete_active;
> +      GL(dl_profile_map)->l_nodelete_active = true;

OK.

>      }
>   }
>        else
> @@ -882,9 +886,9 @@ no more namespaces available for dlmopen()"));
>       happens inside dl_open_worker.  */
>    __libc_signal_restore_set (&args.original_signal_mask);
>  
> -  /* All link_map_nodelete_pending objects should have been
> -     deleted at this point, which is why it is not necessary
> -     to reset the flag here.  */
> +  /* All l_nodelete_pending objects should have been deleted
> +     at this point, which is why it is not necessary to reset
> +     the flag here.  */

OK.

>   }
>        else
>   __libc_signal_restore_set (&args.original_signal_mask);
> diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h
> index 075683d729..ea4e304bef 100644
> --- a/elf/get-dynamic-info.h
> +++ b/elf/get-dynamic-info.h
> @@ -164,7 +164,7 @@ elf_get_dynamic_info (struct link_map *l, ElfW(Dyn) *temp)
>      {
>        l->l_flags_1 = info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val;
>        if (l->l_flags_1 & DF_1_NODELETE)
> - l->l_nodelete = link_map_nodelete_pending;
> + l->l_nodelete_pending = true;

OK.

>  
>        /* Only DT_1_SUPPORTED_MASK bits are supported, and we would like
>   to assert this, but we can't. Users have been setting
> diff --git a/include/link.h b/include/link.h
> index 2e771e433a..2acc836380 100644
> --- a/include/link.h
> +++ b/include/link.h
> @@ -79,22 +79,6 @@ struct r_search_path_struct
>      int malloced;
>    };
>  
> -/* Type used by the l_nodelete member.  */
> -enum link_map_nodelete
> -{
> - /* This link map can be deallocated.  */
> - link_map_nodelete_inactive = 0, /* Zero-initialized in _dl_new_object.  */
> -
> - /* This link map cannot be deallocated.  */
> - link_map_nodelete_active,
> -
> - /* This link map cannot be deallocated after dlopen has succeded.
> -    dlopen turns this into link_map_nodelete_active.  dlclose treats
> -    this intermediate state as link_map_nodelete_active.  */
> - link_map_nodelete_pending,
> -};

OK.

> -
> -
>  /* Structure describing a loaded shared object.  The `l_next' and `l_prev'
>     members form a chain of all the shared objects loaded at startup.
>  
> @@ -218,10 +202,17 @@ struct link_map
>         freed, ie. not allocated with
>         the dummy malloc in ld.so.  */
>  
> -    /* Actually of type enum link_map_nodelete.  Separate byte due to
> -       a read in add_dependency in elf/dl-lookup.c outside the loader
> -       lock.  Only valid for l_type == lt_loaded.  */
> -    unsigned char l_nodelete;
> +    /* NODELETE status of the map.  Only valid for maps of type
> +       lt_loaded.  Lazy binding sets l_nodelete_active directly,
> +       potentially from signal handlers.  Initial loading of an
> +       DF_1_NODELETE object set l_nodelete_pending.  Relocation may
> +       set l_nodelete_pending as well.  l_nodelete_pending maps are
> +       promoted to l_nodelete_active status in the final stages of
> +       dlopen, prior to calling ELF constructors.  dlclose only
> +       refuses to unload l_nodelete_active maps, the pending status is
> +       ignored.  */
> +    bool l_nodelete_active;
> +    bool l_nodelete_pending;

OK. An arriving signal can set l_nodelete_active to true, independent of
setting l_nodelete_pending, and once the signal returns, even if you set
l_nodelete_pending to true, l_nodelete_active is still set to true.
The case in dl_open_worker will tolerate having l_nodelete_active set
to true by an async-signal handler. Likewise do_lookup_unique and
add_dependency.

Adhemerval requested this case of an async-signal handler should be
better spelled out in the comment, and he's probably right that we
could epxand the "potentially from signal handlers" bit here because
it's the entire point of these two fields which is to support setting
active and pending independently.

>  
>  #include <link_map.h>
>  
>


--
Cheers,
Carlos.

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/2] dlopen: Do not block signals

Carlos O'Donell-5
In reply to this post by Adhemerval Zanella-2
On 12/12/19 12:01 PM, Adhemerval Zanella wrote:

>
>
> On 05/12/2019 12:20, Florian Weimer wrote:
>> Blocking signals causes issues with certain anti-malware solutions
>> which rely on an unblocked SIGSYS signal for system calls they
>> intercept.
>>
>> This reverts commit a2e8aa0d9ea648068d8be52dd7b15f1b6a008e23
>> ("Block signals during the initial part of dlopen") and adds
>> comments related to async signal safety to active_nodelete and
>> its caller.
>>
>> Note that this does not make lazy binding async-signal-safe with regards
>> to dlopen.  It merely avoids introducing new async-signal-safety hazards
>> as part of the NODELETE changes.
>
> LGTM, thanks.
>
> Reviewed-by: Adhemerval Zanella <[hidden email]>

LGTM too.

Reviewed-by: Carlos O'Donell <[hidden email]>

>> ---
>>  elf/dl-open.c | 37 +++++++++++--------------------------
>>  1 file changed, 11 insertions(+), 26 deletions(-)
>>
>> diff --git a/elf/dl-open.c b/elf/dl-open.c
>> index c23341be58..5a1c5b5326 100644
>> --- a/elf/dl-open.c
>> +++ b/elf/dl-open.c
>> @@ -34,7 +34,6 @@
>>  #include <atomic.h>
>>  #include <libc-internal.h>
>>  #include <array_length.h>
>> -#include <internal-signals.h>
>>  
>>  #include <dl-dst.h>
>>  #include <dl-prop.h>
>> @@ -53,10 +52,6 @@ struct dl_open_args
>>    /* Namespace ID.  */
>>    Lmid_t nsid;
>>  
>> -  /* Original signal mask.  Used for unblocking signal handlers before
>> -     running ELF constructors.  */
>> -  sigset_t original_signal_mask;
>> -
>>    /* Original value of _ns_global_scope_pending_adds.  Set by
>>       dl_open_worker.  Only valid if nsid is a real namespace
>>       (non-negative).  */
>
> Ok.
>
>> @@ -446,6 +441,9 @@ activate_nodelete (struct link_map *new)
>>    _dl_debug_printf ("activating NODELETE for %s [%lu]\n",
>>      l->l_name, l->l_ns);
>>  
>> + /* The flag can already be true at this point, e.g. a signal
>> +   handler may have triggered lazy binding and set NODELETE
>> +   status immediately.  */
>>   l->l_nodelete_active = true;
>>  
>>   /* This is just a debugging aid, to indicate that
>
> Ok.
>
>> @@ -520,16 +518,12 @@ dl_open_worker (void *a)
>>    if (new == NULL)
>>      {
>>        assert (mode & RTLD_NOLOAD);
>> -      __libc_signal_restore_set (&args->original_signal_mask);
>>        return;
>>      }
>>  
>>    if (__glibc_unlikely (mode & __RTLD_SPROF))
>> -    {
>> -      /* This happens only if we load a DSO for 'sprof'.  */
>> -      __libc_signal_restore_set (&args->original_signal_mask);
>> -      return;
>> -    }
>> +    /* This happens only if we load a DSO for 'sprof'.  */
>> +    return;
>>  
>>    /* This object is directly loaded.  */
>>    ++new->l_direct_opencount;
>
> Ok.
>
>> @@ -565,7 +559,6 @@ dl_open_worker (void *a)
>>  
>>        assert (_dl_debug_initialize (0, args->nsid)->r_state == RT_CONSISTENT);
>>  
>> -      __libc_signal_restore_set (&args->original_signal_mask);
>>        return;
>>      }
>>  
>
> Ok.
>
>> @@ -712,6 +705,12 @@ dl_open_worker (void *a)
>>       All memory allocations for new objects must have happened
>>       before.  */
>>  
>> +  /* Finalize the NODELETE status first.  This comes before
>> +     update_scopes, so that lazy binding will not see pending NODELETE
>> +     state for newly loaded objects.  There is a compiler barrier in
>> +     update_scopes which ensures that the changes from
>> +     activate_nodelete are visible before new objects show up in the
>> +     local scope.  */
>>    activate_nodelete (new);
>>  
>>    /* Second stage after resize_scopes: Actually perform the scope
>
> Ok.
>
>> @@ -745,10 +744,6 @@ dl_open_worker (void *a)
>>    if (mode & RTLD_GLOBAL)
>>      add_to_global_resize (new);
>>  
>> -  /* Unblock signals.  Data structures are now consistent, and
>> -     application code may run.  */
>> -  __libc_signal_restore_set (&args->original_signal_mask);
>> -
>>    /* Run the initializer functions of new objects.  Temporarily
>>       disable the exception handler, so that lazy binding failures are
>>       fatal.  */
>
> Ok.
>
>> @@ -838,10 +833,6 @@ no more namespaces available for dlmopen()"));
>>    args.argv = argv;
>>    args.env = env;
>>  
>> -  /* Recursive lazy binding during manipulation of the dynamic loader
>> -     structures may result in incorrect behavior.  */
>> -  __libc_signal_block_all (&args.original_signal_mask);
>> -
>>    struct dl_exception exception;
>>    int errcode = _dl_catch_exception (&exception, dl_open_worker, &args);
>>  
>
> Ok.
>
>> @@ -882,16 +873,10 @@ no more namespaces available for dlmopen()"));
>>  
>>    _dl_close_worker (args.map, true);
>>  
>> -  /* Restore the signal mask.  In the success case, this
>> -     happens inside dl_open_worker.  */
>> -  __libc_signal_restore_set (&args.original_signal_mask);
>> -
>>    /* All l_nodelete_pending objects should have been deleted
>>       at this point, which is why it is not necessary to reset
>>       the flag here.  */
>>   }
>> -      else
>> - __libc_signal_restore_set (&args.original_signal_mask);
>>  
>>        assert (_dl_debug_initialize (0, args.nsid)->r_state == RT_CONSISTENT);
>>  
>>
>
> Ok.
>


--
Cheers,
Carlos.