[commit] Avoid falling down for missing threads on Linux

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[commit] Avoid falling down for missing threads on Linux

Daniel Jacobowitz-2
This patch corresponds to a kernel bug I reported several months ago:
  http://bugzilla.kernel.org/show_bug.cgi?id=7210

It affects all kernels with the CLONE_PARENT_TIDPTR flag, at least through
2.6.19.  This flag writes the PID of the new process into the parent's
memory, and NPTL uses this to set up the thread list.  But there are still
places where the operation can fail after that point, leaving a stale value
in memory.  I convinced the kernel developers that this behavior was a bug,
but there are a lot of deployed systems with the problem and a fix
hasn't been merged yet.

It affects basically every kernel currently capable of running NPTL, and one
of the ways it manifests is as an intermittent failure on tls.exp.  We won't
get a thread creation event until later (after a successful retry of the
clone which failed), but if we issue "info threads" right after a failed
clone, the thread list in NPTL's internal state will list a thread that
isn't there yet.

We don't need to take any special action to make sure we get the thread;
we'll be notified in due course.  So we just need to avoid blowing up
in the mean time, as in the simple patch below.

Tested on x86_64-pc-linux-gnu and committed.  I get the warning about 10%
of the time when running tls.exp, but now there are no failures when
I trigger the race condition.

--
Daniel Jacobowitz
CodeSourcery

2006-12-31  Daniel Jacobowitz  <[hidden email]>

        * linux-nat.c (lin_lwp_attach_lwp): Return a status.  Do not
        add the LWP to our list until we are attached.  Warn instead
        of erroring if the attach fails.
        * linux-nat.h (lin_lwp_attach_lwp): New prototype.
        * linux-thread-db.c (attach_thread): Call lin_lwp_attach_lwp
        directly.  Do not add the thread to our list until we are
        successfully attached.
        * config/nm-linux.h (lin_lwp_attach_lwp, ATTACH_LWP): Delete.

Index: linux-nat.c
===================================================================
RCS file: /cvs/src/src/gdb/linux-nat.c,v
retrieving revision 1.51
diff -u -p -r1.51 linux-nat.c
--- linux-nat.c 20 Nov 2006 21:47:06 -0000 1.51
+++ linux-nat.c 31 Dec 2006 21:02:40 -0000
@@ -915,12 +915,13 @@ exit_lwp (struct lwp_info *lp)
 
 /* Attach to the LWP specified by PID.  If VERBOSE is non-zero, print
    a message telling the user that a new LWP has been added to the
-   process.  */
+   process.  Return 0 if successful or -1 if the new LWP could not
+   be attached.  */
 
-void
+int
 lin_lwp_attach_lwp (ptid_t ptid, int verbose)
 {
-  struct lwp_info *lp, *found_lp;
+  struct lwp_info *lp;
 
   gdb_assert (is_lwp (ptid));
 
@@ -932,12 +933,7 @@ lin_lwp_attach_lwp (ptid_t ptid, int ver
       sigprocmask (SIG_BLOCK, &blocked_mask, NULL);
     }
 
-  if (verbose)
-    printf_filtered (_("[New %s]\n"), target_pid_to_str (ptid));
-
-  found_lp = lp = find_lwp_pid (ptid);
-  if (lp == NULL)
-    lp = add_lwp (ptid);
+  lp = find_lwp_pid (ptid);
 
   /* We assume that we're already attached to any LWP that has an id
      equal to the overall process id, and to any LWP that is already
@@ -945,14 +941,25 @@ lin_lwp_attach_lwp (ptid_t ptid, int ver
      and we've had PID wraparound since we last tried to stop all threads,
      this assumption might be wrong; fortunately, this is very unlikely
      to happen.  */
-  if (GET_LWP (ptid) != GET_PID (ptid) && found_lp == NULL)
+  if (GET_LWP (ptid) != GET_PID (ptid) && lp == NULL)
     {
       pid_t pid;
       int status;
 
       if (ptrace (PTRACE_ATTACH, GET_LWP (ptid), 0, 0) < 0)
- error (_("Can't attach %s: %s"), target_pid_to_str (ptid),
-       safe_strerror (errno));
+ {
+  /* If we fail to attach to the thread, issue a warning,
+     but continue.  One way this can happen is if thread
+     creation is interrupted; as of Linux 2.6.19, a kernel
+     bug may place threads in the thread list and then fail
+     to create them.  */
+  warning (_("Can't attach %s: %s"), target_pid_to_str (ptid),
+   safe_strerror (errno));
+  return -1;
+ }
+
+      if (lp == NULL)
+ lp = add_lwp (ptid);
 
       if (debug_linux_nat)
  fprintf_unfiltered (gdb_stdlog,
@@ -990,8 +997,15 @@ lin_lwp_attach_lwp (ptid_t ptid, int ver
          threads.  Note that this won't have already been done since
          the main thread will have, we assume, been stopped by an
          attach from a different layer.  */
+      if (lp == NULL)
+ lp = add_lwp (ptid);
       lp->stopped = 1;
     }
+
+  if (verbose)
+    printf_filtered (_("[New %s]\n"), target_pid_to_str (ptid));
+
+  return 0;
 }
 
 static void
Index: linux-nat.h
===================================================================
RCS file: /cvs/src/src/gdb/linux-nat.h,v
retrieving revision 1.13
diff -u -p -r1.13 linux-nat.h
--- linux-nat.h 20 Nov 2006 21:47:06 -0000 1.13
+++ linux-nat.h 31 Dec 2006 21:02:40 -0000
@@ -80,6 +80,8 @@ extern void linux_enable_event_reporting
 extern ptid_t linux_handle_extended_wait (int pid, int status,
   struct target_waitstatus *ourstatus);
 
+extern int lin_lwp_attach_lwp (ptid_t ptid, int verbose);
+
 /* Iterator function for lin-lwp's lwp list.  */
 struct lwp_info *iterate_over_lwps (int (*callback) (struct lwp_info *,
      void *),
Index: linux-thread-db.c
===================================================================
RCS file: /cvs/src/src/gdb/linux-thread-db.c,v
retrieving revision 1.23
diff -u -p -r1.23 linux-thread-db.c
--- linux-thread-db.c 31 Dec 2006 20:20:13 -0000 1.23
+++ linux-thread-db.c 31 Dec 2006 21:02:40 -0000
@@ -690,6 +690,13 @@ attach_thread (ptid_t ptid, const td_thr
 
   check_thread_signals ();
 
+  if (ti_p->ti_state == TD_THR_UNKNOWN || ti_p->ti_state == TD_THR_ZOMBIE)
+    return; /* A zombie thread -- do not attach.  */
+
+  /* Under GNU/Linux, we have to attach to each and every thread.  */
+  if (lin_lwp_attach_lwp (BUILD_LWP (ti_p->ti_lid, GET_PID (ptid)), 0) < 0)
+    return;
+
   /* Add the thread to GDB's thread list.  */
   tp = add_thread (ptid);
   tp->private = xmalloc (sizeof (struct private_thread_info));
@@ -698,14 +705,6 @@ attach_thread (ptid_t ptid, const td_thr
   if (verbose)
     printf_unfiltered (_("[New %s]\n"), target_pid_to_str (ptid));
 
-  if (ti_p->ti_state == TD_THR_UNKNOWN || ti_p->ti_state == TD_THR_ZOMBIE)
-    return; /* A zombie thread -- do not attach.  */
-
-  /* Under GNU/Linux, we have to attach to each and every thread.  */
-#ifdef ATTACH_LWP
-  ATTACH_LWP (BUILD_LWP (ti_p->ti_lid, GET_PID (ptid)), 0);
-#endif
-
   /* Enable thread event reporting for this thread.  */
   err = td_thr_event_enable_p (th_p, 1);
   if (err != TD_OK)
Index: config/nm-linux.h
===================================================================
RCS file: /cvs/src/src/gdb/config/nm-linux.h,v
retrieving revision 1.27
diff -u -p -r1.27 nm-linux.h
--- config/nm-linux.h 28 Nov 2006 19:45:07 -0000 1.27
+++ config/nm-linux.h 31 Dec 2006 21:02:40 -0000
@@ -1,6 +1,6 @@
 /* Native support for GNU/Linux.
 
-   Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005
+   Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
    Free Software Foundation, Inc.
 
    This file is part of GDB.
@@ -25,9 +25,6 @@ struct target_ops;
 /* GNU/Linux is SVR4-ish but its /proc file system isn't.  */
 #undef USE_PROC_FS
 
-extern void lin_lwp_attach_lwp (ptid_t ptid, int verbose);
-#define ATTACH_LWP(ptid, verbose) lin_lwp_attach_lwp ((ptid), (verbose))
-
 extern void lin_thread_get_thread_signals (sigset_t *mask);
 #define GET_THREAD_SIGNALS(mask) lin_thread_get_thread_signals (mask)