[ECOS] Miss calling ASR in sched.cxx

classic Classic list List threaded Threaded
4 messages Options
Reply | Threaded
Open this post in threaded view
|

[ECOS] Miss calling ASR in sched.cxx

kiron
Hi All,

I' am new to this mail list. If I have something wrong, correct me.

I debug my application on MPC8313 platform. It has some pthreads. one
of them use the posix timer
(packages/compat/posix/current/src/time.cxx) to drive a status
machine.
When the pthread running a few minuters, the timer was disarmed.

I tracked code path, found that code clips of
Cyg_Scheduler::unlock_inner in
packages/kernel/current/src/sched/sched.cxx :

-------------------------------------------------------------------------------------------------------------------------------------------
    do {

        CYG_PRECONDITION( new_lock==0 ? get_sched_lock() == 1 :
                          ((get_sched_lock() == new_lock) ||
(get_sched_lock() == new_lock+1)),
                          "sched_lock not at expected value" );

#ifdef CYGIMP_KERNEL_INTERRUPTS_DSRS

        // Call any pending DSRs. Do this here to ensure that any
        // threads that get awakened are properly scheduled.

        if( new_lock == 0 && Cyg_Interrupt::DSRs_pending() )
            Cyg_Interrupt::call_pending_DSRs();
#endif

        Cyg_Thread *current = get_current_thread();

        CYG_ASSERTCLASS( current, "Bad current thread" );

#ifdef CYGFUN_KERNEL_ALL_THREADS_STACK_CHECKING
        // should have  CYGVAR_KERNEL_THREADS_LIST
        current = Cyg_Thread::get_list_head();
        while ( current ) {
            current->check_stack();
            current = current->get_list_next();
        }
        current = get_current_thread();
#endif

#ifdef CYGFUN_KERNEL_THREADS_STACK_CHECKING
        current->check_stack();
#endif

        // If the current thread is going to sleep, or someone
        // wants a reschedule, choose another thread to run

        if( current->state != Cyg_Thread::RUNNING || get_need_reschedule() ) {

            CYG_INSTRUMENT_SCHED(RESCHEDULE,0,0);

            // Get the next thread to run from scheduler
            Cyg_Thread *next = scheduler.schedule();

            CYG_CHECK_DATA_PTR( next, "Invalid next thread pointer");
            CYG_ASSERTCLASS( next, "Bad next thread" );

            if( current != next )
            {

                CYG_INSTRUMENT_THREAD(SWITCH,current,next);

                // Count this thread switch
                thread_switches[CYG_KERNEL_CPU_THIS()]++;

#ifdef CYGFUN_KERNEL_THREADS_STACK_CHECKING
                next->check_stack(); // before running it
#endif
                current->timeslice_save();

                // Switch contexts
                HAL_THREAD_SWITCH_CONTEXT( &current->stack_ptr,
                                           &next->stack_ptr );

                // Worry here about possible compiler
                // optimizations across the above call that may try to
                // propogate common subexpresions.  We would end up
                // with the expression from one thread in its
                // successor. This is only a worry if we do not save
                // and restore the complete register set. We need a
                // way of marking functions that return into a
                // different context. A temporary fix would be to
                // disable CSE (-fdisable-cse) in the compiler.

                // We return here only when the current thread is
                // rescheduled.  There is a bit of housekeeping to do
                // here before we are allowed to go on our way.

                CYG_CHECK_DATA_PTR( current, "Invalid current thread pointer");
                CYG_ASSERTCLASS( current, "Bad current thread" );

                current_thread[CYG_KERNEL_CPU_THIS()] = current;   //
restore current thread pointer

                current->timeslice_restore();
            }

            clear_need_reschedule();    // finished rescheduling
        }

        if( new_lock == 0 )
        {

#ifdef CYGSEM_KERNEL_SCHED_ASR_SUPPORT

            // Check whether the ASR is pending and not inhibited.  If
            // we can call it, then transfer this info to a local
            // variable (call_asr) and clear the pending flag.  Note
            // that we only do this if the scheduler lock is about to
            // be zeroed. In any other circumstance we are not
            // unlocking.

            cyg_bool call_asr = false;

            if( (current->asr_inhibit == 0) && current->asr_pending )
            {
                call_asr = true;
                current->asr_pending = false;
            }
#endif

            HAL_REORDER_BARRIER(); // Make sure everything above has happened
                                   // by this point
            zero_sched_lock();     // Clear the lock
            HAL_REORDER_BARRIER();

#ifdef CYGIMP_KERNEL_INTERRUPTS_DSRS

            // Now check whether any DSRs got posted during the thread
            // switch and if so, go around again. Making this test after
            // the lock has been zeroed avoids a race condition in which
            // a DSR could have been posted during a reschedule, but would
            // not be run until the _next_ time we release the sched lock.

            if( Cyg_Interrupt::DSRs_pending() ) {
                inc_sched_lock();   // reclaim the lock
                continue;           // go back to head of loop
            }

#endif
            // Otherwise the lock is zero, we can return.

//            CYG_POSTCONDITION( get_sched_lock() == 0, "sched_lock not zero" );

#ifdef CYGSEM_KERNEL_SCHED_ASR_SUPPORT
            // If the test within the sched_lock indicating that the ASR
            // be called was true, call it here. Calling the ASR must be
            // the very last thing we do here, since it must run as close
            // to "user" state as possible.

            if( call_asr ) current->asr(current->asr_data);
#endif

        }
        else
        {
            // If new_lock is non-zero then we restore the sched_lock to
            // the value given.

            HAL_REORDER_BARRIER();

            set_sched_lock(new_lock);

            HAL_REORDER_BARRIER();
        }

#ifdef CYGDBG_KERNEL_TRACE_UNLOCK_INNER
        CYG_REPORT_RETURN();
#endif
        return;

    } while( 1 );

-------------------------------------------------------------------------------------------------------------------------------------------

When enable both CYGSEM_KERNEL_SCHED_ASR_SUPPORT and
CYGIMP_KERNEL_INTERRUPTS_DSRS, consider that local variable call_asr
is set to true and clear the asr_pending flag of current thread, but
has DSRs_pending. It will continue the while loop and the call_asr's
value is re-initialized to false, code path will miss to call ASR.

Posix timer use ASR to deliver signal,If missing to call ASR, and
signal will not been delivered. Unfortunately, posix time subsystem
don't try to reset the asr_pending flag (see alarm_action() in
packages/compat/posix/current/src/time.cxx), and the timer was
disarmed forever (If it is a interval timer).

here is a small patch to fix this.

Huang Yi
------------------------

Index: packages/kernel/current/src/sched/sched.cxx
===================================================================
RCS file: /cvs/ecos/ecos/packages/kernel/current/src/sched/sched.cxx,v
retrieving revision 1.20
diff -u -8 -p -r1.20 sched.cxx
--- packages/kernel/current/src/sched/sched.cxx 29 Jan 2009 17:49:50
-0000      1.20
+++ packages/kernel/current/src/sched/sched.cxx 22 Apr 2011 02:42:06 -0000
@@ -131,16 +131,20 @@ inline void *operator new(size_t size, v
 // have when it reschedules this thread back, and leaves this function.
 // When it is non-zero, and the thread is rescheduled, no ASRS are run,
 // or DSRs processed. By doing this, it makes it possible for threads
 // that want to go to sleep to wake up with the scheduler lock in the
 // same state it was in before.

 void Cyg_Scheduler::unlock_inner( cyg_ucount32 new_lock )
 {
+#ifdef CYGSEM_KERNEL_SCHED_ASR_SUPPORT
+    cyg_bool call_asr = false;
+#endif
+
 #ifdef CYGDBG_KERNEL_TRACE_UNLOCK_INNER
     CYG_REPORT_FUNCTION();
 #endif

     do {

         CYG_PRECONDITION( new_lock==0 ? get_sched_lock() == 1 :
                           ((get_sched_lock() == new_lock) ||
(get_sched_lock() == new_lock+1)),
@@ -229,23 +233,21 @@ void Cyg_Scheduler::unlock_inner( cyg_uc
         }

         if( new_lock == 0 )
         {

 #ifdef CYGSEM_KERNEL_SCHED_ASR_SUPPORT

             // Check whether the ASR is pending and not inhibited.  If
-            // we can call it, then transfer this info to a local
+            // we can call it, then transfer this info to a
             // variable (call_asr) and clear the pending flag.  Note
             // that we only do this if the scheduler lock is about to
             // be zeroed. In any other circumstance we are not
             // unlocking.
-
-            cyg_bool call_asr = false;

             if( (current->asr_inhibit == 0) && current->asr_pending )
             {
                 call_asr = true;
                 current->asr_pending = false;
             }
 #endif
Reply | Threaded
Open this post in threaded view
|

Re: [ECOS] Miss calling ASR in sched.cxx

kiron
2011/4/22 kiron <[hidden email]>:

> When enable both CYGSEM_KERNEL_SCHED_ASR_SUPPORT and
> CYGIMP_KERNEL_INTERRUPTS_DSRS, consider that local variable call_asr
> is set to true and clear the asr_pending flag of current thread, but
> has DSRs_pending. It will continue the while loop and the call_asr's
> value is re-initialized to false, code path will miss to call ASR.
>
> Posix timer use ASR to deliver signal,If missing to call ASR, and
> signal will not been delivered. Unfortunately, posix time subsystem
> don't try to reset the asr_pending flag (see alarm_action() in
> packages/compat/posix/current/src/time.cxx), and the timer was
> disarmed forever (If it is a interval timer).
>
> here is a small patch to fix this.
>
> Huang Yi
> ------------------------

Anybody can review this patch? or I should send the mail to another mail list?

--
Huang Yi (kiron)
-------------------
Reply | Threaded
Open this post in threaded view
|

Re: [ECOS] Miss calling ASR in sched.cxx

Lambrecht Jürgen
This is the correct list, but your attachment got lost.
Regards, Jürgen

On 05/03/2011 03:18 AM, Huang Yi wrote:

>
> 2011/4/22 kiron <[hidden email]>:
> > When enable both CYGSEM_KERNEL_SCHED_ASR_SUPPORT and
> > CYGIMP_KERNEL_INTERRUPTS_DSRS, consider that local variable call_asr
> > is set to true and clear the asr_pending flag of current thread, but
> > has DSRs_pending. It will continue the while loop and the call_asr's
> > value is re-initialized to false, code path will miss to call ASR.
> >
> > Posix timer use ASR to deliver signal,If missing to call ASR, and
> > signal will not been delivered. Unfortunately, posix time subsystem
> > don't try to reset the asr_pending flag (see alarm_action() in
> > packages/compat/posix/current/src/time.cxx), and the timer was
> > disarmed forever (If it is a interval timer).
> >
> > here is a small patch to fix this.
> >
> > Huang Yi
> > ------------------------
>
> Anybody can review this patch? or I should send the mail to another
> mail list?
>
> --
> Huang Yi (kiron)
> -------------------
>


--
Jürgen Lambrecht
R&D Associate
Tel: +32 (0)51 303045    Fax: +32 (0)51 310670
http://www.televic-rail.com
Televic Rail NV - Leo Bekaertlaan 1 - 8870 Izegem - Belgium
Company number 0825.539.581 - RPR Kortrijk

Reply | Threaded
Open this post in threaded view
|

Re: [ECOS] Miss calling ASR in sched.cxx

kiron
Thanks. But at the mail archive, I found my original mail :
http://ecos.sourceware.org/ml/ecos-devel/2011-04/msg00008.html
The patch is present at the end of the mail.

I paste the same patch text here:

------------------------------[PATCH START] ------------------------------------
Index: packages/kernel/current/src/sched/sched.cxx
===================================================================
RCS file: /cvs/ecos/ecos/packages/kernel/current/src/sched/sched.cxx,v
retrieving revision 1.20
diff -u -8 -p -r1.20 sched.cxx
--- packages/kernel/current/src/sched/sched.cxx 29 Jan 2009 17:49:50
-0000      1.20
+++ packages/kernel/current/src/sched/sched.cxx 22 Apr 2011 02:42:06 -0000
@@ -131,16 +131,20 @@ inline void *operator new(size_t size, v
 // have when it reschedules this thread back, and leaves this function.
 // When it is non-zero, and the thread is rescheduled, no ASRS are run,
 // or DSRs processed. By doing this, it makes it possible for threads
 // that want to go to sleep to wake up with the scheduler lock in the
 // same state it was in before.

 void Cyg_Scheduler::unlock_inner( cyg_ucount32 new_lock )
 {
+#ifdef CYGSEM_KERNEL_SCHED_ASR_SUPPORT
+    cyg_bool call_asr = false;
+#endif
+
 #ifdef CYGDBG_KERNEL_TRACE_UNLOCK_INNER
    CYG_REPORT_FUNCTION();
 #endif

    do {

        CYG_PRECONDITION( new_lock==0 ? get_sched_lock() == 1 :
                          ((get_sched_lock() == new_lock) ||
(get_sched_lock() == new_lock+1)),
@@ -229,23 +233,21 @@ void Cyg_Scheduler::unlock_inner( cyg_uc
        }

        if( new_lock == 0 )
        {

 #ifdef CYGSEM_KERNEL_SCHED_ASR_SUPPORT

            // Check whether the ASR is pending and not inhibited.  If
-            // we can call it, then transfer this info to a local
+            // we can call it, then transfer this info to a
            // variable (call_asr) and clear the pending flag.  Note
            // that we only do this if the scheduler lock is about to
            // be zeroed. In any other circumstance we are not
            // unlocking.
-
-            cyg_bool call_asr = false;

            if( (current->asr_inhibit == 0) && current->asr_pending )
            {
                call_asr = true;
                current->asr_pending = false;
            }
 #endif
------------------------------[PATCH END] ------------------------------------


2011/5/3 Jürgen Lambrecht <[hidden email]>:
> This is the correct list, but your attachment got lost.
> Regards, Jürgen

--
Huang Yi (kiron)
-------------------