Linux FD Handler 以及 Timer 機制

好久沒寫文章了 ... 怎麼覺得每次要寫東西都有這種感慨 ... 今天要紀錄的東西是為了工作上的案子所打造的 API。這個案子講穿了就是要實作一個 Protocol。既然是 Protocol,毫無疑問就要去處理封包收送啊、Timer啊,這一類的問題。以前在開發的時候是把這些問題全部丟給 Quagga (Quagga Software Routing Suite) 提供的工具來處理,在更早以前有用過另外一套忘記哪個案子裏面提供的 event loop。現在,龜毛如我,嫌 Quagga 太肥,另外一套又找不到(沒看到我連名字都忘了嗎?),所以只好自己來囉。

整個架構預計採用 epoll 的機制,這樣 socket的處理就完成了;那 Timer 呢?根據研究以後,Linux 有提供 Timerfd 的接口,簡單來說,就是設定定時器,時間到的時候, Linux Kernel會主動送出資料給 Timerfd 的 File Descriptor ,所以有收到東西就知道有 Timeout 的事件發生。這個機制最大的好處是可以配合所有的 FD 處理機制,不管是 poll、select 或是今天我採用的 epoll。

下面就是我設計的 API:


#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/epoll.h>      // For epoll
#include <sys/timerfd.h>    // For timerfd

#include "fd_handler.h"

// Module state shared by the fd_handler_* functions in this file.

static int                      epfd = -1;              // epoll instance descriptor; -1 while no instance is open.
static int                      epfdMax;                // Capacity of pEvents, i.e. max events fetched per epoll_wait() call.
static struct epoll_event       *pEvents = NULL;        // Buffer that epoll_wait() fills with ready events.
static int                      running;                // Loop flag: set to 1 by fd_handler_create(), cleared by fd_handler_stop().

// Bookkeeping for one registered descriptor. fd_handler_add() stores a pointer
// to it in epoll_event.data.ptr and fd_handler_start() reads it back on events.
struct  event_record
{
    int     fd;                                 // Descriptor registered with epoll.
    int     type;                               // FD_TYPE_* value; FD_TYPE_TIMER makes the loop read the timerfd before the callback.
    void    *pData;                             // Caller's opaque pointer, handed to callback unchanged.
    int     ( *callback )( int, void * );       // Invoked as callback( fd, pData ) when EPOLLIN is reported for fd.
};

int     fd_handler_create( int max )
{
    // Create Epoll fd
    epfd = epoll_create( max );
    
    if( epfd == -1 )
    {
        // Error
        return -1;
    }    
    
    pEvents = malloc( max * sizeof( struct epoll_event )  );
    running = 1;
    epfdMax = max;
    
    return 0;
}

/*
 * Run the event loop until fd_handler_stop() clears the running flag.
 *
 * Each iteration blocks in epoll_wait() with no timeout. For every entry
 * reporting EPOLLIN the registered callback is invoked as
 * callback( fd, pData ). For FD_TYPE_TIMER records the 8-byte expiration
 * counter is read from the timerfd first, so the descriptor stops being
 * readable; the callback is skipped if that read does not deliver a full
 * counter.
 *
 * Returns 0 when the loop ends because the running flag was cleared, or -1
 * when epoll_wait() fails with anything other than EINTR (the error is
 * reported through perror()).
 */
int     fd_handler_start( void )
{
    int     ready = 0;      // Number of entries filled in by epoll_wait().
    int     i = 0;

    while (running)
    {
        ready = epoll_wait(epfd, pEvents, epfdMax, -1);

        if (ready < 0)
        {
            if (errno == EINTR)
            {
                // Interrupted by a signal: re-check the running flag and wait again.
                continue;
            }

            // Any other failure (e.g. invalid epfd) would repeat forever; give up.
            perror("epoll_wait");
            return -1;
        }

        for (i = 0; i < ready; i++)
        {
            struct event_record *pRecord = NULL;

            if (!(pEvents[i].events & EPOLLIN))
            {
                continue;
            }

            pRecord = (struct event_record *)pEvents[i].data.ptr;

            if (pRecord->type == FD_TYPE_TIMER)
            {
                // A timerfd read always transfers a 64-bit counter, so the
                // buffer must be exactly 8 bytes on every architecture.
                uint64_t expirations = 0;

                if (read(pRecord->fd, &expirations, sizeof expirations)
                        != (ssize_t)sizeof expirations)
                {
                    // Nothing was consumed, so no expiration to report.
                    continue;
                }
            }

            pRecord->callback(pRecord->fd, pRecord->pData);
        }
    }

    return 0;
}

/*
 * Request termination of the loop in fd_handler_start().
 *
 * Only clears the running flag; the loop tests it at the top of each
 * iteration, i.e. after the current epoll_wait() call has returned.
 * Always returns 0.
 */
int     fd_handler_stop(void)
{
    running = 0;

    return 0;
}

/*
 * Tear down the epoll instance and release the event buffer.
 *
 * Returns whatever close() returned for the epoll descriptor. The static
 * state (epfd, epfdMax, pEvents) is reset regardless of that result.
 */
int     fd_handler_close(void)
{
    const int rc = close(epfd);

    free(pEvents);
    pEvents = NULL;
    epfdMax = 0;
    epfd = -1;

    return rc;
}

/*
 * Register fd with the epoll instance for EPOLLIN notification.
 *
 * fd:       descriptor to watch.
 * type:     FD_TYPE_* value stored in the record for the event loop.
 * pData:    opaque pointer passed back to callback unchanged.
 * callback: function the event loop calls as callback( fd, pData ).
 *
 * A heap-allocated event_record carrying these values is attached to the
 * epoll entry through data.ptr. EPOLLET is not set, so the registration is
 * level-triggered.
 *
 * Returns 0 on success, -1 if the record cannot be allocated or epoll_ctl()
 * fails (errno from epoll_ctl() is preserved). On failure the record is
 * released.
 * NOTE(review): on success the record is not freed by fd_handler_del().
 */
int fd_handler_add( int fd, int type, void *pData, int ( *callback )( int, void * ) )
{
    struct epoll_event      ev = {0};
    struct event_record     *pRecord = malloc(sizeof *pRecord);

    if (pRecord == NULL)
    {
        return -1;
    }

    pRecord->fd = fd;
    pRecord->type = type;
    pRecord->pData = pData;
    pRecord->callback = callback;

    ev.events = EPOLLIN;
    ev.data.ptr = pRecord;

    if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1)
    {
        // The kernel never took the pointer, so release it here; keep the
        // errno of epoll_ctl() visible to the caller.
        const int saved_errno = errno;

        free(pRecord);
        errno = saved_errno;
        return -1;
    }

    return 0;
}

/*
 * Remove fd from the epoll instance.
 *
 * Returns the result of epoll_ctl( EPOLL_CTL_DEL ).
 */
int fd_handler_del(int fd)
{
    /*
     * Placeholder only. EPOLL_CTL_DEL ignores the event argument, and since
     * Linux 2.6.9 it may even be NULL; kernels before 2.6.9 required a
     * non-NULL pointer, so a (deliberately uninitialised) struct is passed
     * to stay portable to them.
     */
    struct epoll_event placeholder;
    const int rc = epoll_ctl(epfd, EPOLL_CTL_DEL, fd, &placeholder);

    /* NOTE(review): the event_record allocated by fd_handler_add() for this
     * fd is not released here. */
    return rc;
}

/*
 * Create a timerfd on CLOCK_MONOTONIC with no flags.
 *
 * Returns the new descriptor, or -1 after reporting the failure with
 * perror(). The timer is not armed by this call.
 */
int fd_handler_create_timer()
{
    const int tfd = timerfd_create(CLOCK_MONOTONIC, 0);

    if (tfd != -1)
    {
        return tfd;
    }

    perror("timerfd_create");
    return -1;
}

/*
 * Arm timerfd as a one-shot relative timer.
 *
 * timerfd: descriptor obtained from timerfd_create().
 * sec:     seconds until expiration.
 * nsec:    additional nanoseconds until expiration.
 *
 * 10 ns are added to the requested delay (presumably so that a request of
 * 0 s / 0 ns does not produce an all-zero it_value, which would disarm the
 * timer instead of firing it). The padded nanosecond value is carried into
 * the seconds field when it reaches one second, because timerfd_settime()
 * rejects tv_nsec values above 999,999,999.
 *
 * Returns 0 on success, -1 after perror() if timerfd_settime() fails.
 */
int fd_handler_set_timer_once( int timerfd, int sec, int nsec )
{
    const long long     nsec_per_sec = 1000000000LL;
    struct itimerspec   new_value;
    long long           secs = sec;
    // Widen before adding the pad so nsec near INT_MAX cannot overflow.
    long long           nanos = (long long)nsec + 10;

    if (nanos >= nsec_per_sec)
    {
        secs += nanos / nsec_per_sec;
        nanos %= nsec_per_sec;
    }

    new_value.it_value.tv_sec = (time_t)secs;
    new_value.it_value.tv_nsec = (long)nanos;
    // A zero interval means the timer fires once only.
    new_value.it_interval.tv_sec = 0;
    new_value.it_interval.tv_nsec = 0;

    // 0 means relative timer.
    if (timerfd_settime(timerfd, 0, &new_value, NULL) == -1)
    {
        perror("timerfd_settime");
        return -1;
    }

    return 0;
}

/*
 * Arm timerfd as a periodic relative timer.
 *
 * timerfd: descriptor obtained from timerfd_create().
 * sec:     seconds of the period.
 * nsec:    additional nanoseconds of the period.
 *
 * The same padded duration is used for the first expiration and for the
 * repeat interval. 10 ns are added to the requested period (presumably so
 * that a request of 0 s / 0 ns does not produce an all-zero it_value, which
 * would disarm the timer). The padded nanosecond value is carried into the
 * seconds field when it reaches one second, because timerfd_settime()
 * rejects tv_nsec values above 999,999,999.
 *
 * Returns 0 on success, -1 after perror() if timerfd_settime() fails.
 */
int fd_handler_set_timer_periodic( int timerfd, int sec, int nsec )
{
    const long long     nsec_per_sec = 1000000000LL;
    struct itimerspec   new_value;
    long long           secs = sec;
    // Widen before adding the pad so nsec near INT_MAX cannot overflow.
    long long           nanos = (long long)nsec + 10;

    if (nanos >= nsec_per_sec)
    {
        secs += nanos / nsec_per_sec;
        nanos %= nsec_per_sec;
    }

    new_value.it_value.tv_sec = (time_t)secs;
    new_value.it_value.tv_nsec = (long)nanos;
    new_value.it_interval.tv_sec = (time_t)secs;
    new_value.it_interval.tv_nsec = (long)nanos;

    // 0 means relative timer.
    if (timerfd_settime(timerfd, 0, &new_value, NULL) == -1)
    {
        perror("timerfd_settime");
        return -1;
    }

    return 0;
}

/*
 * Disarm timerfd by programming an all-zero expiration and interval.
 *
 * Returns 0 on success, -1 after perror() if timerfd_settime() fails.
 */
int fd_handler_stop_timer(int timerfd)
{
    const struct itimerspec disarm = {
        .it_value    = { .tv_sec = 0, .tv_nsec = 0 },
        .it_interval = { .tv_sec = 0, .tv_nsec = 0 },
    };

    /* Flags of 0 select a relative timer. */
    if (timerfd_settime(timerfd, 0, &disarm, NULL) != -1)
    {
        return 0;
    }

    perror("timerfd_settime");
    return -1;
}

/*
 * Register a socket descriptor: forwards to fd_handler_add() with
 * FD_TYPE_SOCKET and returns its result.
 */
int fd_handler_add_socket(int fd, void *pData, int (*callback)(int, void *))
{
    const int status = fd_handler_add(fd, FD_TYPE_SOCKET, pData, callback);

    return status;
}

/*
 * Arm fd as a periodic timer and then register it as FD_TYPE_TIMER.
 *
 * Returns -1 if arming fails (nothing is registered in that case);
 * otherwise returns the result of fd_handler_add().
 */
int fd_handler_add_timer_periodic(int fd, int sec, int nsec, void *pData, int (*callback)(int, void *))
{
    const int armed = fd_handler_set_timer_periodic(fd, sec, nsec);

    if (armed != -1)
    {
        return fd_handler_add(fd, FD_TYPE_TIMER, pData, callback);
    }

    return -1;
}

/*
 * Arm fd as a one-shot timer and then register it as FD_TYPE_TIMER.
 *
 * Returns -1 if arming fails (nothing is registered in that case);
 * otherwise returns the result of fd_handler_add().
 */
int fd_handler_add_timer_once(int fd, int sec, int nsec, void *pData, int (*callback)(int, void *))
{
    const int armed = fd_handler_set_timer_once(fd, sec, nsec);

    if (armed != -1)
    {
        return fd_handler_add(fd, FD_TYPE_TIMER, pData, callback);
    }

    return -1;
}


上面的程式碼有一段要特別提一下:那就是在收到 Timerfd 的通知後,必須要去 read 該 timerfd 的內容,因為 Linux Kernel 觸發的方式就是在該 FD 中寫入資料。如果不把資料讀走,該 FD 會一直維持在可讀的狀態,而 epoll 在 level-triggered 模式下就會不斷回報同一個事件。

不過到頭來這份 API 並沒有被採用 ... 主要的理由是客戶平台的 Kernel 版本太舊了!!所以不支援 Timerfd 的用法,那 ... 要怎麼辦呢?最後的方法是將 Timerfd 的機制改成使用 Signal 的方式。當有 Timeout 事件發生時,Kernel 會發出 Signal 來通知 Process。用 Signal 當作 Timeout 的機制範例如下:

#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>

#include <signal.h>
#include <time.h>

/* Clock passed to timer_create() for the demo timer. */
#define CLOCKID CLOCK_REALTIME
/* Signal delivered on each timer expiration. */
#define SIG SIGRTMIN

/* Report msg through perror() and terminate the process with EXIT_FAILURE. */
#define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \
                            } while (0)

/*
 * Print the timer information carried by a timer signal.
 *
 * si->si_value.sival_ptr is treated as a pointer to the timer_t of the timer
 * that fired (main() stores &timerid there). Prints that pointer, the timer
 * id it points to, and the overrun count from timer_getoverrun(); exits the
 * process through errExit() if the overrun query fails.
 */
static void  print_siginfo(siginfo_t *si)
{
    timer_t *timer = si->si_value.sival_ptr;
    int overruns;

    printf("    sival_ptr = %p; ", si->si_value.sival_ptr);
    printf("    *sival_ptr = 0x%lx\n", (long) *timer);

    overruns = timer_getoverrun(*timer);
    if (overruns != -1)
    {
        printf("    overrun count = %d\n", overruns);
        return;
    }

    errExit("timer_getoverrun");
}

/*
 * SA_SIGINFO-style signal handler for the timer signal: prints the signal
 * number and then the timer details via print_siginfo().
 *
 * Caveat: printf() is not async-signal-safe (see signal(7)), so calling it
 * from a handler is tolerable in a demo only.
 */
static void handler(int sig, siginfo_t *si, void *uc)
{
    (void) uc;  /* context argument is not used */

    printf("Caught signal %d\n", sig);
    print_siginfo(si);
}

int main(int argc, char *argv[])
{
    timer_t timerid;
    struct sigevent sev;
    struct itimerspec its;
    sigset_t mask;
    struct sigaction sa;

    if (argc != 2) 
    {
        fprintf(stderr, "Usage: %s <sleep-secs> \n", argv[0]);
        exit(EXIT_FAILURE);
    }

    /* Establish handler for timer signal */

    printf("Establishing handler for signal %d\n", SIG);
    sa.sa_flags = SA_SIGINFO;
    sa.sa_sigaction = handler;
    sigemptyset(&sa.sa_mask);

    if (sigaction(SIG, &sa, NULL) == -1)
        errExit("sigaction");

    /* Create the timer */

    sev.sigev_notify = SIGEV_SIGNAL;
    sev.sigev_signo = SIG;
    sev.sigev_value.sival_ptr = &timerid;

    if (timer_create(CLOCKID, &sev, &timerid) == -1)
        errExit("timer_create");

    printf("timer ID is 0x%lx\n", (long) timerid);

    /* Start the timer */

    its.it_value.tv_sec = atoi(argv[1]);
    its.it_value.tv_nsec = 0;
    its.it_interval.tv_sec = its.it_value.tv_sec;
    its.it_interval.tv_nsec = its.it_value.tv_nsec;

    if (timer_settime(timerid, 0, &its, NULL) == -1)
        errExit("timer_settime");

   
    while(1)
    {
        printf("Neokent!\n");
        sleep(1);
    }

    exit(EXIT_SUCCESS);
}

由於 Signal 有 Interrupt 的特性,所以最後整合到 Handler 裏面的時候,我不會因為收到 Signal 就立刻呼叫 Callback 函式,而是會送一個通知的信息給 localhost 的 socket,重新讓 epoll 來管理。當然會比較慢,但對 Process 來說會比較安全。那部份的程式碼就不貼了。

留言

  1. Thanks for your good tutorial.
    I found that there is no free() for event_record pointer.
    fd_handler_del() could be the right place.

    回覆刪除
    回覆
    1. Thanks for your friendly reminder.
      There should be a free function for pRecord in the function fd_handler_del().
      To solve this issue, it is necessary to save a mapping table from fd to pRecord.

      刪除

張貼留言

這個網誌中的熱門文章

如何將Linux打造成OpenFlow Switch:Openvswitch

我弟家的新居感恩禮拜分享:善頌善禱

Linux Virtual Interface: TUN/TAP