jsgf avatar jsgf committed 42e1734

Add an explicit state machine for each interface to track the
flag state changes and the various scripts. This should make
the behaviour much more controllable.

Still to do: also poll the interface flag state so that we can
detect the interface state at startup, rather than purely relying
on netlink state change events.

Comments (0)

Files changed (5)

 #include <stdlib.h>
 #include <string.h>
 #include <syslog.h>
+#include <assert.h>
+#include <time.h>
+#include <wait.h>
+#include <net/if.h>
 
 #include "netplug.h"
 
+#define INFOHASHSZ	16	/* must be a power of 2 */
+static struct if_info *if_info[INFOHASHSZ];
+
+static const char *statename(enum ifstate s)
+{
+    switch(s) {
+#define S(x)	case ST_##x: return #x
+	S(DOWN);
+	S(DOWNANDOUT);
+	S(PROBING);
+	S(PROBING_UP);
+	S(INACTIVE);
+	S(INNING);
+	S(WAIT_IN);
+	S(ACTIVE);
+	S(OUTING);
+	S(INSANE);
+#undef S
+    default: return "???";
+    }
+}
+
+static const char *flags_str(char *buf, unsigned int fl)
+{
+    static struct flag {
+	const char *name;
+	unsigned int flag;
+    } flags[] = {
+#define  F(x)	{ #x, IFF_##x }
+	F(UP),
+	F(BROADCAST),
+	F(DEBUG),
+	F(LOOPBACK),
+	F(POINTOPOINT),
+	F(NOTRAILERS),
+	F(RUNNING),
+	F(NOARP),
+	F(PROMISC),
+	F(ALLMULTI),
+	F(MASTER),
+	F(SLAVE),
+	F(MULTICAST),
+#undef F
+    };
+    char *cp = buf;
+
+    *cp = '\0';
+
+    for(int i = 0; i < sizeof(flags)/sizeof(*flags); i++) {
+	if (fl & flags[i].flag) {
+	    fl &= ~flags[i].flag;
+	    cp += sprintf(cp, "%s,", flags[i].name);
+	}
+    }
+
+    if (fl != 0)
+	cp += sprintf(cp, "%x,", fl);
+
+    if (cp != buf)
+	cp[-1] = '\0';
+
+    return buf;
+}
+
+/* if_info state machine transitions caused by interface flag changes */
+void ifsm_flagchange(struct if_info *info, unsigned int newflags)
+{
+    unsigned int changed = (info->flags ^ newflags) & (IFF_RUNNING | IFF_UP);
+
+    if (changed == 0)
+	return;
+
+    char buf1[512], buf2[512];
+    do_log(LOG_INFO, "%s: state %s flags 0x%08x %s -> 0x%08x %s", info->name,
+	   statename(info->state),
+	   info->flags, flags_str(buf1, info->flags),
+           newflags, flags_str(buf2, newflags));
+
+    /* XXX put interface state-change rate limiting here */
+    if (0 /* flapping */) {
+	info->state = ST_INSANE;
+    }
+
+    if (changed & IFF_UP) {
+	if (newflags & IFF_UP) {
+	    switch(info->state) {
+	    case ST_DOWN:
+		info->state = ST_INACTIVE;
+		break;
+
+	    case ST_PROBING:
+		info->state = ST_PROBING_UP;
+		break;
+
+	    default:
+		do_log(LOG_ERR, "%s: unexpected state %s for UP", info->name, statename(info->state));
+		exit(1);
+	    }
+	} else {
+	    /* interface went down */
+	    switch(info->state) {
+	    case ST_OUTING:
+		/* went down during an OUT script - OK */
+		info->state = ST_DOWNANDOUT;
+		break;
+
+	    case ST_DOWN:
+		/* already down */
+		break;
+
+	    default:
+		/* All other states: kill off any scripts currently
+		   running, and go into the PROBING state, attempting
+		   to bring it up */
+		kill_script(info->worker);
+		info->state = ST_PROBING;
+
+		assert(info->worker == -1);
+		info->worker = run_netplug_bg(info->name, "probe");	    
+	    }
+	}
+    }
+
+    /* XXX hack - kick start things by accouting for the initial
+       probe */
+    if (info->state == ST_DOWN && (newflags & IFF_UP))
+	info->state = ST_INACTIVE;
+
+    if (changed & IFF_RUNNING) {
+	switch(info->state) {
+	case ST_INACTIVE:
+	    assert(!(info->flags & IFF_RUNNING));
+	    assert(info->worker == -1);
+
+	    info->worker = run_netplug_bg(info->name, "in");
+	    info->state = ST_INNING;
+	    break;
+
+	case ST_INNING:
+	    assert(info->flags & IFF_RUNNING);
+	    info->state = ST_WAIT_IN;
+	    break;
+
+	case ST_WAIT_IN:
+	    /* unaffected by interface flag changing */
+	    break;
+
+	case ST_ACTIVE:
+	    assert(info->flags & IFF_RUNNING);	    
+	    assert(info->worker == -1);
+
+	    info->worker = run_netplug_bg(info->name, "out");
+	    info->state = ST_OUTING;
+	    break;
+
+	case ST_OUTING:
+	    /* always go to INACTIVE regardless of flag state */
+	    break;
+
+	case ST_PROBING:
+	case ST_PROBING_UP:
+	    /* ignore running state */
+	    break;
+
+	case ST_INSANE:
+	    /* stay insane until there's been quiet for a while, then
+	       down interface and switch to ST_DOWN */
+	    break;
+
+	case ST_DOWN:
+	case ST_DOWNANDOUT:
+	    /* badness: somehow interface became RUNNING without being
+	       UP - ignore it */
+	    break;
+	}
+    }
+
+    do_log(LOG_INFO, "%s: moved to state %s; worker %d", 
+	   info->name, statename(info->state), info->worker);
+    info->flags = newflags;
+    info->lastchange = time(0);
+}
+
+/* handle a script termination and update the state accordingly */
+void ifsm_scriptdone(pid_t pid, int exitstatus)
+{
+    int exitok = WIFEXITED(exitstatus) && WEXITSTATUS(exitstatus) == 0;
+    struct if_info *info;
+    assert(WIFEXITED(exitstatus) || WIFSIGNALED(exitstatus));
+
+    for(int i = 0; i < INFOHASHSZ; i++) {
+	for(info = if_info[i]; info != NULL; info = info->next)
+	    if (info->worker == pid)
+		break;
+	if (info != NULL)
+	    break;
+    }
+
+    if (info == NULL) {
+	do_log(LOG_INFO, "Unexpected child %d exited with status %d",
+	       pid, exitstatus);
+	return;
+    }
+
+    do_log(LOG_INFO, "%s: state %s  pid %d exited status %d",
+	   info->name, statename(info->state), pid, exitstatus);
+
+    info->worker = -1;
+
+    switch(info->state) {
+    case ST_PROBING:
+	/* XXX should we expect interface to be up by now?  No, not
+	   necessarily: netlink is not synchronized with process
+	   exit. */
+	if (exitok)
+	    info->state = ST_INACTIVE;
+	else {
+	    info->state = ST_DOWN;
+            do_log(LOG_WARNING, "Could not bring %s back up", info->name);
+	}
+	break;
+
+    case ST_PROBING_UP:
+	/* regardless of script's exit status, the interface is
+	   actually up now, so just make it inactive */
+	info->state = ST_INACTIVE;
+	break;
+
+    case ST_DOWNANDOUT:
+	/* we were just waiting for the out script to finish - start a
+	   probe script for this interface */
+	info->state = ST_PROBING;
+	assert(info->worker == -1);
+	info->worker = run_netplug_bg(info->name, "probe");
+	break;
+
+    case ST_INNING:
+	if (exitok)
+	    info->state = ST_ACTIVE;
+	else
+	    info->state = ST_INSANE; /* ??? */
+	break;
+
+    case ST_OUTING:
+	/* What if !exitok?  What if interface is still active? ->ST_INSANE? */
+	info->state = ST_INACTIVE;
+	break;
+
+    case ST_WAIT_IN:
+	assert(info->worker == -1);
+	
+	info->worker = run_netplug_bg(info->name, "out");
+	info->state = ST_OUTING;
+	break;
+
+    case ST_INACTIVE:
+    case ST_ACTIVE:
+    case ST_INSANE:
+    case ST_DOWN:
+	do_log(LOG_ERR, "ifsm_scriptdone: %s: bad state %s for script termination", 
+	       info->name, statename(info->state));
+	exit(1);
+    }
+
+    do_log(LOG_INFO, "%s: moved to state %s", info->name, statename(info->state));
+}
 
 void
 parse_rtattrs(struct rtattr *tb[], int max, struct rtattr *rta, int len)
     }
 }
 
-
-static struct if_info *if_info[16];
-
-
 int if_info_save_interface(struct nlmsghdr *hdr, void *arg)
 {
     struct rtattr *attrs[IFLA_MAX + 1];
         return NULL;
     }
 
-    int x = info->ifi_index & 0xf;
+    int x = info->ifi_index & (INFOHASHSZ-1);
     struct if_info *i, **ip;
 
     for (ip = &if_info[x]; (i = *ip) != NULL; ip = &i->next) {
         i->next = *ip;
         i->index = info->ifi_index;
         *ip = i;
+
+	/* initialize state machine fields */
+	i->state = ST_DOWN;
+	i->lastchange = 0;
+	i->worker = -1;
     }
     return i;
 }
 #include <sys/wait.h>
 #include <syslog.h>
 #include <unistd.h>
+#include <assert.h>
 
 #include "netplug.h"
 
         return pid;
     }
 
-    do_log(LOG_INFO, "%s %s %s", NP_SCRIPT, ifname, action);
+    setpgrp();			/* become group leader */
+
+    do_log(LOG_INFO, "%s %s %s -> %d", NP_SCRIPT, ifname, action, getpid());
 
     execl(NP_SCRIPT, NP_SCRIPT, ifname, action, NULL);
 
     return WIFEXITED(status) ? WEXITSTATUS(status) : -WTERMSIG(status);
 }
 
+/* 
+   Synchronously kill a script
+
+   Assumes the pid is actually a leader of a group.  Kills first with
+   SIGTERM at first; if that doesn't work, follow up with a SIGKILL.
+ */
+void
+kill_script(pid_t pid)
+{
+    pid_t ret;
+    int status;
+    sigset_t mask, origmask;
+
+    if (pid == -1)
+	return;
+
+    assert(pid > 0);
+    
+    /* Block SIGCHLD while we go around killing things, so the SIGCHLD
+       handler doesn't steal things behind our back. */
+    sigemptyset(&mask);
+    sigaddset(&mask, SIGCHLD);
+    sigprocmask(SIG_BLOCK, &mask, &origmask);
+
+    /* ask nicely */
+    if (killpg(pid, SIGTERM) == -1) {
+	do_log(LOG_ERR, "Can't kill script pgrp %d: %m", pid);
+	goto done;
+    }
+
+    sleep(1);
+
+    ret = waitpid(pid, &status, WNOHANG);
+
+    if (ret == -1) {
+	do_log(LOG_ERR, "Failed to wait for %d: %m?!", pid);
+	goto done;
+    } else if (ret == 0) {
+	/* no more Mr. nice guy */
+	if (killpg(pid, SIGKILL) == -1) {
+	    do_log(LOG_ERR, "2nd kill %d failed: %m?!", pid);
+	    goto done;
+	}
+	ret = waitpid(pid, &status, 0);
+    } 
+
+    assert(ret == pid);
+
+ done:
+    sigprocmask(SIG_SETMASK, &origmask, NULL);
+}
 
 void *
 xmalloc(size_t n)
 #include <stdlib.h>
 #include <syslog.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <assert.h>
+#include <wait.h>
+#include <errno.h>
 
 #include "netplug.h"
 
 int use_syslog;
 
 
-#define flag_was_set(flag) \
-        (!(i->flags & (flag)) && (info->ifi_flags & (flag)))
-#define flag_was_unset(flag) \
-        ((i->flags & (flag)) && !(info->ifi_flags & (flag)))
-
-static const char *flags_str(char *buf, unsigned int fl)
-{
-    static struct flag {
-	const char *name;
-	unsigned int flag;
-    } flags[] = {
-#define  F(x)	{ #x, IFF_##x }
-	F(UP),
-	F(BROADCAST),
-	F(DEBUG),
-	F(LOOPBACK),
-	F(POINTOPOINT),
-	F(NOTRAILERS),
-	F(RUNNING),
-	F(NOARP),
-	F(PROMISC),
-	F(ALLMULTI),
-	F(MASTER),
-	F(SLAVE),
-	F(MULTICAST),
-#undef F
-    };
-    char *cp = buf;
-
-    *cp = '\0';
-
-    for(int i = 0; i < sizeof(flags)/sizeof(*flags); i++) {
-	if (fl & flags[i].flag) {
-	    fl &= ~flags[i].flag;
-	    cp += sprintf(cp, "%s,", flags[i].name);
-	}
-    }
-
-    if (fl != 0)
-	cp += sprintf(cp, "%x,", fl);
-
-    if (cp != buf)
-	cp[-1] = '\0';
-
-    return buf;
-}
-
-
 static int
 handle_interface(struct nlmsghdr *hdr, void *arg)
 {
     char *name = RTA_DATA(attrs[IFLA_IFNAME]);
 
     if (!if_match(name)) {
-    char *name = RTA_DATA(attrs[IFLA_IFNAME]);
+	char *name = RTA_DATA(attrs[IFLA_IFNAME]);
 
-    if (!if_match(name)) {
-        do_log(LOG_INFO, "%s: ignoring event", name);
-        return 0;
+	if (!if_match(name)) {
+	    do_log(LOG_INFO, "%s: ignoring event", name);
+	    return 0;
+	}
     }
 
-    }
+    struct if_info *i = if_info_get_interface(hdr, attrs);
 
-    struct if_info *i;
+    if (i == NULL)
+        return 0;
 
-    if ((i = if_info_get_interface(hdr, attrs)) == NULL) {
-        return 0;
-    }
+    ifsm_flagchange(i, info->ifi_flags);
 
-    if (i->flags == info->ifi_flags) {
-        goto done;
-    }
-
-    char buf1[512], buf2[512];
-    do_log(LOG_INFO, "%s: flags 0x%08x %s -> 0x%08x %s", name,
-	   i->flags, flags_str(buf1, i->flags),
-           info->ifi_flags, flags_str(buf2, info->ifi_flags));
-
-    if (flag_was_set(IFF_RUNNING)) {
-        run_netplug_bg(name, "in");
-    }
-    if (flag_was_unset(IFF_RUNNING)) {
-        run_netplug_bg(name, "out");
-    }
-    if (flag_was_unset(IFF_UP)) {
-        if (try_probe(name) == 0) {
-            do_log(LOG_WARNING, "Could not bring %s back up", name);
-        }
-    }
-
- done:
     if_info_update_interface(hdr, attrs);
 
     return 0;
     fclose(fp);
 }
 
+struct child_exit
+{
+    pid_t	pid;
+    int		status;
+};
+
+static int child_handler_pipe[2];
+
+static void
+child_handler(int sig, siginfo_t *info, void *v)
+{
+    struct child_exit ce;
+    int ret;
+
+    assert(sig == SIGCHLD);
+    
+    ce.pid = info->si_pid;
+    ret = waitpid(info->si_pid, &ce.status, 0);
+    if (ret == info->si_pid)
+	write(child_handler_pipe[1], &ce, sizeof(ce));
+}
 
 int
 main(int argc, char *argv[])
         }
     }
 
+    if (pipe(child_handler_pipe) == -1) {
+	do_log(LOG_ERR, "can't create pipe: %m");
+	exit(1);
+    }
+
+    struct sigaction sa;
+    sa.sa_sigaction = child_handler;
+    sa.sa_flags = SA_RESTART | SA_SIGINFO;
+    sigfillset(&sa.sa_mask);
+
+    if (sigaction(SIGCHLD, &sa, NULL) == -1) {
+	do_log(LOG_ERR, "can't set SIGCHLD handler: %m");
+	exit(1);
+    }
+
     int fd = netlink_open();
 
     netlink_request_dump(fd);
     netlink_receive_dump(fd, if_info_save_interface, NULL);
 
-    netlink_listen(fd, handle_interface, NULL);
+    if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
+	do_log(LOG_ERR, "can't set socket non-blocking: %m");
+	exit(1);
+    }
 
-    return fd ? 0 : 0;
+    if (fcntl(child_handler_pipe[0], F_SETFL, O_NONBLOCK) == -1) {
+	do_log(LOG_ERR, "can't set pipe non-blocking: %m");
+	exit(1);
+    }
+    
+    struct pollfd fds[] = {
+	{ fd, POLLIN, 0 },
+	{ child_handler_pipe[0], POLLIN, 0 },
+    };
+
+    for(;;) {
+	int ret = poll(fds, sizeof(fds)/sizeof(fds[0]), -1);
+
+	if (ret == -1) {
+	    if (errno == EINTR)
+		continue;
+	    do_log(LOG_ERR, "poll failed: %m");
+	    exit(1);
+	}
+	if (ret == 0)
+	    continue;		/* XXX??? */
+
+	if (fds[0].revents & POLLIN) {
+	    /* interface flag state change */
+	    if (netlink_listen(fd, handle_interface, NULL) == 0)
+		break;		/* done */
+	}
+
+	if (fds[1].revents & POLLIN) {
+	    /* netplug script finished */
+	    int ret;
+	    struct child_exit ce;
+		
+	    do {
+		ret = read(child_handler_pipe[0], &ce, sizeof(ce));
+
+		assert(ret == 0 || ret == -1 || ret == sizeof(ce));
+		
+		if (ret == sizeof(ce))
+		    ifsm_scriptdone(ce.pid, ce.status);
+		else if (ret == -1 && errno != EAGAIN) {
+		    do_log(LOG_ERR, "pipe read failed: %m");
+		    exit(1);
+		}
+	    } while(ret == sizeof(ce));
+	}
+    }
+
+    return 0;
 }
 
 
 }
 
 
-void
+int
 netlink_listen(int fd, netlink_callback callback, void *arg)
 {
     char   buf[8192];
         if (status == -1) {
             if (errno == EINTR)
                 continue;
+	    if (errno == EAGAIN)
+		return 1;
+
             do_log(LOG_ERR, "OVERRUN: %m");
             continue;
         }
 
                 if ((err = callback(hdr, arg)) == -1) {
                     do_log(LOG_ERR, "Callback failed");
-                    return;
+                    return 0;
                 }
             }
 
 int netlink_open(void);
 void netlink_request_dump(int fd);
 void netlink_receive_dump(int fd, netlink_callback callback, void *arg);
-void netlink_listen(int fd, netlink_callback callback, void *arg);
+int  netlink_listen(int fd, netlink_callback callback, void *arg);
 
 
 /* network interface info management */
     int addr_len;
     unsigned char addr[8];
     char name[16];
+
+    enum ifstate {
+	ST_DOWN,		/* uninitialized */
+	ST_DOWNANDOUT,		/* went down while running out script */
+	ST_PROBING,		/* running probe script */
+	ST_PROBING_UP,		/* running probe, and interface went UP */
+	ST_INACTIVE,		/* interface inactive */
+	ST_INNING,		/* plugin script is running */
+	ST_WAIT_IN,		/* wait until plugin script is done */
+	ST_ACTIVE,		/* interface active */
+	ST_OUTING,		/* plugout script is running */
+	ST_INSANE,		/* interface seems to be flapping */
+    }		state;
+
+    pid_t	worker;		/* pid of current in/out script */
+    time_t	lastchange;	/* timestamp of last state change */
 };
 
 struct if_info *if_info_get_interface(struct nlmsghdr *hdr,
 int if_info_save_interface(struct nlmsghdr *hdr, void *arg);
 void parse_rtattrs(struct rtattr *tb[], int max, struct rtattr *rta, int len);
 
+void ifsm_flagchange(struct if_info *info, unsigned int newflags);
+void ifsm_scriptdone(pid_t pid, int exitstatus);
 
 /* utilities */
 
     __attribute__ ((format (printf, 2, 3)));
 pid_t run_netplug_bg(char *ifname, char *action);
 int run_netplug(char *ifname, char *action);
+void kill_script(pid_t pid);
 void *xmalloc(size_t n);
 
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.