Commits

Bryan O'Sullivan committed b08cd5e

Merge

  • Participants
  • Parent commits e3f9dd1
  • Tags v1.0.1

Comments (0)

Files changed (6)

docs/StateMachine.sxd

Binary file modified.
 #include <stdlib.h>
 #include <string.h>
 #include <syslog.h>
+#include <assert.h>
+#include <time.h>
+#include <wait.h>
+#include <net/if.h>
 
 #include "netplug.h"
 
+#define INFOHASHSZ	16	/* must be a power of 2 */
+static struct if_info *if_info[INFOHASHSZ];
+
+static const char *
+statename(enum ifstate s)
+{
+    switch(s) {
+#define S(x)	case ST_##x: return #x
+	S(DOWN);
+	S(DOWNANDOUT);
+	S(PROBING);
+	S(PROBING_UP);
+	S(INACTIVE);
+	S(INNING);
+	S(WAIT_IN);
+	S(ACTIVE);
+	S(OUTING);
+	S(INSANE);
+#undef S
+    default: return "???";
+    }
+}
+
+static const char *
+flags_str(char *buf, unsigned int fl)
+{
+    static struct flag {
+	const char *name;
+	unsigned int flag;
+    } flags[] = {
+#define  F(x)	{ #x, IFF_##x }
+	F(UP),
+	F(BROADCAST),
+	F(DEBUG),
+	F(LOOPBACK),
+	F(POINTOPOINT),
+	F(NOTRAILERS),
+	F(RUNNING),
+	F(NOARP),
+	F(PROMISC),
+	F(ALLMULTI),
+	F(MASTER),
+	F(SLAVE),
+	F(MULTICAST),
+#undef F
+    };
+    char *cp = buf;
+
+    *cp = '\0';
+
+    for(int i = 0; i < sizeof(flags)/sizeof(*flags); i++) {
+	if (fl & flags[i].flag) {
+	    fl &= ~flags[i].flag;
+	    cp += sprintf(cp, "%s,", flags[i].name);
+	}
+    }
+
+    if (fl != 0)
+	cp += sprintf(cp, "%x,", fl);
+
+    if (cp != buf)
+	cp[-1] = '\0';
+
+    return buf;
+}
+
+void
+for_each_iface(int (*func)(struct if_info *))
+{
+    for(int i = 0; i < INFOHASHSZ; i++) {
+	for(struct if_info *info = if_info[i]; info != NULL; info = info->next) {
+	    if ((*func)(info))
+		return;
+	}
+    }
+}
+
+/* Reevaluate the state machine based on the current state and flag settings */
+void 
+ifsm_flagpoll(struct if_info *info)
+{
+    enum ifstate state = info->state;
+
+    switch(info->state) {
+    case ST_DOWN:
+	if ((info->flags & (IFF_UP|IFF_RUNNING)) == 0)
+	    break;
+	/* FALLTHROUGH */
+    case ST_INACTIVE:
+	if (!(info->flags & IFF_UP)) {
+	    assert(info->worker == -1);
+	    info->worker = run_netplug_bg(info->name, "probe");
+	    info->state = ST_PROBING;
+	}
+	if (info->flags & IFF_RUNNING) {
+	    assert(info->worker == -1);
+	    info->worker = run_netplug_bg(info->name, "in");
+	    info->state = ST_INNING;
+	}
+	break;
+
+    case ST_PROBING:
+    case ST_PROBING_UP:
+    case ST_WAIT_IN:
+    case ST_DOWNANDOUT:
+	break;
+
+    case ST_INNING:
+	if (!(info->flags & IFF_RUNNING))
+	    info->state = ST_WAIT_IN;
+	break;
+
+    case ST_ACTIVE:
+	if (!(info->flags & IFF_RUNNING)) {
+	    assert(info->worker == -1);
+	    info->worker = run_netplug_bg(info->name, "out");
+	    info->state = ST_OUTING;
+	}
+	break;
+
+    case ST_OUTING:
+	if (!(info->flags & IFF_UP))
+	    info->state = ST_DOWNANDOUT;
+
+    case ST_INSANE:
+	break;
+    }
+
+    if (info->state != state)
+	do_log(LOG_DEBUG, "ifsm_flagpoll %s: moved from state %s to %s", 
+	       info->name, statename(state), statename(info->state));
+}
+
+/* if_info state machine transitions caused by interface flag changes (edge triggered) */
+void
+ifsm_flagchange(struct if_info *info, unsigned int newflags)
+{
+    unsigned int changed = (info->flags ^ newflags) & (IFF_RUNNING | IFF_UP);
+
+    if (changed == 0)
+	return;
+
+    char buf1[512], buf2[512];
+    do_log(LOG_INFO, "%s: state %s flags 0x%08x %s -> 0x%08x %s", info->name,
+	   statename(info->state),
+	   info->flags, flags_str(buf1, info->flags),
+           newflags, flags_str(buf2, newflags));
+
+    /* XXX put interface state-change rate limiting here */
+    if (0 /* flapping */) {
+	info->state = ST_INSANE;
+    }
+
+    if (changed & IFF_UP) {
+	if (newflags & IFF_UP) {
+	    switch(info->state) {
+	    case ST_DOWN:
+		info->state = ST_INACTIVE;
+		break;
+
+	    case ST_PROBING:
+		info->state = ST_PROBING_UP;
+		break;
+
+	    default:
+		do_log(LOG_ERR, "%s: unexpected state %s for UP", info->name, statename(info->state));
+		exit(1);
+	    }
+	} else {
+	    /* interface went down */
+	    switch(info->state) {
+	    case ST_OUTING:
+		/* went down during an OUT script - OK */
+		info->state = ST_DOWNANDOUT;
+		break;
+
+	    case ST_DOWN:
+		/* already down */
+		break;
+
+	    default:
+		/* All other states: kill off any scripts currently
+		   running, and go into the PROBING state, attempting
+		   to bring it up */
+		kill_script(info->worker);
+		info->state = ST_PROBING;
+
+		assert(info->worker == -1);
+		info->worker = run_netplug_bg(info->name, "probe");	    
+	    }
+	}
+    }
+
+    if (changed & IFF_RUNNING) {
+	switch(info->state) {
+	case ST_INACTIVE:
+	    assert(!(info->flags & IFF_RUNNING));
+	    assert(info->worker == -1);
+
+	    info->worker = run_netplug_bg(info->name, "in");
+	    info->state = ST_INNING;
+	    break;
+
+	case ST_INNING:
+	    assert(info->flags & IFF_RUNNING);
+	    info->state = ST_WAIT_IN;
+	    break;
+
+	case ST_WAIT_IN:
+	    /* unaffected by interface flag changing */
+	    break;
+
+	case ST_ACTIVE:
+	    assert(info->flags & IFF_RUNNING);	    
+	    assert(info->worker == -1);
+
+	    info->worker = run_netplug_bg(info->name, "out");
+	    info->state = ST_OUTING;
+	    break;
+
+	case ST_OUTING:
+	    /* always go to INACTIVE regardless of flag state */
+	    break;
+
+	case ST_PROBING:
+	case ST_PROBING_UP:
+	    /* ignore running state */
+	    break;
+
+	case ST_INSANE:
+	    /* stay insane until there's been quiet for a while, then
+	       down interface and switch to ST_DOWN */
+	    break;
+
+	case ST_DOWN:
+	case ST_DOWNANDOUT:
+	    /* badness: somehow interface became RUNNING without being
+	       UP - ignore it */
+	    break;
+	}
+    }
+
+    do_log(LOG_DEBUG, "%s: moved to state %s; worker %d", 
+	   info->name, statename(info->state), info->worker);
+    info->flags = newflags;
+    info->lastchange = time(0);
+}
+
+/* handle a script termination and update the state accordingly */
+void ifsm_scriptdone(pid_t pid, int exitstatus)
+{
+    int exitok = WIFEXITED(exitstatus) && WEXITSTATUS(exitstatus) == 0;
+    struct if_info *info;
+    assert(WIFEXITED(exitstatus) || WIFSIGNALED(exitstatus));
+
+    int find_pid(struct if_info *i) {
+	if (i->worker == pid) {
+	    info = i;
+	    return 1;
+	}
+	return 0;
+    }
+
+    info = NULL;
+    for_each_iface(find_pid);
+
+    if (info == NULL) {
+	do_log(LOG_INFO, "Unexpected child %d exited with status %d",
+	       pid, exitstatus);
+	return;
+    }
+
+    do_log(LOG_INFO, "%s: state %s pid %d exited status %d",
+	   info->name, statename(info->state), pid, exitstatus);
+
+    info->worker = -1;
+
+    switch(info->state) {
+    case ST_PROBING:
+	/* If we're still in PROBING state, then it means that the
+	   interface flags have not come up, even though the script
+	   finished.  Go back to DOWN and wait for the UP flag
+	   setting. */
+	if (!exitok)
+            do_log(LOG_WARNING, "Could not bring %s back up", info->name);
+
+	info->state = ST_DOWN;
+	break;
+
+    case ST_PROBING_UP:
+	/* regardless of script's exit status, the interface is
+	   actually up now, so just make it inactive */
+	info->state = ST_INACTIVE;
+	break;
+
+    case ST_DOWNANDOUT:
+	/* we were just waiting for the out script to finish - start a
+	   probe script for this interface */
+	info->state = ST_PROBING;
+	assert(info->worker == -1);
+	info->worker = run_netplug_bg(info->name, "probe");
+	break;
+
+    case ST_INNING:
+	if (exitok)
+	    info->state = ST_ACTIVE;
+	else
+	    info->state = ST_INSANE; /* ??? */
+	break;
+
+    case ST_OUTING:
+	/* What if !exitok?  What if interface is still active? ->ST_INSANE? */
+	info->state = ST_INACTIVE;
+	break;
+
+    case ST_WAIT_IN:
+	assert(info->worker == -1);
+	
+	info->worker = run_netplug_bg(info->name, "out");
+	info->state = ST_OUTING;
+	break;
+
+    case ST_INACTIVE:
+    case ST_ACTIVE:
+    case ST_INSANE:
+    case ST_DOWN:
+	do_log(LOG_ERR, "ifsm_scriptdone: %s: bad state %s for script termination", 
+	       info->name, statename(info->state));
+	exit(1);
+    }
+
+    do_log(LOG_DEBUG, "%s: moved to state %s", info->name, statename(info->state));
+}
 
 void
 parse_rtattrs(struct rtattr *tb[], int max, struct rtattr *rta, int len)
     }
 }
 
-
-static struct if_info *if_info[16];
-
-
 int if_info_save_interface(struct nlmsghdr *hdr, void *arg)
 {
     struct rtattr *attrs[IFLA_MAX + 1];
         return NULL;
     }
 
-    int x = info->ifi_index & 0xf;
+    int x = info->ifi_index & (INFOHASHSZ-1);
     struct if_info *i, **ip;
 
     for (ip = &if_info[x]; (i = *ip) != NULL; ip = &i->next) {
         i->next = *ip;
         i->index = info->ifi_index;
         *ip = i;
+
+	/* initialize state machine fields */
+	i->state = ST_DOWN;
+	i->lastchange = 0;
+	i->worker = -1;
     }
     return i;
 }
 #include <sys/wait.h>
 #include <syslog.h>
 #include <unistd.h>
+#include <assert.h>
 
 #include "netplug.h"
 
     va_list ap;
     va_start(ap, fmt);
 
+    if (pri == LOG_DEBUG && !debug)
+	return;
+
     if (use_syslog) {
         vsyslog(pri, fmt, ap);
     } else {
         return pid;
     }
 
-    do_log(LOG_INFO, "%s %s %s", NP_SCRIPT, ifname, action);
+    setpgrp();			/* become group leader */
+
+    do_log(LOG_INFO, "%s %s %s -> %d", NP_SCRIPT, ifname, action, getpid());
 
     execl(NP_SCRIPT, NP_SCRIPT, ifname, action, NULL);
 
     return WIFEXITED(status) ? WEXITSTATUS(status) : -WTERMSIG(status);
 }
 
+/* 
+   Synchronously kill a script
+
+   Assumes the pid is actually a leader of a group.  Kills first with
+   SIGTERM at first; if that doesn't work, follow up with a SIGKILL.
+ */
+void
+kill_script(pid_t pid)
+{
+    pid_t ret;
+    int status;
+    sigset_t mask, origmask;
+
+    if (pid == -1)
+	return;
+
+    assert(pid > 0);
+    
+    /* Block SIGCHLD while we go around killing things, so the SIGCHLD
+       handler doesn't steal things behind our back. */
+    sigemptyset(&mask);
+    sigaddset(&mask, SIGCHLD);
+    sigprocmask(SIG_BLOCK, &mask, &origmask);
+
+    /* ask nicely */
+    if (killpg(pid, SIGTERM) == -1) {
+	do_log(LOG_ERR, "Can't kill script pgrp %d: %m", pid);
+	goto done;
+    }
+
+    sleep(1);
+
+    ret = waitpid(pid, &status, WNOHANG);
+
+    if (ret == -1) {
+	do_log(LOG_ERR, "Failed to wait for %d: %m?!", pid);
+	goto done;
+    } else if (ret == 0) {
+	/* no more Mr. nice guy */
+	if (killpg(pid, SIGKILL) == -1) {
+	    do_log(LOG_ERR, "2nd kill %d failed: %m?!", pid);
+	    goto done;
+	}
+	ret = waitpid(pid, &status, 0);
+    } 
+
+    assert(ret == pid);
+
+ done:
+    sigprocmask(SIG_SETMASK, &origmask, NULL);
+}
 
 void *
 xmalloc(size_t n)
 
 #define _GNU_SOURCE
 #include <net/if.h>
+#include <netinet/in.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <syslog.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <assert.h>
+#include <wait.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/ioctl.h>
 
 #include "netplug.h"
 
 int use_syslog;
 
 
-#define flag_was_set(flag) \
-        (!(i->flags & (flag)) && (info->ifi_flags & (flag)))
-#define flag_was_unset(flag) \
-        ((i->flags & (flag)) && !(info->ifi_flags & (flag)))
-
 static int
 handle_interface(struct nlmsghdr *hdr, void *arg)
 {
     char *name = RTA_DATA(attrs[IFLA_IFNAME]);
 
     if (!if_match(name)) {
-    char *name = RTA_DATA(attrs[IFLA_IFNAME]);
-
-    if (!if_match(name)) {
-        do_log(LOG_INFO, "%s: ignoring event", name);
-        return 0;
+	do_log(LOG_INFO, "%s: ignoring event", name);
+	return 0;
     }
 
-    }
+    struct if_info *i = if_info_get_interface(hdr, attrs);
 
-    struct if_info *i;
+    if (i == NULL)
+        return 0;
 
-    if ((i = if_info_get_interface(hdr, attrs)) == NULL) {
-        return 0;
-    }
+    ifsm_flagchange(i, info->ifi_flags);
 
-    if (i->flags == info->ifi_flags) {
-        goto done;
-    }
-
-    do_log(LOG_INFO, "%s: flags 0x%08x -> 0x%08x", name, i->flags,
-           info->ifi_flags);
-
-    if (flag_was_set(IFF_RUNNING)) {
-        run_netplug_bg(name, "in");
-    }
-    if (flag_was_unset(IFF_RUNNING)) {
-        run_netplug_bg(name, "out");
-    }
-    if (flag_was_unset(IFF_UP)) {
-        if (try_probe(name) == 0) {
-            do_log(LOG_WARNING, "Could not bring %s back up", name);
-        }
-    }
-
- done:
     if_info_update_interface(hdr, attrs);
 
     return 0;
     fclose(fp);
 }
 
+struct child_exit
+{
+    pid_t	pid;
+    int		status;
+};
+
+static int child_handler_pipe[2];
+
+static void
+child_handler(int sig, siginfo_t *info, void *v)
+{
+    struct child_exit ce;
+    int ret;
+
+    assert(sig == SIGCHLD);
+    
+    ce.pid = info->si_pid;
+    ret = waitpid(info->si_pid, &ce.status, 0);
+    if (ret == info->si_pid)
+	write(child_handler_pipe[1], &ce, sizeof(ce));
+}
+
+/* Poll the existing interface state, so we can catch any state
+   changes for which we may not have neen a netlink message. */
+static void
+poll_interfaces(void)
+{
+    static int sockfd = -1;
+
+    if (sockfd == -1) {
+	sockfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
+	if (sockfd == -1) {
+	    do_log(LOG_ERR, "can't create interface socket: %m");
+	    exit(1);
+	}
+    }
+
+    int pollflags(struct if_info *info) {
+	struct ifreq ifr;
+	
+	if (!if_match(info->name))
+	    return 0;
+
+	memcpy(ifr.ifr_name, info->name, sizeof(ifr.ifr_name));
+	if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) < 0)
+	    do_log(LOG_ERR, "%s: can't get flags: %m", info->name);
+	else {
+	    ifsm_flagchange(info, ifr.ifr_flags);
+	    ifsm_flagpoll(info);
+	}
+
+	return 0;
+    }
+
+    for_each_iface(pollflags);
+}
+
+int debug = 0;
 
 int
 main(int argc, char *argv[])
     int probe = 1;
     int c;
 
-    while ((c = getopt(argc, argv, "FPc:hi:p:")) != EOF) {
+    while ((c = getopt(argc, argv, "DFPc:hi:p:")) != EOF) {
         switch (c) {
+	case 'D':
+	    debug = 1;
+	    break;
         case 'F':
             foreground = 1;
             break;
         }
     }
 
+    if (pipe(child_handler_pipe) == -1) {
+	do_log(LOG_ERR, "can't create pipe: %m");
+	exit(1);
+    }
+
+    if (fcntl(child_handler_pipe[0], F_SETFL, O_NONBLOCK) == -1) {
+	do_log(LOG_ERR, "can't set pipe non-blocking: %m");
+	exit(1);
+    }
+    
+    struct sigaction sa;
+    sa.sa_sigaction = child_handler;
+    sa.sa_flags = SA_RESTART | SA_SIGINFO;
+    sigfillset(&sa.sa_mask);
+
+    if (sigaction(SIGCHLD, &sa, NULL) == -1) {
+	do_log(LOG_ERR, "can't set SIGCHLD handler: %m");
+	exit(1);
+    }
+
     int fd = netlink_open();
 
     netlink_request_dump(fd);
     netlink_receive_dump(fd, if_info_save_interface, NULL);
 
-    netlink_listen(fd, handle_interface, NULL);
+    if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
+	do_log(LOG_ERR, "can't set socket non-blocking: %m");
+	exit(1);
+    }
 
-    return fd ? 0 : 0;
+    struct pollfd fds[] = {
+	{ fd, POLLIN, 0 },
+	{ child_handler_pipe[0], POLLIN, 0 },
+    };
+
+    {
+	/* Run over each of the interfaces we know and care about, and
+	   make sure the state machine has done the appropriate thing
+	   for their current state. */
+	int poll_flags(struct if_info *i) {
+	    if (if_match(i->name))
+		ifsm_flagpoll(i);
+	    return 0;
+	}
+	for_each_iface(poll_flags);
+    }
+
+    for(;;) {
+	int ret;
+
+	/* Make sure we don't miss anything interesting */
+	poll_interfaces();
+
+	ret = poll(fds, sizeof(fds)/sizeof(fds[0]), -1);
+
+	if (ret == -1) {
+	    if (errno == EINTR)
+		continue;
+	    do_log(LOG_ERR, "poll failed: %m");
+	    exit(1);
+	}
+	if (ret == 0)
+	    continue;		/* XXX??? */
+
+	if (fds[0].revents & POLLIN) {
+	    /* interface flag state change */
+	    if (netlink_listen(fd, handle_interface, NULL) == 0)
+		break;		/* done */
+	}
+
+	if (fds[1].revents & POLLIN) {
+	    /* netplug script finished */
+	    int ret;
+	    struct child_exit ce;
+		
+	    do {
+		ret = read(child_handler_pipe[0], &ce, sizeof(ce));
+
+		assert(ret == 0 || ret == -1 || ret == sizeof(ce));
+		
+		if (ret == sizeof(ce))
+		    ifsm_scriptdone(ce.pid, ce.status);
+		else if (ret == -1 && errno != EAGAIN) {
+		    do_log(LOG_ERR, "pipe read failed: %m");
+		    exit(1);
+		}
+	    } while(ret == sizeof(ce));
+	}
+    }
+
+    return 0;
 }
 
 
 }
 
 
-void
+int
 netlink_listen(int fd, netlink_callback callback, void *arg)
 {
     char   buf[8192];
         if (status == -1) {
             if (errno == EINTR)
                 continue;
+	    if (errno == EAGAIN)
+		return 1;
+
             do_log(LOG_ERR, "OVERRUN: %m");
             continue;
         }
 
                 if ((err = callback(hdr, arg)) == -1) {
                     do_log(LOG_ERR, "Callback failed");
-                    return;
+                    return 0;
                 }
             }
 
 int try_probe(char *iface);
 void probe_interfaces(void);
 
+extern int debug;
 
 /* netlink interfacing */
 
 int netlink_open(void);
 void netlink_request_dump(int fd);
 void netlink_receive_dump(int fd, netlink_callback callback, void *arg);
-void netlink_listen(int fd, netlink_callback callback, void *arg);
+int  netlink_listen(int fd, netlink_callback callback, void *arg);
 
 
 /* network interface info management */
     int addr_len;
     unsigned char addr[8];
     char name[16];
+
+    enum ifstate {
+	ST_DOWN,		/* uninitialized */
+	ST_DOWNANDOUT,		/* went down while running out script */
+	ST_PROBING,		/* running probe script */
+	ST_PROBING_UP,		/* running probe, and interface went UP */
+	ST_INACTIVE,		/* interface inactive */
+	ST_INNING,		/* plugin script is running */
+	ST_WAIT_IN,		/* wait until plugin script is done */
+	ST_ACTIVE,		/* interface active */
+	ST_OUTING,		/* plugout script is running */
+	ST_INSANE,		/* interface seems to be flapping */
+    }		state;
+
+    pid_t	worker;		/* pid of current in/out script */
+    time_t	lastchange;	/* timestamp of last state change */
 };
 
 struct if_info *if_info_get_interface(struct nlmsghdr *hdr,
                                          struct rtattr *attrs[]);
 int if_info_save_interface(struct nlmsghdr *hdr, void *arg);
 void parse_rtattrs(struct rtattr *tb[], int max, struct rtattr *rta, int len);
+void for_each_iface(int (*func)(struct if_info *));
 
+void ifsm_flagpoll(struct if_info *info);
+void ifsm_flagchange(struct if_info *info, unsigned int newflags);
+void ifsm_scriptdone(pid_t pid, int exitstatus);
 
 /* utilities */
 
     __attribute__ ((format (printf, 2, 3)));
 pid_t run_netplug_bg(char *ifname, char *action);
 int run_netplug(char *ifname, char *action);
+void kill_script(pid_t pid);
 void *xmalloc(size_t n);