Commits

Anonymous committed 7eeef08

initial commit of working(ish) psed

Comments (0)

Files changed (5)

+.POSIX:
+
+# redefine CC from c99 to gcc
+CC=gcc
+# redefining CFLAGS is bad, is there a good way to do this?
+# or just add my own macro xCFLAGS and redefine .c: suffix rule to use both?
+CFLAGS=-std=c11 -pedantic -Wall -Wextra -Os
+
+psed:
+psed - Minimally POSIX compliant sed
+====================================
+psed is an attempt to create a sed that is strictly POSIX compliant, without any
+extensions. As such, psed does a number of things that are rather annoying and
+that no sensible person would appreciate. These annoyances include:
+  - 8192 byte pattern and hold space (overflow is truncated)
+  - maximum of 10 wfiles (w command, s command's w flag)
+  - maximum line length of LINE_MAX bytes (all input and output, overflow is truncated)
+  - maximum 8 byte label names (overflow is truncated)
+  - labels and wfiles are read to end of line (including spaces and semicolons)
+  - enforce one command per line inside {} (TODO)
+  - enforce } alone on line (TODO)
+  - ... (what else?)
+
+psed complies (as far as I can tell) with:
+  - C11
+  - POSIX.1-2008
+#define _POSIX_C_SOURCE 200809L
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <regex.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define MAX_WFILES  ((size_t)  10) // minimum required by POSIX
+#define LABEL_BYTES ((size_t)   9) // minimum required by POSIX + '\0'
+#define SPACE_BYTES ((size_t)8192) // minimum required by POSIX
+
+// number of elements in array a (only valid for real arrays, not pointers)
+#define LENGTH(a) (sizeof(a) / sizeof((a)[0]))
+#define USE(a)    ((void)(a)) // to escape unused warnings
+// report strerror(errno) with program name and source location
+#define serror()  ((void)fprintf(stderr, "%s: %s,%s,%d: %s\n", prog_name, __FILE__, __func__, __LINE__, strerror(errno)))
+// printf-style diagnostic prefixed with program name and current line_number
+#define warn(...) do{ fprintf(stderr, "%s: %zu: ", prog_name, line_number); fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); }while(0)
+
+// Types
+// One address of a command: a tag saying how the address selects lines plus
+// the payload (line number or compiled regex) used by the LINE/REGEX tags.
+typedef struct {
+    enum {
+        IGNORE, // empty address, ignore
+        EVERY , // every line
+        LINE  , // line number
+        LAST  , // last line ($)
+        REGEX , // use included regex
+        LASTRE, // use most recently used regex
+    } type;
+    union {
+        size_t  line_number;
+        regex_t regex;
+    };
+} Address;
+
+// Address range of a command (zero, one or two addresses).
+// naddr == 0 iff beg.type == EVERY  && end.type == IGNORE
+// naddr == 1 iff beg.type != IGNORE && end.type == IGNORE
+// naddr == 2 iff beg.type != IGNORE && end.type != IGNORE
+typedef struct {
+    unsigned naddr; // not necessary, but helpful
+    Address  beg;
+    Address  end;
+} Range;
+
+typedef struct _command Command; // forward declaration for Sedfunc
+
+typedef int    (*Action )(void           ); // Action taken after current command executes
+typedef Action (*Sedfunc)(Command*       ); // Sed function, takes current command
+typedef int    (*Print  )(char*   , FILE*); // Print function for a,c,i,r
+typedef char  *(*Getarg )(Command*, char*); // Read arguments for given command from line
+typedef void   (*Freearg)(Command*       ); // Free anything that was malloced in Getarg
+
+// One parsed sed command: the range it applies to, the function that executes
+// it, the function that releases its argument, and a per-command argument
+// stored in an anonymous union (which member is valid depends on the command).
+struct _command {
+    Range         range;
+    Sedfunc       func;
+    Freearg       freearg;
+    union { // did have a void *extra; but with the union there's less to malloc/free
+        FILE     *file;                 // used for w
+        Command  *jump;                 // used for   b,t during execution
+        char      label[LABEL_BYTES];   // used for :,b,t during building
+        char     *text;                 // used for y
+        ptrdiff_t offset;               // used for { (instead of jump because pointers break when we realloc)
+        struct {                        // used for s
+            regex_t       regex;        // regex to find
+            char         *replace;      // text to replace with ("\\n" has been changed to "\n")
+            FILE         *file;         // if non NULL, file for w flag
+            unsigned      occurrence;   // which match to replace, 0 for all (g flag)
+            unsigned char flag_p    :1; // print flag
+            unsigned char last_regex:1; // use last regex, not included regex
+        } s;
+        struct {                        // used for a,c,i,r
+            char *text;                 // malloced text for insertion (or filename for r)
+            size_t size;                // size of malloced text (not strictly necessary, useful)
+            Print print;                // print function (check_puts for a,c,i write_file for r)
+        } acir;
+    };
+    unsigned char in_match:1;           // are we currently inside of a match
+    unsigned char negate  :1;           // is the range negated for this command
+};
+
+// Static description of one sed function; entries live in the funcs[] table,
+// indexed by the command letter.
+typedef struct {    // used to build the lookup table of functions
+    char     letter;
+    Sedfunc  func;
+    Getarg   getarg;
+    Freearg  freearg;
+    unsigned naddr; // max addresses function takes
+} Sedfunc_info;
+
+// Growable stack of Command pointers (or command offsets cast to pointers).
+typedef struct {    // used with push/pop as a dynamically sized array
+    Command **data; // was void *, any reason to keep void *?
+    size_t    size;     // number of elements currently stored
+    size_t    capacity; // number of elements data has room for
+} Vector;
+
+// Globals
+struct { // files for w commands and s///w flag
+    char *name;
+    FILE *file;
+} wfiles[MAX_WFILES];
+
+struct { // global flags
+    unsigned char n       :1; // -n (no print)
+    unsigned char s       :1; // s/// happened
+    unsigned char aci_cont:1; // a,c,i text continuation
+} gflags;
+
+// store addresses of { in braces, : in labels, bt in branches during building
+// store addresses of scheduled writes during execution
+// (store (cmd - prog) in the data so we don't screw up pointers when we realloc)
+Vector braces, labels, branches, writes;
+
+// pattern and hold spaces
+char  space1[SPACE_BYTES],  space2[SPACE_BYTES];
+char *patt_space = space1, *hold_space = space2;
+
+regex_t *last_regex;     // last used regex for use with empty regex searches
+char    **files;         // list of file names from argv
+FILE    *file;           // current file we are reading
+char    *prog_name;      // argv[0]
+Command *prog, *pc, *ni; // program memory, program counter, next instruction
+size_t   prog_size;      // number of Commands in prog
+size_t   line_number;    // of script when building, of input when running
+
+// Forward Declarations
+// Sedfuncs
+Action cmd_a     (Command *cmd);
+Action cmd_b     (Command *cmd);
+Action cmd_c     (Command *cmd);
+Action cmd_d     (Command *cmd);
+Action cmd_D     (Command *cmd);
+Action cmd_g     (Command *cmd);
+Action cmd_G     (Command *cmd);
+Action cmd_h     (Command *cmd);
+Action cmd_H     (Command *cmd);
+Action cmd_i     (Command *cmd);
+Action cmd_l     (Command *cmd);
+Action cmd_n     (Command *cmd);
+Action cmd_N     (Command *cmd);
+Action cmd_p     (Command *cmd);
+Action cmd_P     (Command *cmd);
+Action cmd_q     (Command *cmd);
+Action cmd_r     (Command *cmd);
+Action cmd_s     (Command *cmd);
+Action cmd_t     (Command *cmd);
+Action cmd_w     (Command *cmd);
+Action cmd_x     (Command *cmd);
+Action cmd_y     (Command *cmd);
+Action cmd_colon (Command *cmd);
+Action cmd_equal (Command *cmd);
+Action cmd_lbrace(Command *cmd);
+Action cmd_rbrace(Command *cmd);
+Action cmd_last  (Command *cmd);
+
+// Getargs / Freeargs
+void  free_acir_arg (Command *cmd);
+void  free_s_arg    (Command *cmd);
+void  free_y_arg    (Command *cmd);
+char *get_aci_arg   (Command *cmd, char *str);
+char *get_bt_arg    (Command *cmd, char *str);
+char *get_colon_arg (Command *cmd, char *str);
+char *get_lbrace_arg(Command *cmd, char *str);
+char *get_r_arg     (Command *cmd, char *str);
+char *get_rbrace_arg(Command *cmd, char *str);
+char *get_s_arg     (Command *cmd, char *str);
+char *get_w_arg     (Command *cmd, char *str);
+char *get_y_arg     (Command *cmd, char *str);
+
+// Actions
+int noop     (void); // continue execution
+int new_line (void); // read   new line, continue current cycle
+int app_line (void); // append new line, continue current cycle
+int new_next (void); // move to new cycle, read new line
+int app_next (void); // move to new cycle, append new line
+int old_next (void); // move to new cycle, reuse pattern space
+int quit     (void); // do not start a new cycle
+int unimp    (void); // unimplemented feature
+int error    (void); // he's dead Jim
+
+// Utilities
+int      check_puts  (char *str, FILE *stream);
+char    *chomp       (char *str);
+int      resize      (void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next, int clear);
+char    *find_delim  (char *str, char delim);
+int      is_eof      (FILE *stream);
+int      next_file   (void);
+Command *pop         (Vector *vec);
+Command *push        (Vector *vec, Command *cmd);
+int      read_line   (char *buf, size_t size, FILE* file);
+int      sized_memcpy(char *dest, char *src, size_t n, char *buf, size_t buf_size);
+void     strlcpy     (char *dest, char *src, size_t size);
+void     strlcat     (char *dest, char *src, size_t size);
+int      write_file  (char *in_path, FILE *out_stream);
+
+// Build and run
+int      build        (char *line);
+void     cleanup      (void);
+Command *find_label   (Vector *vec, char *label);
+int      in_range     (Command *cmd);
+int      insert_labels(void);
+char    *make_address (Address *addr, char *str);
+char    *make_range   (Range *range, char *str);
+int      match_addr   (Address *addr);
+int      read_script  (char *path);
+int      run          (void);
+
+// Lookup table of functions
+// Indexed by command letter through designated initializers; letters without
+// an entry are zero-initialized (func == NULL).
+#define info(letter, sedfunc, getargs, freeargs, max_addr) \
+    [letter] = { letter, sedfunc, getargs, freeargs, max_addr }
+Sedfunc_info funcs[] = {
+    info('a', cmd_a     , get_aci_arg   , free_acir_arg , 1), // schedule write of text for later
+    info('b', cmd_b     , get_bt_arg    , NULL          , 2), // branch to label (extra holds char *label while building, Command *label while running)
+    info('c', cmd_c     , get_aci_arg   , free_acir_arg , 2), // delete pattern space, at 0 or 1 addr or end of 2 addr, write text
+    info('d', cmd_d     , NULL          , NULL          , 2), // delete pattern space
+    info('D', cmd_D     , NULL          , NULL          , 2), // delete to first newline and start new cycle without reading (if no newline, d)
+    info('g', cmd_g     , NULL          , NULL          , 2), // replace pattern space with hold space
+    info('G', cmd_G     , NULL          , NULL          , 2), // append newline and hold space to pattern space
+    info('h', cmd_h     , NULL          , NULL          , 2), // replace hold space with pattern space
+    info('H', cmd_H     , NULL          , NULL          , 2), // append newline and pattern space to hold space
+    info('i', cmd_i     , get_aci_arg   , free_acir_arg , 1), // write text
+    info('l', cmd_l     , NULL          , NULL          , 2), // write pattern space in 'visually unambiguous form'
+    info('n', cmd_n     , NULL          , NULL          , 2), // write pattern space (unless -n) read to replace pattern space (if no input, quit)
+    info('N', cmd_N     , NULL          , NULL          , 2), // append to pattern space separated by newline, line number changes (if no input, quit)
+    info('p', cmd_p     , NULL          , NULL          , 2), // write pattern space
+    info('P', cmd_P     , NULL          , NULL          , 2), // write pattern space up to first newline
+    info('q', cmd_q     , NULL          , NULL          , 1), // quit
+    info('r', cmd_r     , get_r_arg     , free_acir_arg , 1), // write contents of file (unable to open/read treated as empty file)
+    info('s', cmd_s     , get_s_arg     , free_s_arg    , 2), // find/replace/all that crazy s stuff
+    info('t', cmd_t     , get_bt_arg    , NULL          , 2), // if s/// succeeded (since input or last t) branch to label (end if no label)
+    info('w', cmd_w     , get_w_arg     , NULL          , 2), // append pattern space to file
+    info('x', cmd_x     , NULL          , NULL          , 2), // exchange pattern and hold spaces
+    info('y', cmd_y     , get_y_arg     , free_y_arg    , 2), // replace characters in set1 with characters in set2 (sets is two adjacent strings)
+    info(':', cmd_colon , get_colon_arg , NULL          , 0), // defines label for later b and t commands
+    info('=', cmd_equal , NULL          , NULL          , 1), // printf("%d\n", line_number);
+    info('{', cmd_lbrace, get_lbrace_arg, NULL          , 2), // if we match, run commands, otherwise jump to close
+    info('}', cmd_rbrace, get_rbrace_arg, NULL          , 0), // noop, hold onto open for ease of building scripts
+
+    [CHAR_MAX] = { 0, NULL, NULL, NULL, 0 } // fill out the rest of the array
+};
+
+/*
+ * Sedfuncs
+ * Check if cmd is applicable, perform function if it is, return Action to run
+ * after command
+ */
+// a: queue this command on the writes vector so cmd_last emits its text.
+// Returns error if the command cannot be queued, noop otherwise.
+Action cmd_a(Command *cmd)
+{
+    if (!in_range(cmd))
+        return noop;
+    return push(&writes, cmd) ? noop : error;
+}
+
+// b: when the command applies, point the program counter at cmd->jump.
+// Always returns noop.
+Action cmd_b(Command *cmd)
+{
+    int selected = in_range(cmd);
+
+    if (selected)
+        pc = cmd->jump;
+    return noop;
+}
+
+// c: when the command applies, empty the pattern space and start a new cycle;
+// the text is written only while cmd->in_match is clear.
+Action cmd_c(Command *cmd)
+{
+    if (in_range(cmd)) {
+        patt_space[0] = '\0';
+        if (!cmd->in_match) {
+            if (check_puts(cmd->acir.text, stdout))
+                return error;
+        }
+        return new_next;
+    }
+    return noop;
+}
+
+// d: when the command applies, empty the pattern space and start a new cycle.
+Action cmd_d(Command *cmd)
+{
+    if (in_range(cmd)) {
+        patt_space[0] = '\0';
+        return new_next;
+    }
+    return noop;
+}
+
+// D: delete the pattern space up to and including its first newline, then
+// restart the script without reading new input (old_next). Without a newline
+// in the pattern space, behave like d (empty it and start a new cycle).
+Action cmd_D(Command *cmd)
+{
+    char *p;
+
+    if (!in_range(cmd))
+        return noop;
+
+    if (!(p = strchr(patt_space, '\n'))) {
+        // do d's work directly: in_range() was already evaluated above and
+        // calling cmd_d() would evaluate it a second time for the same line
+        *patt_space = '\0';
+        return new_next;
+    }
+    p++;
+    memmove(patt_space, p, strlen(p) + 1); // + 1 also moves the terminating '\0'
+    return old_next;
+}
+
+// g: when the command applies, overwrite the pattern space with the hold space.
+Action cmd_g(Command *cmd)
+{
+    if (!in_range(cmd))
+        return noop;
+
+    strlcpy(patt_space, hold_space, SPACE_BYTES);
+    return noop;
+}
+
+// G: append a newline followed by the hold space to the pattern space,
+// warning when the result does not fit in SPACE_BYTES and gets truncated.
+Action cmd_G(Command *cmd)
+{
+    if (!in_range(cmd))
+        return noop;
+
+    // bytes needed: pattern space + '\n' + hold space + terminating '\0'
+    if (strlen(patt_space) + strlen(hold_space) + 2 > SPACE_BYTES)
+        warn("truncating pattern space to %zu bytes", SPACE_BYTES);
+
+    strlcat(patt_space, "\n"      , SPACE_BYTES);
+    strlcat(patt_space, hold_space, SPACE_BYTES);
+    return noop;
+}
+
+// h: when the command applies, overwrite the hold space with the pattern space.
+Action cmd_h(Command *cmd)
+{
+    if (!in_range(cmd))
+        return noop;
+
+    strlcpy(hold_space, patt_space, SPACE_BYTES);
+    return noop;
+}
+
+// H: append a newline followed by the pattern space to the hold space,
+// warning when the result does not fit in SPACE_BYTES and gets truncated.
+Action cmd_H(Command *cmd)
+{
+    if (!in_range(cmd))
+        return noop;
+
+    // bytes needed: hold space + '\n' + pattern space + terminating '\0'
+    if (strlen(patt_space) + strlen(hold_space) + 2 > SPACE_BYTES)
+        warn("truncating hold space to %zu bytes", SPACE_BYTES);
+
+    strlcat(hold_space, "\n"      , SPACE_BYTES);
+    strlcat(hold_space, patt_space, SPACE_BYTES);
+    return noop;
+}
+
+// i: when the command applies, write the text to stdout immediately.
+// Returns error if the write fails.
+Action cmd_i(Command *cmd)
+{
+    if (in_range(cmd)) {
+        if (check_puts(cmd->acir.text, stdout))
+            return error;
+    }
+    return noop;
+}
+
+// l: not implemented yet; ignores cmd (and its address range) and returns the
+// unimp action, which emits an "unimplemented feature" warning.
+Action cmd_l(Command *cmd)
+{
+    USE(cmd);
+    return unimp;
+}
+
+// n: write the pattern space (unless -n) and any queued a/r output, then
+// replace the pattern space with the next input line and continue with the
+// next command of the current cycle (new_line).
+Action cmd_n(Command *cmd)
+{
+    if (!in_range(cmd))
+        return noop;
+
+    // cmd_last() does the writing; on success it asks for a new cycle
+    // (new_next), which would restart the script, so only its failure is
+    // propagated and the read is done with new_line instead
+    if (cmd_last(cmd) == error)
+        return error;
+    return new_line;
+}
+
+// N: append the next input line to the pattern space, separated by a newline,
+// and continue with the next command of the current cycle (app_line).
+// The pattern space itself is not written; only queued a/r output is flushed
+// before the next line is fetched.
+Action cmd_N(Command *cmd)
+{
+    if (!in_range(cmd))
+        return noop;
+
+    // flush scheduled writes (same best-effort handling as cmd_last)
+    for (size_t i = 0; i < writes.size; i++) {
+        Command *w = writes.data[i];
+        w->acir.print(w->acir.text, stdout);
+    }
+    while (pop(&writes))
+        ;
+    return app_line;
+}
+
+// p: when the command applies, write the pattern space to stdout.
+// Returns error if the write fails.
+Action cmd_p(Command *cmd)
+{
+    if (in_range(cmd)) {
+        if (check_puts(patt_space, stdout))
+            return error;
+    }
+    return noop;
+}
+
+// P: write the pattern space up to (not including) its first newline.
+// The newline is temporarily replaced by '\0' and is restored whether or not
+// the write succeeds. Returns error if the write fails.
+Action cmd_P(Command *cmd)
+{
+    char *nl;
+    int   failed;
+
+    if (!in_range(cmd))
+        return noop;
+
+    if ((nl = strchr(patt_space, '\n')))
+        *nl = '\0';
+    failed = check_puts(patt_space, stdout);
+    if (nl)
+        *nl = '\n'; // restore before returning, even on error
+    return failed ? error : noop;
+}
+
+// q: when the command applies, return the quit action; otherwise noop.
+Action cmd_q(Command *cmd)
+{
+    return in_range(cmd) ? quit : noop;
+}
+
+// r: queue this command on the writes vector so cmd_last emits the file.
+// Returns error if the command cannot be queued, noop otherwise.
+Action cmd_r(Command *cmd)
+{
+    if (!in_range(cmd))
+        return noop;
+    return push(&writes, cmd) ? noop : error;
+}
+
+// Append n bytes from src at *dst, which points inside buf (buf_size bytes),
+// always keeping one byte free for the terminating '\0'. Advances *dst by the
+// number of bytes written. Returns 0, or -1 (after warning) if it truncated.
+static int s_append(char **dst, const char *src, size_t n, char *buf, size_t buf_size)
+{
+    size_t room = buf_size - 1 - (size_t)(*dst - buf);
+    int    ret  = 0;
+
+    if (n > room) {
+        warn("truncating pattern space to %zu bytes", buf_size);
+        n   = room;
+        ret = -1;
+    }
+    memcpy(*dst, src, n);
+    *dst += n;
+    return ret;
+}
+
+// Expand the replacement text repl for the match described by pmatch (nmatch
+// entries, offsets relative to str) into *dst: '&' and \0 insert the whole
+// match, \1-\9 insert a subexpression (nothing if it does not exist or did
+// not participate), any other escaped character is copied literally.
+// Returns 0, or -1 if the output was truncated.
+static int s_expand(char **dst, const char *repl, const char *str,
+                    const regmatch_t *pmatch, size_t nmatch, char *buf, size_t buf_size)
+{
+    int escape = 0;
+
+    for (const char *p = repl; *p; p++) {
+        const regmatch_t *rm = NULL;
+
+        if (escape) {
+            escape = 0;
+            if (isdigit((unsigned char)*p)) {
+                size_t idx = (size_t)(*p - '0');
+                if (idx >= nmatch || pmatch[idx].rm_so == -1)
+                    continue; // no such group, or group did not match
+                rm = &pmatch[idx];
+            }
+        } else if (*p == '\\') {
+            escape = 1;
+            continue;
+        } else if (*p == '&') {
+            rm = &pmatch[0];
+        }
+
+        if (rm) {
+            if (s_append(dst, str + rm->rm_so, (size_t)(rm->rm_eo - rm->rm_so), buf, buf_size))
+                return -1;
+        } else if (s_append(dst, p, 1, buf, buf_size)) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// s: substitute matches of the regex in the pattern space.
+// cmd->s.occurrence selects which match is replaced (0 replaces all of them).
+// When a substitution is made, gflags.s is set (for t), the pattern space is
+// rewritten and the p / w flags are honoured; otherwise nothing changes.
+// Output longer than the pattern space is truncated with a warning.
+// Returns error on a write failure or when an empty regex has no predecessor.
+Action cmd_s(Command *cmd)
+{
+    if (!in_range(cmd))
+        return noop;
+
+    regex_t *re = cmd->s.last_regex ? last_regex : &cmd->s.regex;
+
+    if (!re) {
+        warn("no previous regular expression");
+        return error;
+    }
+
+    size_t     nmatch = re->re_nsub + 1;
+    regmatch_t pmatch[nmatch];
+    char       buf[SPACE_BYTES], *new = buf, *str = patt_space;
+    unsigned   matches     = 0;
+    int        replaced    = 0; // at least one substitution was made
+    int        full        = 0; // buf overflowed, stop producing output
+    int        after_match = 0; // str sits directly after a non-empty match
+    int        eflags      = 0;
+
+    last_regex = re;
+
+    while (!full && !regexec(re, str, nmatch, pmatch, eflags)) {
+        size_t so = (size_t)pmatch[0].rm_so;
+        size_t eo = (size_t)pmatch[0].rm_eo;
+
+        eflags = REG_NOTBOL; // later searches do not start at beginning of line
+
+        if (eo == 0 && after_match) {
+            // empty match adjacent to the previous match is not a new
+            // occurrence: copy one character and search again
+            if (!*str)
+                break;
+            full = s_append(&new, str, 1, buf, sizeof(buf));
+            str++;
+            after_match = 0;
+            continue;
+        }
+
+        if (++matches == cmd->s.occurrence || !cmd->s.occurrence) { // correct match or global
+            replaced = 1;
+            full = s_append(&new, str, so, buf, sizeof(buf)) // text before the match
+                || s_expand(&new, cmd->s.replace, str, pmatch, nmatch, buf, sizeof(buf));
+        } else {
+            // copy over everything including the match
+            full = s_append(&new, str, eo, buf, sizeof(buf));
+        }
+        str += eo;
+        after_match = eo > so;
+
+        if (replaced && cmd->s.occurrence)
+            break; // requested occurrence handled, the rest is copied verbatim
+        if (eo == so) {
+            // empty match: step over one character to guarantee progress
+            if (!*str)
+                break;
+            if (!full)
+                full = s_append(&new, str, 1, buf, sizeof(buf));
+            str++;
+        }
+    }
+    if (!replaced)
+        return noop;
+    gflags.s = 1;
+
+    if (!full)
+        s_append(&new, str, strlen(str), buf, sizeof(buf));
+    *new = '\0';
+    strlcpy(patt_space, buf, SPACE_BYTES);
+    if (cmd->s.flag_p && check_puts(patt_space, stdout))
+        return error;
+    if (cmd->s.file && check_puts(patt_space, cmd->s.file))
+        return error;
+    return noop;
+}
+
+// t: when the command applies and the substitution flag gflags.s is set,
+// jump to cmd->jump and clear the flag. Always returns noop.
+Action cmd_t(Command *cmd)
+{
+    if (!in_range(cmd) || !gflags.s)
+        return noop;
+
+    gflags.s = 0;
+    pc = cmd->jump;
+    return noop;
+}
+
+// w: when the command applies, write the pattern space to cmd->file.
+// Returns error if the write fails.
+Action cmd_w(Command *cmd)
+{
+    if (in_range(cmd)) {
+        if (check_puts(patt_space, cmd->file))
+            return error;
+    }
+    return noop;
+}
+
+// x: when the command applies, exchange the pattern and hold spaces by
+// swapping the two buffer pointers (no bytes are copied).
+Action cmd_x(Command *cmd)
+{
+    char *tmp;
+
+    if (!in_range(cmd)) // honour the command's address range
+        return noop;
+
+    tmp        = patt_space;
+    patt_space = hold_space;
+    hold_space = tmp;
+    return noop;
+}
+
+// y: when the command applies, replace every pattern space character found in
+// set1 with the character at the same position in set2. cmd->text holds the
+// two sets as adjacent nul terminated strings (set1 first).
+Action cmd_y(Command *cmd)
+{
+    char *find, *repl;
+
+    if (!in_range(cmd)) // honour the command's address range
+        return noop;
+
+    find = cmd->text;
+    repl = find + strlen(find) + 1;
+
+    for (char *p = patt_space; (p = strpbrk(p, find)); p++)
+        *p = repl[strchr(find, *p) - find];
+
+    return noop;
+}
+
+// ':' does nothing when executed; cmd is unused and noop is always returned.
+Action cmd_colon(Command *cmd)
+{
+    USE(cmd);
+    return noop;
+}
+
+// =: when the command applies, write the current line number followed by a
+// newline to stdout. Returns error if the write fails.
+Action cmd_equal(Command *cmd)
+{
+    if (!in_range(cmd)) // honour the command's address range
+        return noop;
+
+    if (printf("%zu\n", line_number) < 0) {
+        serror();
+        return error;
+    }
+    return noop;
+}
+
+// {: when the command does NOT apply, move the program counter to the
+// command at cmd->offset (set by get_rbrace_arg to the matching }).
+Action cmd_lbrace(Command *cmd)
+{
+    if (in_range(cmd))
+        return noop;
+
+    pc = &prog[cmd->offset];
+    return noop;
+}
+
+// '}' does nothing when executed; cmd is unused and noop is always returned.
+Action cmd_rbrace(Command *cmd)
+{
+    USE(cmd);
+    return noop;
+}
+
+// Pseudo command for the end of the script (not a real Sedfunc, but shaped
+// like one): write the pattern space unless -n is set, run every write that
+// a/r scheduled, empty the schedule, and request a new cycle.
+// Results of the scheduled writes are ignored; only a failed write of the
+// pattern space yields error.
+Action cmd_last(Command *cmd)
+{
+    unsigned idx;
+
+    if (!gflags.n) {
+        if (check_puts(patt_space, stdout))
+            return error;
+    }
+
+    for (idx = 0; idx < writes.size; idx++) {
+        cmd = writes.data[idx];
+        cmd->acir.print(cmd->acir.text, stdout);
+    }
+    while (pop(&writes) != NULL)
+        continue;
+    return new_next;
+}
+
+/*
+ * Getargs / Freeargs
+ * Read the first argument from str into cmd->extra and return pointer to 1
+ * past end of argument or NULL on error
+ * On arguments that are whole line, return a pointer to any '\0'
+ */
+// Argument reader for a,c,i: the command must be followed by exactly a
+// backslash at end of line. Starts cmd->acir.text as an empty malloced string
+// and raises gflags.aci_cont so build() reads text instead of parsing a
+// command. Returns str (now pointing at '\0') or NULL on error.
+char *get_aci_arg(Command *cmd, char *str)
+{
+    cmd->acir.print = check_puts;
+
+    cmd->acir.text = calloc(1, 1); // empty string: a single nul byte
+    if (cmd->acir.text == NULL) {
+        serror();
+        return NULL;
+    }
+    if (!(str[0] == '\\' && str[1] == '\0')) {
+        warn("trailing characters");
+        return NULL;
+    }
+    str[0] = '\0';
+    gflags.aci_cont = 1;
+    return str;
+}
+
+// Freearg for a,c,i,r: release the malloced text (or file name for r).
+void free_acir_arg(Command *cmd)
+{
+    free(cmd->acir.text);
+}
+
+// read label for b,t commands. for strict POSIX compliance read rest of line
+// (including spaces and semicolons) and truncate to 8 bytes. an empty label is
+// allowed. records the command's offset in branches; returns pointer to the
+// (now empty) rest of line or NULL if the offset could not be recorded
+char *get_bt_arg(Command *cmd, char *str)
+{
+    char  *p      = chomp(str);
+    size_t before = branches.size;
+
+    strlcpy(cmd->label, p, LABEL_BYTES); // empty p gives an empty label
+
+    if (strlen(p) > LABEL_BYTES - 1)
+        warn("truncating label '%s' to '%s' (%zu bytes)", p, cmd->label, LABEL_BYTES - 1);
+
+    // the pushed value is an offset and is legitimately 0 (NULL) for the first
+    // command, so detect failure by the size not growing, not by the result
+    push(&branches, (Command *)(cmd - prog));
+    if (branches.size == before)
+        return NULL;
+    *p = '\0';
+    return p;
+}
+
+// read label for : command. for strict POSIX compliance read rest of line
+// (including spaces and semicolons) and truncate to 8 bytes. records the
+// command's offset in labels; returns pointer to the (now empty) rest of line
+// or NULL on error (no label, or offset could not be recorded)
+char *get_colon_arg(Command *cmd, char *str)
+{
+    char  *p      = chomp(str);
+    size_t before = labels.size;
+
+    if (!*p) {
+        warn("no label name");
+        return NULL;
+    }
+    strlcpy(cmd->label, p, LABEL_BYTES);
+    if (strlen(p) > LABEL_BYTES - 1)
+        warn("truncating label '%s' to '%s' (%zu bytes)", p, cmd->label, LABEL_BYTES - 1);
+
+    // the pushed value is an offset and is legitimately 0 (NULL) for the first
+    // command, so detect failure by the size not growing, not by the result
+    push(&labels, (Command *)(cmd - prog));
+    if (labels.size == before)
+        return NULL;
+    *p = '\0';
+    return p;
+}
+
+// push offset of { so we can pop for }. not really getarg, but called at the
+// same place. (use cmd - prog offset instead of address so we don't screw up
+// pointers when we realloc). returns str, or NULL if the push failed
+char *get_lbrace_arg(Command *cmd, char *str)
+{
+    size_t before = braces.size;
+
+    // offset 0 (a { as first command) is pushed as NULL, so detect failure by
+    // the size not growing rather than by push()'s return value
+    push(&braces, (Command *)(cmd - prog));
+    if (braces.size == before)
+        return NULL;
+    return str;
+}
+
+// pop the matching { and store this command's offset in it so cmd_lbrace can
+// jump from { to }. returns str, or NULL (after warning) if no { is open
+char *get_rbrace_arg(Command *cmd, char *str)
+{
+    Command *lbrace;
+
+    // check emptiness explicitly: a popped offset of 0 is valid and prog + 0
+    // is never NULL, so the popped value cannot signal "nothing to pop"
+    if (!braces.size) {
+        warn("extra }");
+        return NULL;
+    }
+    lbrace = prog + (ptrdiff_t)pop(&braces);
+    lbrace->offset = cmd - prog;
+    return str;
+}
+
+// read file name for r command. for strict POSIX compliance read rest of line
+// (including spaces and semicolons). the name is stored as a malloced copy in
+// cmd->acir.text and write_file is installed as the print function.
+// returns pointer to the (now empty) rest of line or NULL on error
+char *get_r_arg(Command *cmd, char *str)
+{
+    char *p;
+
+    cmd->acir.print = write_file;
+    if (!isblank((unsigned char)*str)) { // ctype functions need unsigned char values
+        warn("no space before file name");
+        return NULL;
+    }
+    p = chomp(str);
+    if (!*p) {
+        warn("no file name");
+        return NULL;
+    }
+    if (!(cmd->acir.text = strdup(p))) {
+        serror();
+        return NULL;
+    }
+    *p = '\0';
+    return p;
+}
+
+// read arguments for s function: /regex/replacement/flags. inside the regex
+// "\\n" is replaced with a literal newline "\n". an empty regex means "use the
+// last regex". flags: a number (which occurrence), g (all), p (print), w file.
+// if w flag read rest of line for strict POSIX compliance (including spaces
+// and semicolons). returns pointer past the parsed argument or NULL on error
+char *get_s_arg(Command *cmd, char *str)
+{
+    char *p, *q;
+    int  err;
+
+    cmd->s.occurrence = 1;
+    if (*str != '/') {
+        warn("s arguments must be delimited by /");
+        return NULL;
+    }
+    if (*(str + 1) == '/') // empty regex, use last_regex
+        cmd->s.last_regex = 1;
+    if (!(p = find_delim(str + 1, *str))) {
+        warn("bad s argument: %s", str);
+        return NULL;
+    }
+    for (q = strstr(str + 1, "\\n"); q && q < p; q = strstr(q, "\\n")) {
+        *q = '\n';
+        memmove(q + 1, q + 2, strlen(q + 2) + 1); // + 1 keeps the line terminated
+        p--;
+    }
+    *p = '\0';
+    // compile straight into the command so nothing uninitialized is copied
+    // when the regex is empty, and regerror sees the regex that failed
+    if (!cmd->s.last_regex && (err = regcomp(&cmd->s.regex, str + 1, 0))) {
+        char msg[128]; // TODO: size? own regcomp() function so no repeats
+        regerror(err, &cmd->s.regex, msg, sizeof(msg));
+        warn("bad regex: %s: %s", str, msg);
+        return NULL;
+    }
+    *p = *str; // replace delim for error messages
+    if (!(q = find_delim(p + 1, *str))) {
+        warn("bad s argument: %s", str);
+        return NULL;
+    }
+    *q = '\0';
+    if (!(cmd->s.replace = strdup(p + 1))) {
+        serror();
+        return NULL;
+    }
+    *q = *str; // replace delim for error messages
+    for (p = q + 1; *p; p++) {
+        if (isdigit((unsigned char)*p)) {
+            long num;
+
+            errno = 0;
+            num = strtol(p, &p, 10);
+            if (errno == ERANGE) { // number does not fit in a long
+                serror();
+                return NULL;
+            }
+            if (num == 0) {
+                warn("invalid match number: 0");
+                return NULL;
+            }
+            cmd->s.occurrence = num;
+            p--; // loop increment steps onto the first unparsed character
+        } else switch (*p) {
+            default : return p;
+            case 'p': cmd->s.flag_p     = 1; break;
+            case 'g': cmd->s.occurrence = 0; break;
+            case 'w':
+            {
+                Command buf;
+                if (!(p = get_w_arg(&buf, p + 1)))
+                    return NULL;
+                cmd->s.file = buf.file;
+                break;
+            }
+        }
+        if (!*p) // w flag ate rest of line
+            break;
+    }
+    return p;
+}
+
+// Freearg for s: release the replacement text and, unless the command relies
+// on the last used regex instead of its own, the compiled regex.
+void free_s_arg(Command *cmd)
+{
+    if (!cmd->s.last_regex) {
+        free(cmd->s.replace);
+        regfree(&cmd->s.regex);
+        return;
+    }
+    free(cmd->s.replace);
+}
+
+// read file name for w function. for strict POSIX compliance read rest of
+// line (including spaces and semicolons) and limit to MAX_WFILES wfiles.
+// a name seen before reuses its already open stream; a new name is opened for
+// writing and remembered in wfiles. stores the stream in cmd->file.
+// returns pointer to the (now empty) rest of line or NULL on error
+char *get_w_arg(Command *cmd, char *str)
+{
+    char    *p;
+    unsigned i;
+
+    if (!isblank((unsigned char)*str)) { // ctype functions need unsigned char values
+        warn("no space before file name");
+        return NULL;
+    }
+    p = chomp(str);
+    if (!*p) {
+        warn("no file name");
+        return NULL;
+    }
+    for (i = 0; i < MAX_WFILES && wfiles[i].file; i++)
+        if (!strcmp(p, wfiles[i].name))
+            break;
+    if (i == MAX_WFILES) {
+        warn("too many wfiles");
+        return NULL;
+    }
+    if (!wfiles[i].file) { // no match
+        char *name = strdup(p);
+        FILE *fp;
+
+        if (!name) {
+            serror();
+            return NULL;
+        }
+        if (!(fp = fopen(p, "w"))) {
+            serror();   // report before free() can disturb errno
+            free(name); // slot stays empty, nothing leaks
+            return NULL;
+        }
+        wfiles[i].name = name;
+        wfiles[i].file = fp;
+    }
+    cmd->file = wfiles[i].file;
+    *p = '\0';
+    return p;
+}
+
+// read find and replace sets for y function, put in cmd->text as two adjacent
+// nul terminated strings (i.e. char *set2 = cmd->text + strlen(cmd->text) + 1)
+// str must start with the '/' delimiter. returns pointer one past the closing
+// delimiter or NULL on error.
+// NOTE(review): escape sequences inside the sets are kept as written (they
+// are only skipped when searching for delimiters) — confirm this is intended
+char *get_y_arg(Command *cmd, char *str)
+{
+    char *p;
+    size_t len1, len2;
+
+    if (*str != '/') {
+        warn("argument for y must be delimited with /");
+        return NULL;
+    }
+    if (!(p = find_delim(str + 1, *str))) {
+        warn("bad y argument, missing middle delimiter: %s", str);
+        return NULL;
+    }
+    len1 = p - str; // distance from opening to middle delimiter
+    if (!(p = find_delim(p + 1, *p))) {
+        warn("bad y argument, missing end delimiter: %s", str);
+        return NULL;
+    }
+    len2 = p - str - len1; // distance from middle to closing delimiter
+    if (len1 != len2) {
+        warn("bad y argument, different length sets: %s", str);
+        return NULL;
+    }
+    *p = '\0'; // cut at the closing delimiter so strdup copies "set1/set2"
+    if (!(cmd->text = strdup(str + 1))) {
+        serror();
+        return NULL;
+    }
+    *find_delim(cmd->text, *str) = '\0'; // split the copy at the middle delimiter
+    return p + 1;
+}
+
+// Freearg for y: release the malloced pair of character sets.
+void free_y_arg(Command *cmd)
+{
+    free(cmd->text);
+}
+
+/*
+ * Actions
+ * Read, append, print, reset pc, etc.
+ * Return -1 on error, 0 to continue execution, 1 to halt
+ */
+// append new line, continue current cycle
+// reads the next input line (moving on to the next file at end of file) and
+// appends it to the pattern space after a newline, warning if the result is
+// truncated. clears the substitution flag and bumps line_number.
+// returns 0 on success or next_file()'s non-zero result (1 no more input,
+// -1 error)
+int app_line(void)
+{
+    int  ret;
+    char line[SPACE_BYTES];
+
+    while (read_line(line, SPACE_BYTES, file) == EOF)
+        if ((ret = next_file()))
+            return ret;
+
+    // bytes needed: pattern space + '\n' + line + terminating '\0'
+    if (strlen(patt_space) + strlen(line) + 2 > SPACE_BYTES)
+        warn("truncating pattern space to %zu bytes", SPACE_BYTES);
+
+    strlcat(patt_space, "\n", SPACE_BYTES);
+    strlcat(patt_space, line, SPACE_BYTES);
+    gflags.s = 0;
+    line_number++;
+    return 0;
+}
+
+// Append the next input line to the pattern space, then rewind the program
+// counter to just before the first command so a new cycle starts.
+// Returns whatever app_line() returned.
+int app_next(void)
+{
+    const int status = app_line();
+
+    pc = prog;
+    pc--;
+    return status;
+}
+
+// he's dead Jim
+// Action signalling failure: always returns -1.
+int error(void)
+{
+    return -1;
+}
+
+// Replace the pattern space with the next input line and stay in the current
+// cycle. Moves on to the next file whenever the current one is exhausted.
+// Clears the substitution flag and bumps line_number on success.
+// Returns 0, or next_file()'s non-zero result when no line could be read.
+int new_line(void)
+{
+    for (;;) {
+        int ret;
+
+        if (read_line(patt_space, SPACE_BYTES, file) != EOF)
+            break;
+        ret = next_file();
+        if (ret != 0)
+            return ret;
+    }
+    gflags.s = 0;
+    line_number++;
+    return 0;
+}
+
+// Read the next input line into the pattern space, then rewind the program
+// counter to just before the first command so a new cycle starts.
+// Returns whatever new_line() returned.
+int new_next(void)
+{
+    const int status = new_line();
+
+    pc = prog;
+    pc--;
+    return status;
+}
+
+// continue execution
+// Action with no effect: always returns 0.
+int noop(void)
+{
+    return 0;
+}
+
+// move to new cycle, reuse pattern space
+// Rewinds pc to one before the first command without reading input; returns 0.
+int old_next(void)
+{
+    pc = prog - 1;
+    return 0;
+}
+
+// do not start a new cycle
+// Action requesting a halt: always returns 1.
+int quit(void)
+{
+    return 1;
+}
+
+// unimplemented feature
+// Emits a warning and returns 0 so execution continues.
+int unimp(void)
+{
+    warn("unimplemented feature");
+    return 0;
+}
+
+/*
+ * Utilities
+ */
+// Write str and then a newline to stream.
+// Returns 0 on success; on the first failing write reports the error with
+// serror() and returns -1.
+int check_puts(char *str, FILE *stream)
+{
+    int status = 0;
+
+    if (fputs(str, stream) == EOF) {
+        serror();
+        status = -1;
+    } else if (fputs("\n", stream) == EOF) {
+        serror();
+        status = -1;
+    }
+    return status;
+}
+
+// return pointer to first non blank character in str (or to its terminating
+// '\0' if str is all blanks)
+char *chomp(char *str)
+{
+    // isblank() requires a value representable as unsigned char; plain char
+    // may be negative for bytes >= 0x80
+    for (; *str && isblank((unsigned char)*str); str++)
+        ;
+    return str;
+}
+
+// given memory pointed to by *ptr that currently holds *nmemb members of size
+// size, reallocate it to hold new_nmemb members. on success store new_nmemb in
+// *nmemb, the new memory in *ptr and, if next is non NULL, a pointer to the
+// first member past the old contents in *next (the end of the memory when
+// shrinking, NULL when new_nmemb is 0). if clear is nonzero, zero the newly
+// added members. new_nmemb == 0 frees the memory and sets *ptr to NULL.
+// returns 0 on success; on failure (allocation failed or the byte count
+// overflows) returns -1 and changes nothing
+int resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next, int clear)
+{
+    size_t old_nmemb = *nmemb;
+    size_t kept      = old_nmemb < new_nmemb ? old_nmemb : new_nmemb;
+    void  *tmp;
+
+    if (size && new_nmemb > (size_t)-1 / size)
+        return -1; // new_nmemb * size would overflow
+
+    if (!new_nmemb) {
+        // avoid realloc(p, 0), whose behavior is implementation-defined
+        free(*ptr);
+        *ptr   = NULL;
+        *nmemb = 0;
+        if (next)
+            *next = NULL;
+        return 0;
+    }
+
+    tmp = realloc(*ptr, new_nmemb * size);
+    if (!tmp)
+        return -1;
+    if (clear && new_nmemb > old_nmemb)
+        memset((char *)tmp + old_nmemb * size, 0, (new_nmemb - old_nmemb) * size);
+    *nmemb = new_nmemb;
+    *ptr   = tmp;
+    if (next)
+        *next = (char *)tmp + kept * size;
+    return 0;
+}
+
+// Return a pointer to the first occurrence of delim in str that is not
+// preceded by an escaping backslash, or NULL if there is none. A backslash
+// always escapes the character after it, so a backslash delim never matches.
+// TODO: ignore delim in [], \(\), and maybe \{\}
+char *find_delim(char *str, char delim)
+{
+    char *cur = str;
+
+    while (*cur) {
+        if (*cur == '\\') {
+            cur++; // step onto the escaped character
+            if (!*cur)
+                break; // trailing backslash, nothing left to inspect
+        } else if (*cur == delim) {
+            return cur;
+        }
+        cur++;
+    }
+    return NULL;
+}
+
+// Test whether stream is at EOF by reading one character and pushing it back.
+// Read and push-back failures are reported with serror().
+// Returns nonzero if no character could be read, 0 otherwise.
+int is_eof(FILE *stream)
+{
+    int ch = fgetc(stream);
+
+    if (ch == EOF) {
+        if (ferror(stream))
+            serror();
+        return 1;
+    }
+    if (ungetc(ch, stream) == EOF)
+        serror();
+    return 0;
+}
+
+// move to next file on command line or stdin if this is the first call and
+// there are no files on the command line
+// return 0 for success, -1 for error, 1 for no more files
+// the previously open file (if any) is closed first; a close failure is only
+// reported. the file name "-" selects stdin.
+// NOTE(review): file keeps pointing at the closed stream when 1 is returned,
+// and becomes NULL when fopen fails, which a later call would treat as the
+// "first call" case — confirm callers never call again after 1 or -1
+int next_file()
+{
+    if (file && fclose(file))
+        serror();
+    if (!*files) { // no more names on the command line
+        if (!file) { // first call, no file arguments, use stdin
+            file = stdin;
+            return 0;
+        }
+        return 1;
+    }
+    if (!strcmp(*files, "-")) {
+        file = stdin;
+        files++;
+    } else if (!(file = fopen(*(files++), "r"))) {
+        serror();
+        return -1;
+    }
+    return 0;
+}
+
+// pop cmd from vec, shrinking if we're using <= 1/4 of capacity
+// returns the popped value, or NULL if vec is empty. (a stored value may
+// itself be NULL, e.g. a command offset of 0, so check vec->size first when
+// that distinction matters)
+Command *pop(Vector *vec)
+{
+    Command *top;
+
+    if (!vec->size)
+        return NULL;
+    top = vec->data[--vec->size];
+
+    // shrinking is only an optimization: resize() changes nothing when it
+    // fails, so a failure must not discard the value we already removed
+    if (vec->size <= vec->capacity / 4)
+        (void)resize((void **)&vec->data, &vec->capacity, sizeof(*vec->data), vec->capacity / 2, NULL, 0);
+    return top;
+}
+
+// push cmd onto vec, growing (doubling, plus one) if needed
+// returns the pushed value, or NULL if growing failed (vec is then unchanged).
+// note that the pushed value itself may be NULL (e.g. a command offset of 0);
+// callers storing such values must compare vec->size before and after instead
+Command *push(Vector *vec, Command *cmd)
+{
+    if (vec->size == vec->capacity)
+        // element size is sizeof(*vec->data), i.e. one Command pointer
+        if (resize((void **)&vec->data, &vec->capacity, sizeof(*vec->data), vec->capacity * 2 + 1, NULL, 0))
+            return NULL;
+    return (vec->data[vec->size++] = cmd);
+}
+
+// read one line (max size bytes including the terminating null) from file
+// into buf, without its trailing newline. if the line does not fit, keep its
+// first size - 1 bytes, warn, and discard the rest up to the next newline
+// return 0 when a line was read (including a final line with no newline),
+// EOF when there is nothing left to read or on a read error
+int read_line(char *buf, size_t size, FILE* file)
+{
+    size_t len;
+    int    c;
+
+    if (fgets(buf, size, file) == NULL) {
+        if (ferror(file))
+            serror();
+        return EOF;
+    }
+    len = strlen(buf);
+    if (len && buf[len - 1] == '\n') {
+        buf[len - 1] = '\0';
+        return 0;
+    }
+    // fgets stopped early without a newline: last line of a file that does
+    // not end in a newline, nothing was cut off
+    if (len + 1 < size)
+        return 0;
+
+    // buf is full. look at the next character to see whether the line really
+    // continues or just happened to fit exactly
+    c = fgetc(file);
+    if (c != EOF && c != '\n') {
+        warn("truncating line to %zu bytes", size);
+        while ((c = fgetc(file)) != EOF && c != '\n')
+            ;
+    }
+    if (c == EOF && ferror(file))
+        serror();
+    return 0;
+}
+
+// copy n bytes from src to dest, where dest points somewhere inside buf
+// (i.e. dest - buf <= buf_size). if that would run past buf + buf_size, warn
+// and copy only what fits. return 0 if everything was copied, -1 if truncated
+int sized_memcpy(char *dest, char *src, size_t n, char *buf, size_t buf_size)
+{
+    size_t used      = (size_t)(dest - buf);
+    int    truncated = used + n > buf_size;
+
+    if (truncated) {
+        warn("truncating pattern space to %zu bytes", buf_size);
+        n = buf_size - used;
+    }
+    memcpy(dest, src, n);
+    return truncated ? -1 : 0;
+}
+
+// copy at most size - 1 bytes of the string src into dest and always null
+// terminate it. does nothing if size is 0 or either pointer is NULL
+void strlcpy(char *dest, char *src, size_t size)
+{
+    size_t i;
+
+    if (!dest || !src || size == 0)
+        return;
+
+    for (i = 0; i + 1 < size && src[i] != '\0'; i++)
+        dest[i] = src[i];
+    dest[i] = '\0';
+}
+
+// append the string src to the string in dest, where dest has room for size
+// bytes in total. the result is truncated to fit and null terminated. does
+// nothing if size is 0, a pointer is NULL, or dest already fills size bytes
+void strlcat(char *dest, char *src, size_t size)
+{
+    size_t used;
+
+    if (!dest || !src || size == 0)
+        return;
+
+    used = strlen(dest);
+    if (used < size)
+        strlcpy(dest + used, src, size - used);
+}
+
+// read file at in_path a line at a time (truncating if long lines) and write
+// to out_stream
+// return 0 on success, -1 if in_path cannot be opened or a write fails. the
+// input stream is closed on every path
+int write_file(char *in_path, FILE *out_stream)
+{
+    char  line[LINE_MAX];
+    int   ret       = 0;
+    FILE *in_stream = fopen(in_path, "r");
+
+    if (!in_stream) {
+        serror();
+        return -1;
+    }
+    while (read_line(line, sizeof(line), in_stream) != EOF)
+        if (check_puts(line, out_stream)) {
+            ret = -1; // stop copying, but still close in_stream below
+            break;
+        }
+    if (fclose(in_stream))
+        serror(); // TODO: return error? or just keep going?
+    return ret;
+}
+
+/*
+ * Build and run
+ */
+// read given line building the program. return 0 on success -1 on error
+// while gflags.aci_cont is set the line is a continuation of the text of the
+// previous a, c or i command: it is appended to that text, and a trailing
+// backslash (turned into a newline) means yet another line follows
+// otherwise the line is parsed as a sequence of commands separated by blanks
+// and semicolons, each appended to prog at ni. a # starts a comment that runs
+// to the end of the line
+int build(char *line)
+{
+    line_number++;
+    if (gflags.aci_cont) {
+        char  **text = &(ni - 1)->acir.text;
+        size_t *size = &(ni - 1)->acir.size;
+        size_t  len  = strlen(line);
+
+        if (!len) {// empty line
+            gflags.aci_cont = 0;
+            return 0;
+        }
+        if (line[len - 1] == '\\')
+            line[len - 1] =  '\n';
+        else
+            gflags.aci_cont = 0;
+
+        // assuming *size already counts the terminating null byte, growing by
+        // the full length of line leaves room for everything strcat appends
+        if (resize((void **)text, size, 1, *size + len, NULL, 0)) {
+            serror();
+            return -1;
+        }
+        strcat(*text, line);
+        return 0;
+    }
+
+    for (char *p = line; *p; p++) {
+        Sedfunc_info *info;
+        size_t        idx;
+
+        if (ni == prog + prog_size)
+            if (resize((void **)&prog, &prog_size, sizeof(*prog), prog_size * 2 + 1, (void **)&ni, 1))
+                return -1;
+
+        for (; isblank((unsigned char)*p) || *p == ';'; p++)
+            ;
+        if (!*p || *p == '#') // nothing left on the line, or only a comment
+            break;
+        if (!(p = make_range(&ni->range, p)))
+            return -1;
+        p = chomp(p);
+        if (*p == '!')
+            ni->negate = 1;
+        for (; *p == '!'; p++)
+            ;
+        // index as unsigned char so a negative char cannot index before the
+        // table, and reject letters that have no function behind them
+        idx = (unsigned char)*p;
+        if (idx >= LENGTH(funcs) || !funcs[idx].func) {
+            warn("bad sed function: %c", *p);
+            return -1;
+        }
+        info        = &funcs[idx];
+        ni->func    = info->func;
+        ni->freearg = info->freearg;
+        if (ni->range.naddr > info->naddr) {
+            warn("function %c only takes %d addr", *p, info->naddr);
+            return -1;
+        }
+        p++;
+        if (info->getarg && !(p = info->getarg(ni, p)))
+                return -1;
+        ni++;
+        if (!*p) // getarg() ate the rest of the line
+            break;
+    }
+    return 0;
+}
+
+// release everything built up while parsing and running: the helper vectors,
+// the open wfiles and their names, per command arguments, and prog itself
+void cleanup(void)
+{
+    unsigned w = 0;
+
+    free(braces.data);
+    free(labels.data);
+    free(branches.data);
+
+    // wfiles is filled from the front, the first unused slot has no file
+    while (w < MAX_WFILES && wfiles[w].file) {
+        free(wfiles[w].name);
+        if (fclose(wfiles[w].file))
+            serror();
+        w++;
+    }
+
+    pc = prog;
+    while (pc < ni) {
+        if (pc->freearg)
+            pc->freearg(pc);
+        pc++;
+    }
+
+    free(prog);
+}
+
+// vec holds offsets into prog (stored as Command*). return the first command
+// among them whose label equals label, or NULL if there is none
+Command *find_label(Vector *vec, char *label)
+{
+    unsigned i = 0;
+
+    while (i < vec->size) {
+        Command *candidate = prog + (ptrdiff_t)vec->data[i];
+
+        if (strcmp(label, candidate->label) == 0)
+            return candidate;
+        i++;
+    }
+    return NULL;
+}
+
+// test if current line is selected by cmd->range taking into account
+// cmd->negate. for a two address range cmd->in_match records that the first
+// address has matched and the second has not been reached yet: while it is
+// set only the second address is consulted, otherwise only the first
+int in_range(Command *cmd)
+{
+    Range *range = &cmd->range;
+
+    if (cmd->in_match) {
+        // inside an open range: lines are selected up to and including the
+        // one that matches the end address
+        if (match_addr(&range->end))
+            cmd->in_match = 0;
+        return !cmd->negate;
+    }
+    if (match_addr(&range->beg)) {
+        // a numeric end address at or before the current line selects just
+        // this one line, so the range is only opened otherwise
+        if (range->naddr == 2)
+            cmd->in_match = !(range->end.type == LINE && range->end.line_number <= line_number);
+        return !cmd->negate;
+    }
+    return cmd->negate;
+}
+
+// resolve every pending b,t command: point its jump at the command carrying
+// the matching label, or at the last instruction when the label is empty
+// return 0 on success, -1 if a label does not exist
+int insert_labels(void)
+{
+    // branches stores ptrdiff_t offsets in Command* slots, so a popped NULL
+    // (0) is a valid entry. loop on size rather than on pop()'s result
+    while (branches.size) {
+        Command *branch = prog + (ptrdiff_t)pop(&branches);
+        Command *target = *branch->label ? find_label(&labels, branch->label)
+                                         : ni - 1; // no label, branch to end of script
+
+        if (!target) {
+            warn("no such label: %s", branch->label);
+            return -1;
+        }
+        branch->jump = target;
+    }
+    return 0;
+}
+
+// Read the first address from str into addr and return pointer to character 1
+// past end of address or NULL on error
+// Recognised forms: $ (last line), a decimal line number (0 and values that
+// do not fit in a long are rejected), /regex/ and // (most recently used
+// regex). Anything else is an empty address that matches every line and
+// consumes nothing. The closing delimiter of a regex is overwritten with '\0'
+char *make_address(Address *addr, char *str)
+{
+    char *end = NULL;
+
+    if (*str == '$') {
+        addr->type = LAST;
+        end = str + 1;
+    } else if (isdigit((unsigned char)*str)) { // line number
+        // TODO: strtol func so I don't repeat it
+        long num;
+
+        errno = 0; // strtol only sets errno on failure
+        num   = strtol(str, &end, 10);
+        if (errno == ERANGE) {
+            serror();
+            return NULL;
+        }
+        if (num == 0) {
+            warn("unsupported address: 0");
+            return NULL;
+        }
+        *addr = (Address){ .type = LINE, .line_number = num };
+    } else if (*str == '/') { // TODO: \c any delimiter. make_regex() ?
+        if (*(str + 1) == '/') {
+            addr->type = LASTRE;
+            end = str + 2;
+        } else if (!(end = find_delim(str + 1, *str))) {
+            warn("unclosed regex: %s", str);
+        } else {
+            int err;
+            *(end++) = '\0';
+            addr->type = REGEX;
+            if ((err = regcomp(&addr->regex, str + 1, 0))) {
+                char msg[128]; // TODO: size?
+                regerror(err, &addr->regex, msg, sizeof(msg));
+                warn("bad regex: %s: %s", str, msg);
+                end = NULL;
+            }
+        }
+    } else {
+        addr->type = EVERY;
+        end = str;
+    }
+    return end;
+}
+
+// Parse "addr" or "addr,addr" at the start of str into range, set
+// range->naddr to the number of addresses given (0, 1 or 2) and return a
+// pointer to the character 1 past the end of the range, or NULL on error
+char *make_range(Range *range, char *str)
+{
+    char *rest = make_address(&range->beg, str);
+
+    if (!rest)
+        return NULL;
+
+    if (*rest == ',') {
+        rest = make_address(&range->end, rest + 1);
+        if (!rest)
+            return NULL;
+    } else {
+        range->end.type = IGNORE;
+    }
+
+    if (range->beg.type == IGNORE) { // This should never happen right?
+        warn("bad range: %s", str);
+        return NULL;
+    }
+    if (range->end.type != IGNORE)
+        range->naddr = 2;
+    else if (range->beg.type == EVERY)
+        range->naddr = 0;
+    else
+        range->naddr = 1;
+
+    return rest;
+}
+
+// test if addr matches current pattern space / input line
+// return 1 on a match, 0 otherwise. matching a REGEX address also records it
+// as the most recently used regex for later LASTRE addresses
+int match_addr(Address *addr)
+{
+    switch(addr->type) {
+    default    :
+    case IGNORE: return 0;                                              // empty address, ignore
+    case EVERY : return 1;                                              // every line
+    case LINE  : return line_number == addr->line_number;               // line number
+    case LAST  : return is_eof(file);                                   // last line ($)
+    case REGEX : last_regex = &addr->regex;
+                 return !regexec(&addr->regex, patt_space, 0, NULL, 0); // use included regex
+    case LASTRE: if (!last_regex)                                       // no regex has been used yet,
+                     return 0;                                          // so there is nothing to match
+                 return !regexec(last_regex  , patt_space, 0, NULL, 0); // use most recently used regex
+    }
+}
+
+// open the script at path and feed it to build() one line at a time,
+// stopping at the first line build() rejects
+// return 0 on success, -1 if path cannot be opened, else build()'s error
+int read_script(char *path)
+{
+    char  line[LINE_MAX];
+    FILE *script = fopen(path, "r");
+    int   err    = 0;
+
+    if (!script) {
+        serror();
+        return -1;
+    }
+    // FIXME: random backslashes?
+    while (!err && read_line(line, sizeof(line), script) != EOF)
+        err = build(line);
+    if (fclose(script))
+        serror();
+    return err;
+}
+
+// finishing touches, initial setup, run the program
+// each command's func is called with the command and returns a function that
+// is called in turn. stop as soon as that second call returns non zero and
+// return its value. setup failures return -1
+int run(void)
+{
+    int ret;
+
+    line_number = 0;
+    if (braces.size) {
+        warn("extra {");
+        return -1;
+    }
+    if (insert_labels() || next_file() || new_line())
+        return -1;
+
+    pc = prog;
+    while (!(ret = pc->func(pc)()))
+        pc++;
+
+    return ret;
+}
+
+// parse options (-n, -e script, -f script_file), build the program, append
+// the terminating instruction and run it over the remaining operands
+// if neither -e nor -f produced any command the first operand is the script
+int main(int argc, char **argv)
+{
+    int c, err = 0;
+
+    prog_name = argv[0];
+
+    if (argc == 1) {
+        warn("USAGE:... at some point");
+        return EXIT_FAILURE;
+    }
+
+    // -e script, -f file, -n
+    while (!err && (c = getopt(argc, argv, ":e:f:n")) != -1) {
+        switch (c) {
+            case 'n': gflags.n = 1;                                                       break;
+            case 'e': err = build(optarg);                                                break;
+            case 'f': err = read_script(optarg);                                          break;
+            case ':': err++; fprintf(stderr, "Option -%c requires an operand\n", optopt); break;
+            case '?': err++; fprintf(stderr, "Unrecognized option: -%c\n"      , optopt); break;
+        }
+    }
+
+    // no script yet, first argument is script. do not let this overwrite an
+    // earlier error, and do not hand build() the NULL that ends argv
+    if (!err && ni == prog) {
+        if (optind < argc) {
+            err = build(argv[optind++]);
+        } else {
+            warn("missing script");
+            err = 1;
+        }
+    }
+
+    if (err) {
+        cleanup();
+        return EXIT_FAILURE;
+    }
+
+    // add our last instruction
+    if (ni == prog + prog_size)
+        if (resize((void **)&prog, &prog_size, sizeof(*prog), prog_size + 1, (void **)&ni, 1)) {
+            cleanup();
+            return EXIT_FAILURE;
+        }
+
+    ni->range.beg.type = EVERY;
+    ni->range.end.type = IGNORE;
+    ni->func           = cmd_last;
+    ++ni;
+
+    files   = &argv[optind];
+    err     = run();
+
+    cleanup();
+    return err < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}

sed.c

-#define _POSIX_C_SOURCE 200809L
-
-#include <stdio.h>
-#include <string.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <regex.h>
-#include <stdlib.h>
-
-//FIXME: just write own strlcat/strlcpy to stay POSIX compliant
-#include <bsd/string.h>
-
-enum {
-    CONTINUE, // continue execution
-    NEW_LINE, // read new line, continue current cycle
-    APP_LINE, // append new line, continue current cycle
-    NEW_NEXT, // move to new cycle, read new line
-    OLD_NEXT, // move to new cycle, reuse pattern space
-    QUIT    , // do not start a new cycle
-};
-
-enum {
-    EVERY  =  0,
-    LAST   = -1,
-    REGEX  = -2,
-    LASTRE = -3,
-};
-
-// if line is EVERY , every line
-// if line is LAST  , $ line
-// if line is REGEX , regex
-// if line is LASTRE, last used regex
-typedef struct {
-    regex_t regex;
-    int     line;
-} Address;
-
-typedef struct {
-    Address begin;
-    Address end;
-} Range;
-
-typedef struct command_struct Command;
-struct command_struct {
-    Command      *next;
-    void         *other; // Command* for b, {, etc. regex_t for s///, ...
-    Range         range;
-    size_t        line_number;
-    char         *text;
-    int         (*cmd_func)(Command*);
-    char          negate;
-};
-
-typedef struct sched_write_struct Sched_write;
-struct sched_write_struct {
-    Sched_write *next;
-    char        *text;
-    char         is_file;
-};
-
-char *prog_name;
-
-#define SPACE_BYTES 8192
-
-char   space1[SPACE_BYTES],  space2[SPACE_BYTES];
-char  *patt_space = space1, *hold_space = space2;
-
-regex_t *last_regex;
-Sched_write *sched_writes;
-Command *program, *pc;
-size_t line_number;
-
-struct {
-    unsigned char no_print:1;
-    unsigned char did_sub :1;
-} flags;
-
-char takes_argument[] = "brstwy:";          // function may take an argument
-char following_semi[] = "dDgGhHlnNpPqsxy="; // function may be followed by a semicolon
-int (*cmd_funcs)(Command*)[] = {
-    ['a'] = cmd_a,
-    ['b'] = cmd_b,
-    ['c'] = cmd_c,
-    ['d'] = cmd_d,
-    ['D'] = cmd_D,
-    ['g'] = cmd_g,
-    ['G'] = cmd_G,
-    ['h'] = cmd_h,
-    ['H'] = cmd_H,
-    ['i'] = cmd_i,
-    ['l'] = cmd_l,
-    ['n'] = cmd_n,
-    ['N'] = cmd_N,
-    ['p'] = cmd_p,
-    ['P'] = cmd_P,
-    ['q'] = cmd_q,
-    ['r'] = cmd_r,
-    ['s'] = cmd_s,
-    ['t'] = cmd_t,
-    ['w'] = cmd_w,
-    ['x'] = cmd_x,
-    ['y'] = cmd_y,
-    [':'] = cmd_colon,
-    ['='] = cmd_equal,
-    ['{'] = cmd_lbrace,
-    ['}'] = cmd_rbrace,
-};
-
-
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-#define truncating(s,l) fprintf(stderr,                                  \
-    "%s: script line, %zu: input line, %zu: truncating %s to %d bytes\n",\
-    prog_name, pc->line_number, line_number, (s), (l))
-#define badaddr(s)      fprintf(stderr,                                  \
-    "%s: script line, %zu: bad address, %s\n",                           \
-    prog_name, pc->line_number, (s))
-#define undefined(s)    fprintf(stderr,                                  \
-    "%s: script line, %zu: undefined results, %s\n",                     \
-    prog_name, pc->line_number, (s))
-
-// FIXME: brackets
-char *regex_end(char *str, char delim)
-{
-    char *p, escape;
-    for (p = str, escape = 0; *p; p++)
-        if (escape)
-            escape = 0;
-        else if (*p == '\\')
-            escape = 1;
-        else if (*p == delim)
-            return p;
-    return NULL;
-}
-
-char *make_address(Address *addr, char *str)
-{
-    if (*str == '/' || *str == '\\') { // regex
-        char delim, *end;
-        if (*str == '\\') {
-            str++;
-            if (*str == '\\' || *str == '\n') {
-                badaddr(str);
-                return NULL;
-            }
-        }
-        delim = *str;
-        if (*++str == delim) { // empty regex
-            end = str;
-            *addr = (Address){ .line = LASTRE };
-        } else {
-            end = regex_end(str, delim);
-            if (!end) {
-                badaddr(str);
-                return NULL;
-            }
-            *end = '\0';
-            addr->line = REGEX;
-            if (regcomp(&addr->regex, str, REG_NOSUB)) {
-                badaddr(str);
-                return NULL;
-            }
-        }
-        return end + 1;
-    }
-    if (*str == '$') { // last line
-        *addr = (Address) { .line = LAST };
-        return str + 1;
-    }
-    { // line number
-        char *end;
-        long line = strtol(str, &end, 10);
-        if (line == LONG_MIN || line == LONG_MAX) {
-            badaddr(str);
-            return NULL;
-        }
-        *addr = (Address){ .line = line ? line : EVERY};
-        return end;
-    }
-}
-
-char *make_range(Range *range, char *str)
-{
-    str = make_address(&range->begin, str);
-    if (str && *str == ',') {
-        if (range->begin->line == EVERY)
-            undefined("empty start of range");
-        str = make_address(&range->end, str + 1);
-        if (range->end->line == EVERY)
-            undefined("empty end of range");
-    } else
-        range->end = range->begin;
-    return str;
-}
-
-FILE *file_open(char *path, char *mode)
-{
-    FILE* file = fopen(path, mode);
-    if (!file)
-        perror(prog_name);
-    return file;
-}
-
-void file_close(FILE *file)
-{
-    if (fclose(file) == EOF)
-        perror(prog_name);
-}
-
-void print(FILE *file, char *str, int only_first)
-{
-    for (char *end = strchr(str, '\n'); end; str = end + 1, end = strchr(str, '\n')) {
-        int len = end - str + 1;
-        if (len > LINE_MAX)
-            truncating("output line", LINE_MAX);
-        printf("%.*s\n", MIN(len - 1, LINE_MAX), str);
-    }
-    if (*str) {
-        if (strlen(str) > LINE_MAX)
-            truncating("output line", LINE_MAX);
-        printf("%.*s\n", LINE_MAX, str);
-    }
-}
-
-int read_line(char *buf, size_t size, FILE* file)
-{
-    size_t len;
-    if (fgets(buf, size, file) == NULL) {
-        if (ferror(file))
-            perror(prog_name);
-        return EOF;
-    }
-    len = strlen(buf);
-    if (buf[len] == '\n') {
-        buf[len] =  '\0';
-    } else {
-        truncating("intput line", size);
-        do {
-            if (fgets(buf, size, file) == NULL) {
-                if (ferror(file))
-                    perror(prog_name);
-                return EOF;
-            }
-        } while (buf[strlen(buf)] != '\n');
-    }
-    return 0;
-}
-
-// *line == func letter
-char *read_s_argument(Command *cmd, char *line)
-{
-    char     delim = *line, *end;
-    regex_t *re = malloc(sizeof(regex_t));
-
-    if (!re) {
-        perror(prog_name);
-        return NULL;
-    }
-
-    if (!(end = regex_end(++line, delim))) { // get regex for s
-        fprintf(stderr, "bad regex in s///\n"); // FIXME: proper error message
-        return NULL;
-    }
-    *end = '\0';
-    if (regcomp(re, line, 0)) {
-        fprintf(stderr, "bad regex in s///\n");
-        return NULL;
-    }
-    cmd->other = (void*)re;
-    line = end + 1;
-    if (!(end = regex_end(line, delim))) { // get replacement text for s
-        fprintf(stderr, "bad replacement text in s///\n");
-        return NULL;
-    }
-    *end = '\0';
-    cmd->text = strdup(line);
-
-    for (char *p = end + 1; *p; p++)
-}
-
-int read_script(char *line)
-{
-    for (char *p = line; *p; p++) {
-        if (isblank(*p) || *p == ';')
-            continue;
-        pc = new_command();
-        pc->line_number = line_number;
-        p = make_range(&pc->range, p);
-        while (isblank(*p))
-            p++;
-        if (*p == '!')
-            pc->negate = 1;
-        while (*p == '!')
-            p++;
-        if (pc->negate && isblank(*p))
-            undefined("blank following !");
-        if (!cmd_funcs[*p]) {
-            fprintf(stderr, "no such function %c\n", *p);
-            return -1;
-        }
-        pc->cmd_func = cmd_funcs[*p];
-        if (strchr(takes_argument, *p))
-            p = read_argument(pc, p);
-
-        // FIXME: 0,1,2 addr
-    }
-}
-
-int read_char(FILE* file)
-{
-    int c = fgetc(file);
-    if (c == EOF && ferror(file))
-        perror(prog_name);
-    return c;
-}
-
-Command *new_command(void)
-{
-    Command **p, *new = calloc(1, sizeof(Command));
-
-    if (!new) {
-        perror(prog_name);
-        return;
-    }
-    for (p = &Program; *p; p = &(*p)->next)
-        ;
-    return (*p = new);
-}
-
-void schedule_write(char *text, int is_file)
-{
-    Sched_write **p, *new = malloc(sizeof(Sched_write));
-
-    if (!new) {
-        perror(prog_name);
-        return;
-    }
-    *new = (Sched_write){ NULL, text, is_file };
-
-    for (p = &sched_writes; *p; p = &(*p)->next)
-        ;
-    *p = new;
-}
-
-void do_sched_writes(void)
-{
-    for (Sched_write *p = sched_writes; p; p = p->next) {
-        if (p->is_file) {
-            char line[LINE_MAX];
-            FILE *file = file_open(p->text, "r");
-            if (!file)
-                continue;
-            while (read_line(line, LINE_MAX, file) != EOF)
-                print(stdout, line, 0);
-            file_close(file);
-        } else {
-            print(stdout, p->text, 0);
-        }
-    }
-}
-
-int cmd_a(Command *cmd)
-{
-    schedule_write(cmd->text, 0);
-    return CONTINUE;
-}
-
-int cmd_b(Command *cmd)
-{
-    pc = cmd->other;
-    return CONTINUE;
-}
-
-int cmd_c(Command *cmd)
-{
-    *patt_space = '\0';
-    print(stdout, cmd->text, 0);
-    return CONTINUE;
-}
-
-int cmd_d(Command *cmd)
-{
-    return NEW_NEXT;
-}
-
-int cmd_D(Command *cmd)
-{
-    char *n = strchr(patt_space, '\n');
-    if (!n)
-        return NEW_NEXT;
-    memmove(patt_space, n, strlen(n) + 1);
-    return OLD_NEXT;
-}
-
-int cmd_g(Command *cmd)
-{
-    strcpy(patt_space, hold_space);
-    return CONTINUE;
-}
-
-int cmd_G(Command *cmd)
-{
-    size_t patt_len = strlen(patt_space);
-    size_t hold_len = strlen(hold_space);
-
-    if (patt_len + hold_len + 2 >= SPACE_BYTES)
-        truncating("pattern space", SPACE_BYTES);
-
-    strlcat(patt_space, "\n"      , SPACE_BYTES);
-    strlcat(patt_space, hold_space, SPACE_BYTES);
-
-    return CONTINUE;
-}
-
-int cmd_h(Command *cmd)
-{
-    strcpy(hold_space, patt_space);
-    return CONTINUE;
-}
-
-int cmd_H(Command *cmd)
-{
-    size_t patt_len = strlen(patt_space);
-    size_t hold_len = strlen(hold_space);
-
-    if (patt_len + hold_len + 2 >=  SPACE_BYTES)
-        truncating("hold space", SPACE_BYTES);
-
-    strlcat(hold_space, "\n"      , SPACE_BYTES);
-    strlcat(hold_space, patt_space, SPACE_BYTES);
-
-    return 0;
-}
-
-int cmd_i(Command *cmd)
-{
-    print(stdout, cmd->text, 0);
-    return CONTINUE;
-}
-
-int cmd_l(Command *cmd)
-{
-    return CONTINUE;
-}
-
-int cmd_n(Command *cmd)
-{
-    if (!flags.no_print)
-        print(stdout, patt_space, 0);
-
-    return NEW_LINE;
-}
-
-int cmd_N(Command *cmd)
-{
-    return APP_LINE;
-}
-
-int cmd_p(Command *cmd)
-{
-    print(stdout, patt_space, 0);
-    return CONTINUE;
-}
-
-int cmd_P(Command *cmd)
-{
-    print(stdout, patt_space, 1);
-    return CONTINUE;
-}
-
-int cmd_q(Command *cmd)
-{
-    return QUIT;
-}
-
-int cmd_r(Command *cmd)
-{
-    schedule_write(cmd->text, 1);
-    return CONTINUE;
-}
-
-int cmd_t(Command *cmd)
-{
-    if (flags.did_sub)
-        pc = cmd->other;
-    return CONTINUE;
-}
-
-int cmd_w(Command *cmd)
-{
-    FILE *f = file_open(cmd->text, "a");
-
-    if (f) {
-        print(f, patt_space, 0);
-        file_close(f);
-    }
-    return CONTINUE;
-}
-
-int cmd_x(Command *cmd)
-{
-    char *tmp  = hold_space;
-    hold_space = patt_space;
-    patt_space = tmp;
-    return CONTINUE;
-}
-
-int cmd_y(Command *cmd)
-{
-    return CONTINUE;
-}
-
-int cmd_colon(Command *cmd)
-{
-    return CONTINUE;
-}
-
-int cmd_equal(Command *cmd)
-{
-    printf("%zu\n", line_number);
-    return CONTINUE;
-}
-
-int cmd_lbrace(Command *cmd)
-{
-    return CONTINUE;
-}
-
-int cmd_rbrace(Command *cmd)
-{
-    return CONTINUE;
-}
-
-int main(int argc, char **argv)
-{
-    // -e script, -f file, -n
-    int c;
-
-    while ((c = getopt(argc, argv, ":e:f:n")) != -1) {
-        switch (c) {
-            case 'n': flags.no_print = 1; break;
-            case 'e': break; // FIXME: read script
-            case 'f': break; // FIXME: read script
-            case ':': fprintf(stderr, "Option -%c requires an operand\n", optopt); break;
-            case '?': fprintf(stderr, "Unrecognized option: -%c\n"      , optopt); break;
-        }
-    }
-
-    // FIXME: die/usage if getopt errors
-    for (; optind < argc; optind++) {
-        // FIXME: if no -e and no -f first arg here is program
-        // FIXME: files to work on
-    }
-
-    return 0;
-}

sed2.c

-#define _POSIX_C_SOURCE 200809L
-#include <ctype.h>
-#include <errno.h>
-#include <limits.h>
-#include <regex.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#define MAX_WFILES  10   // minimum required by POSIX
-#define LABEL_BYTES 9    // minimum required by POSIX + '\0'
-#define SPACE_BYTES 8192 // minimum required by POSIX
-
-#define USE(a) ((void)(a)) // to escape unused warnings
-#define serror() fprintf(stderr, "%s: %s,%s,%d: %s\n", prog_name, __FILE__, __func__, __LINE__, strerror(errno))
-#define warn(...) do{ fprintf(stderr, "%s: %zu: ", prog_name, line_number); fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); }while(0)
-
-typedef enum {
-    CONTINUE, // continue execution
-    NEW_LINE, // read new line, continue current cycle
-    APP_LINE, // append new line, continue current cycle
-    NEW_NEXT, // move to new cycle, read new line
-    OLD_NEXT, // move to new cycle, reuse pattern space
-    QUIT    , // do not start a new cycle
-    UNIMP   , // unimplemented feature
-    ERROR   , // he's dead Jim
-} Action;
-
-typedef struct {
-    enum {
-        IGNORE, // empty address, ignore
-        EVERY , // every line
-        LINE  , // line number
-        LAST  , // last line ($)
-        REGEX , // use included regex
-        LASTRE, // use most recently used regex
-    } type;
-    union {
-        size_t  line_number;
-        regex_t regex;
-    };
-} Address;
-
-// naddr == 0 iff beg.type == EVERY  && end.type == IGNORE
-// naddr == 1 iff beg.type != IGNORE && end.type == IGNORE
-// naddr == 2 iff beg.type != IGNORE && end.type != IGNORE
-typedef struct {
-    unsigned naddr; // not necessary, but helpful
-    Address  beg;
-    Address  end;
-} Range;
-
-typedef Action (*Sedfunc)(void);
-typedef struct {
-    Range   range;
-    Sedfunc func;
-    void   *extra;
-    unsigned char negate;
-} Command;
-
-typedef char  *(*Argfunc)(Command*, char*);
-typedef struct {
-    char          letter;
-    Sedfunc       func;
-    Argfunc       getarg;
-    unsigned char naddr    :2; // max address function takes
-    unsigned char semicolon:1; // can this command be followed by a semicolon?
-} Sedfunc_info;
-
-typedef struct {
-    regex_t       regex;
-    char         *replace;
-    FILE         *file;
-    unsigned      occurrence;
-    unsigned char flag_p:1;
-    unsigned char flag_g:1;
-} Extra_s;
-
-typedef struct {
-    char  *text;
-    size_t size;
-} Extra_aci;
-
-struct {
-    char *name;
-    FILE *file;
-} wfiles[MAX_WFILES];
-
-#define unimp_func(c) Action cmd_##c(void) { return UNIMP; }
-unimp_func(a)
-unimp_func(b)
-unimp_func(c)
-unimp_func(d)
-unimp_func(D)
-unimp_func(g)
-unimp_func(G)
-unimp_func(h)
-unimp_func(H)
-unimp_func(i)
-unimp_func(l)
-unimp_func(n)
-unimp_func(N)
-unimp_func(p)
-unimp_func(P)
-unimp_func(q)
-unimp_func(r)
-unimp_func(s)
-unimp_func(t)
-unimp_func(w)
-unimp_func(x)
-unimp_func(y)
-unimp_func(colon)
-unimp_func(equal)
-unimp_func(lbrace)
-unimp_func(rbrace)
-
-char *get_aci_arg  (Command*, char*);
-char *get_bt_arg   (Command*, char*);
-char *get_r_arg    (Command*, char*);
-char *get_s_arg    (Command*, char*);
-char *get_w_arg    (Command*, char*);
-char *get_y_arg    (Command*, char*);
-char *get_colon_arg(Command*, char*);
-
-Sedfunc_info funcs[] = {
-            //|Command letter
-            //|    |Command function
-            //|    |           |Get argument function
-            //|    |           |              |Max addresses
-            //|    |           |              |  |Can be followed by a semicolon
-            //|    |           |              |  |       |Argument
-            //|    |           |              |  |       |               |Explanation
-    ['a'] = { 'a', cmd_a     , get_aci_arg  , 1, 0, },// char    *text : schedule write of text for later
-    ['b'] = { 'b', cmd_b     , get_bt_arg   , 2, 0, },// Command *label: branch to label (extra holds char *label while building, Command *label while running)
-    ['c'] = { 'c', cmd_c     , get_aci_arg  , 2, 0, },// char    *text : delete pattern space, at 0 or 1 addr or end of 2 addr, write text
-    ['d'] = { 'd', cmd_d     , NULL         , 2, 1, },//               : delete pattern space
-    ['D'] = { 'D', cmd_D     , NULL         , 2, 1, },//               : delete to first newline and start new cycle without reading (if no newline, d)
-    ['g'] = { 'g', cmd_g     , NULL         , 2, 1, },//               : replace pattern space with hold space
-    ['G'] = { 'G', cmd_G     , NULL         , 2, 1, },//               : append newline and hold space to pattern space
-    ['h'] = { 'h', cmd_h     , NULL         , 2, 1, },//               : replace hold space with pattern space
-    ['H'] = { 'H', cmd_H     , NULL         , 2, 1, },//               : append newline and pattern space to hold space
-    ['i'] = { 'i', cmd_i     , get_aci_arg  , 1, 0, },// char    *text : write text
-    ['l'] = { 'l', cmd_l     , NULL         , 2, 1, },//               : write pattern space in 'visually unambiguous form'
-    ['n'] = { 'n', cmd_n     , NULL         , 2, 1, },//               : write pattern space (unless -n) read to replace pattern space (if no input, quit)
-    ['N'] = { 'N', cmd_N     , NULL         , 2, 1, },//               : append to pattern space separated by newline, line number changes (if no input, quit)
-    ['p'] = { 'p', cmd_p     , NULL         , 2, 1, },//               : write pattern space
-    ['P'] = { 'P', cmd_P     , NULL         , 2, 1, },//               : write pattern space up to first newline
-    ['q'] = { 'q', cmd_q     , NULL         , 1, 1, },//               : quit
-    ['r'] = { 'r', cmd_r     , get_r_arg    , 1, 0, },// char    *file : write contents of file (unable to open/read treated as empty file)
-    ['s'] = { 's', cmd_s     , get_s_arg    , 2, 1, },// Extra_s *ext  : find/replace/all that crazy s stuff
-    ['t'] = { 't', cmd_t     , get_bt_arg   , 2, 0, },// Command *label: if s/// succeeded (since input or last t) brance to label (end if no label)
-    ['w'] = { 'w', cmd_w     , get_w_arg    , 2, 0, },// FILE    *file : append pattern space to file
-    ['x'] = { 'x', cmd_x     , NULL         , 2, 1, },//               : exchange pattern and hold spaces
-    ['y'] = { 'y', cmd_y     , get_y_arg    , 2, 1, },// char    *sets : replace characters in set1 with characters in set2 (sets is two adjacent strings)
-    [':'] = { ':', cmd_colon , get_colon_arg, 0, 0, },// char    *label: defines label for later b and t commands
-    ['='] = { '=', cmd_equal , NULL         , 1, 1, },//               : printf("%d\n", line_number);
-    ['{'] = { '{', cmd_lbrace, NULL         , 2, 0, },// Command *close: if we match, run commands, otherwise jump to close
-    ['}'] = { '}', cmd_rbrace, NULL         , 0, 0, },// Command *open : noop, hold onto open for ease of building scripts
-
-    [CHAR_MAX] = { 0, NULL, NULL, 0, 0 }
-};
-
-struct {
-    unsigned char n       :1; // -n (no print)
-    unsigned char s       :1; // s/// happened
-    unsigned char aci_cont:1; // a,c,i text continuation
-} gflags;
-
-char     space1[SPACE_BYTES],  space2[SPACE_BYTES];
-char    *patt_space = space1, *hold_space = space2;
-
-char    *prog_name;      // argv[0]
-Command *prog, *pc, *ni; // program memory, program counter, next instruction (while creating program)
-size_t   prog_size;      // number of Commands in prog
-
-size_t   line_number;    // of script when building, of input when running
-
// Resize the array at *ptr, which currently holds *nmemb members of size
// bytes each, so that it holds new_nmemb members.
//
// On success returns 0, stores the (possibly moved) array in *ptr and
// new_nmemb in *nmemb. If next is non-NULL, *next is set to the first member
// past the ones that were kept: one past the old end when growing, one past
// the new end when shrinking. If clear is nonzero, newly added members are
// zeroed.
//
// On failure (realloc returned NULL, or new_nmemb * size does not fit in a
// size_t) returns -1 and changes nothing.
// NOTE: a zero-byte request is handed to realloc unchanged; what realloc does
// with it is implementation-defined.
int grow(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next, int clear)
{
    // refuse requests whose byte count would wrap around
    if (size != 0 && new_nmemb > (size_t)-1 / size)
        return -1;

    char *base = realloc(*ptr, new_nmemb * size);
    if (!base)
        return -1;

    // Every address must be derived from the pointer realloc returned: the
    // old block may have moved, and *ptr may have been NULL to begin with.
    size_t kept = *nmemb < new_nmemb ? *nmemb : new_nmemb;
    char  *tail = base + kept * size;

    if (clear && new_nmemb > kept)
        memset(tail, 0, (new_nmemb - kept) * size);

    *ptr   = base;
    *nmemb = new_nmemb;
    if (next)
        *next = tail;
    return 0;
}
-
// Skip leading blanks (as classified by isblank) and return a pointer to the
// first non-blank character of str, or to its terminating '\0' if there is
// none.
char *chomp(char *str) {
    // <ctype.h> functions need a value representable as unsigned char (or
    // EOF); plain char may be negative, so convert first. The terminating
    // '\0' is not blank, which is what ends the loop.
    while (isblank((unsigned char)*str))
        str++;
    return str;
}
-
// Find the first instance of delim in str that is not escaped by a preceding
// backslash. Returns a pointer to it, or NULL if there is none.
// A backslash is always treated as the escape character, so a delim of '\\'
// is never found.
// TODO: ignore delim in [], \(\), and maybe \{\}
char *find_delim(char *str, char delim) {
    int escaped = 0; // nonzero when the previous character was an unescaped '\'

    for (char *p = str; *p; p++) {
        if (escaped)
            escaped = 0; // this character is escaped, so it can never match
        else if (*p == '\\')
            escaped = 1;
        else if (*p == delim)
            return p;
    }
    return NULL;
}
-
-// Read the first address from str into addr and return pointer to character 1
-// past end of address or NULL on error
-char *make_address(Address *addr, char *str) {
-    char *end = NULL;
-
-    if (*str == '$') {
-        addr->type = LAST;
-        end = str + 1;
-    } else if (isdigit(*str)) { // line number
-        // TODO: strtol func so I don't repeat it
-        long num = strtol(str, &end, 10);
-        if (num == LONG_MAX)
-            serror();
-        if (num == 0) {
-            warn("unsupported address: 0");
-            return NULL;
-        }
-        *addr = (Address){ .type = LINE, .line_number = num };
-    } else if (*str == '/') { // TODO: \c any delimiter. make_regex() ?
-        if (*(str + 1) == '/') {
-            addr->type = LASTRE;
-            end = str + 2;
-        } else if (!(end = find_delim(str + 1, *str))) {
-            warn("unclosed regex: %s", str);
-        } else {
-            int err;
-            *end = '\0';
-            addr->type = REGEX;
-            if ((err = regcomp(&addr->regex, str + 1, REG_NOSUB))) {
-                char msg[128]; // TODO: size?
-                regerror(err, &addr->regex, msg, sizeof(msg));
-                warn("bad regex: %s: %s", str, msg);
-                end = NULL;
-            }
-        }
-    } else {
-        addr->type = EVERY;
-        end = str;
-    }
-    return end;
-}
-
-// Read the first range from str in range and return pointer to character 1
-// past end of range or NULL on error
-char *make_range(Range *range, char *str) {
-    char *p = str;
-
-    if (!(p = make_address(&range->beg, p)))
-        return NULL;
-
-    if (*p != ',')
-        range->end.type = IGNORE;
-    else if (!(p = make_address(&range->end, p + 1)))
-        return NULL;
-
-    if      (range->beg.type == EVERY  && range->end.type == IGNORE)
-        range->naddr = 0;
-    else if (range->beg.type != IGNORE && range->end.type == IGNORE)
-        range->naddr = 1;
-    else if (range->beg.type != IGNORE && range->end.type != IGNORE)
-        range->naddr = 2;
-    else { // This should never happen right?
-        warn("bad range: %s", str);
-        return NULL;
-    }
-
-    return p;
-}
-
-// Read the first argument from str into cmd->extra and return pointer to 1
-// past end of argument or NULL on error
-// On arguments that are whole line, return a pointer to any '\0'
-char *ge