Commits

Jason Moiron committed 10ca238

add code to filebench

  • Participants

Comments (0)

Files changed (6)

+
CC=gcc
OBJS=filetest test.o
#OBJS=filetest memcachedtest test.o
CFLAGS=-Wall -pedantic -std=gnu99  -g
LIBS=-lm -pthread
# -c = object only, -lm links math
LIBFLAGS=-c
MCSRC=-I./
MCLIB=./libmemcached/libmemcached.la
#-lmemcached

# These targets do not name files, so make must never look for them on disk.
.PHONY: all objects clean

all: filetest

# Kept as an alias so that `make objects` keeps working.
objects: test.o

# Real file target: test.o is rebuilt only when its sources change.
test.o: test.c test.h
	$(CC) $(LIBFLAGS) $(CFLAGS) test.c

# Libraries go after the objects that use them; linkers resolve symbols
# left to right, so a leading -lm can be dropped before sqrt() is needed.
filetest: filetest.c test.o test.h
	$(CC) $(CFLAGS) filetest.c test.o $(LIBS) -o filetest

#memcachedtest: memcachedtest.c
#	libtool --mode=link $(CC) $(CFLAGS) $(LIBS) $(MCSRC) memcachedtest.c test.o $(MCLIB) -o memcachedtest

clean:
	rm -f $(OBJS)
+/* for lseek64 */
+#define _LARGEFILE64_SOURCE
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <ctype.h>
+
+#include <sys/time.h>
+#include <time.h>
+
+#include <sys/types.h>
+#include <pthread.h>
+
+#include "test.h"
+
+/* This simple test is one of two that are built to demonstrate that absolute
+ * latency is not the only measure of speed;  that latency under load is very
+ * important as well.
+ *
+ * The hypothesis is that when doing a small disk read under low system load,
+ * the disk can very well be faster than doing a network RTT.  But when doing
+ * many reads, the disk slows down whereas loading from network memory remains
+ * fast.
+ *
+ * In addition, although threads will read in roughly the same blocks of the
+ * disk (to mimic the load of a webserver), the threads will _start_ in
+ * randomized portions of the disk, which will increase seeks and hurt
+ * performance.
+ */
+
+#define MAX_THREADS 512
+
+typedef long long unsigned int llui;
+
/* Settings for one filetest run; filled by set_defaults() and do_options(). */
struct options {
    int verbose;    /* non-zero selects the labelled, per-thread output */
    int blocksize;  /* number of bytes requested by each read() */
    int threads;    /* number of worker threads to start */
    int reads;      /* number of reads each worker performs */
    char *device;   /* heap-allocated path of the device that is read */
};
+
/* Per-thread argument handed to worker(); the worker frees it when done. */
struct worker_arg {
    struct options *opts;   /* run settings shared by every worker */
    uint64_t pos;           /* byte offset at which this worker starts */
    uint64_t maxsize;       /* size of the device in bytes */
    int num;                /* worker index, also its slot in stats[] */
};
+
+void set_defaults(struct options *opts);
+void do_options(struct options *opts, int argc, char **argv);
+void do_test(struct options *opts);
+void *worker(void *arg);
+
+struct timeval *stats;
+
+int main(int argc, char **argv) {
+    struct options opts;
+    
+    set_defaults(&opts);
+    do_options(&opts, argc, argv);
+    stats = (struct timeval *)calloc(opts.threads, sizeof(struct timeval));
+    do_test(&opts);
+    return 0;
+}
+
/*
 * Report the current errno through perror(), prefixed with `string`, and
 * terminate the process with exit status -1.  Does not return.
 */
void err_exit(const char *string) {
    const int failure_status = -1;

    perror(string);
    exit(failure_status);
}
+
+void do_test(struct options *opts) {
+    int fd, i;
+    uint64_t numblocks=0, bytes=0;
+    struct timeval t0, t1, *avg, *min, *max, *stdd;
+    struct worker_arg *a;
+    pthread_t *threads;
+
+    if ((fd = open(opts->device, O_RDONLY)) < 0) {
+        err_exit("open");
+    }
+    if (ioctl(fd, BLKGETSIZE, &numblocks) == -1) {
+        err_exit("ioctl");
+    }
+    bytes = numblocks<<9;
+    if (opts->verbose) {
+        printf("Benchmarking %s [%llu MB, %llu bytes]\n", opts->device, 
+           (llui)(numblocks / 2048), (llui)bytes);
+    }
+
+    threads = (pthread_t *)calloc(opts->threads, sizeof(pthread_t));
+
+    /* get t0 and seed the random number generator */
+    gettimeofday(&t0, NULL);
+    srand(t0.tv_usec);
+    for(i=0; i < opts->threads; i++) {
+        a = (struct worker_arg *)calloc(1, sizeof(struct worker_arg));
+        a->opts = opts;
+        a->pos = (random() * numblocks / RAND_MAX) << 9;
+        a->maxsize = bytes;
+        a->num = i;
+        /* bounds check */
+        if (a->pos + a->opts->blocksize > bytes) {
+            a->pos -= a->opts->blocksize;
+        }
+        pthread_create(&threads[i], NULL, worker, (void *)a);
+    }
+
+    for(i=0; i < opts->threads; i++) {
+        if((void *)threads[i] != NULL) {
+            pthread_join(threads[i], NULL);
+        }
+    }
+
+    delta(&t0, NULL, &t1);
+
+    avg = average_time(stats, opts->threads);
+    min = min_time(stats, opts->threads);
+    max = max_time(stats, opts->threads);
+    stdd = stddev_time(stats, opts->threads);
+    if (opts->verbose) {
+        printf("tot: %d.%06d, ", (int)t1.tv_sec, (int)t1.tv_usec);
+        printf("avg: %d.%06d, ", (int)avg->tv_sec, (int)avg->tv_usec);
+        printf("min: %d.%06d, ", (int)min->tv_sec, (int)min->tv_usec);
+        printf("max: %d.%06d, ", (int)max->tv_sec, (int)max->tv_usec);
+        printf("stddev: %d.%06d\n", (int)stdd->tv_sec, (int)stdd->tv_usec);
+    } else {
+        printf("%d.%06d, %d.%06d, %d.%06d, %d.%06d, %d.%06d\n",
+                (int)t1.tv_sec, (int)t1.tv_usec,
+                (int)avg->tv_sec, (int)avg->tv_usec,
+                (int)min->tv_sec, (int)min->tv_usec,
+                (int)max->tv_sec, (int)max->tv_usec,
+                (int)stdd->tv_sec, (int)stdd->tv_usec);
+    }
+    free(avg);
+    free(min);
+    free(max);
+    free(stdd);
+}
+
+void *worker(void *arg) {
+    /* int blocksize; int threads; int reads; int pos;*/
+    int fd, i;
+    long int r;
+    uint64_t location;
+    char *buffer;
+    struct worker_arg *w = (struct worker_arg *)arg;
+    struct timeval t0, t1;
+
+    buffer = (char *)calloc(w->opts->blocksize + 1, sizeof(char));
+
+    fd = open(w->opts->device, O_RDONLY);
+    if((lseek64(fd, (off64_t)w->pos, SEEK_SET)) == -1) {
+        err_exit("lseek64");
+    }
+    location = w->pos;
+    gettimeofday(&t0, NULL);
+    /* TODO: make sure we don't try to read past the end of the device.. */
+    for(i=0; i < w->opts->reads; i++) {
+        if(read(fd, buffer, w->opts->blocksize) < 0) {
+            err_exit("read");
+        }
+        /* we want to seek a random distance from the current position */
+        r = random();
+        /* if r is odd, make it a backwards seek */
+        if (r & 1) r = -r;
+        /* increment our current location and do bounds checking */
+        location += r;
+        if(location < 0 || location > (w->maxsize - w->opts->blocksize)) {
+            r = -r;
+            location += 2 * r;
+        }
+        lseek64(fd, (off64_t)r, SEEK_CUR); 
+    }
+    delta(&t0, NULL, &t1);
+    if (w->opts->verbose) {
+        printf("%3d read %d bytes in %d.%06d (%d reads, offset %7llu MB)\n", w->num, 
+                w->opts->blocksize * w->opts->reads, (int)t1.tv_sec, (int)t1.tv_usec, 
+                w->opts->reads, (llui)(w->pos / 1048576));
+    }
+    memcpy(&stats[w->num], &t1, sizeof(struct timeval));
+    free(arg);
+    return NULL;
+}
+
+/* help and options */
+
/*
 * Write the one-line usage summary to stdout; when `more` is non-zero,
 * follow it with the longer description of the program.
 */
void print_help(int more) {
    fputs("options: -v/-h, -n <nthreads>, -b <blocksize>, -r <reads> [device]\n",
          stdout);
    if (!more) {
        return;
    }
    fputs("\n"
          "Filetest does <reads> sequential reads in <nthreads> simultaneous threads to \n"
          "simulate a webserver's load were it to use disk for cache.  Each read will\n"
          "be <blocksize> bytes and occur somewhere within [device] (default: /dev/sda)\n"
          "This is mostly to illustrate the point that networked RAM is faster than Disk.\n"
          "By default, it outputs (total, avg(mean), min, max, standard_dev)\n",
          stdout);
}
+
+/* defaults: 64 threads, 32 byte blocks, /dev/sda */
+
+void set_defaults(struct options *opts) {
+    opts->verbose = 0;
+    opts->blocksize = 128;
+    opts->threads = 64;
+    opts->reads = 100;
+    opts->device = (char *)calloc(strlen("/dev/sda"), sizeof(char));
+    strcpy(opts->device, "/dev/sda");
+}
+
+/* Handle the options for the main filtesting program. */
+
+void do_options(struct options *opts, int argc, char **argv) {
+    char c;
+    while((c = getopt(argc, argv, "hvb:r:n:")) != -1) {
+        switch(c) {
+            case 'h':
+                print_help(1);
+                exit(0);
+            case 'v':
+                opts->verbose = 1;
+                break;
+            case 'b':
+                opts->blocksize = strtol(optarg, (char **)NULL, 10);
+                break;
+            case 'r':
+                opts->reads = strtol(optarg, (char **)NULL, 10);
+                break;
+            case 'n':
+                opts->threads = strtol(optarg, (char **)NULL, 10);
+                if (opts->threads > MAX_THREADS) {
+                    printf("Error: max threads is %d (attempted %d)\n", MAX_THREADS, opts->threads);
+                    exit(-1);
+                }
+                break;
+            default:
+                print_help(0);
+                exit(0);
+        }
+    }
+    if (optind < argc) {
+        free(opts->device);
+        opts->device = (char *)calloc(strlen(argv[optind])+1, sizeof(char));
+        strncpy(opts->device, argv[optind], strlen(argv[optind]));
+    }
+    if (opts->verbose) {
+        printf("verbose: %d\nblocksize: %d\nthreads: %d\ndevice: %s\n",
+                opts->verbose, opts->blocksize, opts->threads, opts->device);
+    }
+}
+

File memcachedtest.c

+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <ctype.h>
+
+#include <sys/time.h>
+#include <time.h>
+
+#include <sys/types.h>
+#include <pthread.h>
+#include <libmemcached/memcached.h>
+
+#include "test.h"
+
+/* This simple test is the second of two that are built to demonstrate that absolute
+ * latency is not the only measure of speed;  that latency under load is very
+ * important as well.
+ *
+ * The hypothesis is that when doing a small disk read under low system load,
+ * the disk can very well be faster than doing a network RTT.  But when doing
+ * many reads, the disk slows down whereas loading from network memory remains
+ * fast.
+ */
+
+#define MAX_THREADS 512
+
/* Settings for one memcachedtest run; filled by set_defaults() and do_options(). */
struct options {
    int verbose;    /* non-zero selects the labelled, per-thread output */
    int blocksize;  /* block size in bytes, used in the per-thread report */
    int threads;    /* number of worker threads to start */
    int reads;      /* number of loop iterations each worker performs */
    char *host;     /* memcached server host name; NULL until one is given */
    int port;       /* memcached server port */
};
+
/* Per-thread argument handed to worker(); the worker frees it when done. */
struct worker_arg {
    struct options *opts;   /* run settings shared by every worker */
    uint64_t pos;           /* offset shown in the worker's verbose report */
    uint64_t maxsize;       /* NOTE(review): set by do_test, not read by worker */
    int num;                /* worker index, also its slot in stats[] */
};
+
+void set_defaults(struct options *opts);
+void do_options(struct options *opts, int argc, char **argv);
+void do_test(struct options *opts);
+void *worker(void *arg);
+
+struct timeval *stats;
+
+int main(int argc, char **argv) {
+    struct options opts;
+    set_defaults(&opts);
+    do_options(&opts, argc, argv);
+    stats = (struct timeval *)calloc(opts.threads, sizeof(struct timeval));
+    do_test(&opts);
+    return 0;
+}
+
/*
 * Report the current errno through perror(), prefixed with `string`, and
 * terminate the process with exit status -1.  Does not return.
 */
void err_exit(const char *string) {
    const int failure_status = -1;

    perror(string);
    exit(failure_status);
}
+
+void do_test(struct options *opts) {
+    int fd, i;
+    uint64_t numblocks=0, bytes=0;
+    struct timeval t0, t1, *avg, *min, *max, *stdd;
+    struct worker_arg *a;
+    pthread_t *threads;
+
+    /* memcached stuff */
+    memcached_return rc;
+    memcached_server_st *servers;
+    memcached_st *memc = memcached_create(NULL);
+
+    servers = memcached_server_list_append(NULL, opts->host, opts->port, &rc);
+    rc = memcached_server_push(memc, servers);
+
+
+    /* eventually */
+    /*
+    memcached_server_free(servers);
+    memcached_free(memc);
+    */
+
+    if (opts->verbose) {
+        printf("Benchmarking %s [%llu MB, %llu bytes]\n", opts->host, 
+            (numblocks / 2048), bytes);
+    }
+
+    threads = (pthread_t *)calloc(opts->threads, sizeof(pthread_t));
+
+    /* get t0 and seed the random number generator */
+    gettimeofday(&t0, NULL);
+    srand(t0.tv_usec);
+    for(i=0; i < opts->threads; i++) {
+        a = (struct worker_arg *)calloc(1, sizeof(struct worker_arg));
+        a->opts = opts;
+        a->pos = (random() * numblocks / RAND_MAX) << 9;
+        a->maxsize = bytes;
+        a->num = i;
+        /* bounds check */
+        if (a->pos + a->opts->blocksize > bytes) {
+            a->pos -= a->opts->blocksize;
+        }
+        pthread_create(&threads[i], NULL, worker, (void *)a);
+    }
+
+    for(i=0; i < opts->threads; i++) {
+        if(threads[i] != NULL) {
+            pthread_join(threads[i], NULL);
+        }
+    }
+    if(i != opts->threads) {
+        printf("Warning: You asked for %d threads, but only %d were created.\n",
+                opts->threads, i);
+        opts->threads = i;
+    }
+
+    delta(&t0, NULL, &t1);
+
+    avg = average_time(stats, opts->threads);
+    min = min_time(stats, opts->threads);
+    max = max_time(stats, opts->threads);
+    stdd = stddev_time(stats, opts->threads);
+    if (opts->verbose) {
+        printf("tot: %d.%06d, ", (int)t1.tv_sec, (int)t1.tv_usec);
+        printf("avg: %d.%06d, ", (int)avg->tv_sec, (int)avg->tv_usec);
+        printf("min: %d.%06d, ", (int)min->tv_sec, (int)min->tv_usec);
+        printf("max: %d.%06d, ", (int)max->tv_sec, (int)max->tv_usec);
+        printf("stddev: %d.%06d\n", (int)stdd->tv_sec, (int)stdd->tv_usec);
+    } else {
+        printf("%d.%06d, %d.%06d, %d.%06d, %d.%06d, %d.%06d\n",
+                (int)t1.tv_sec, (int)t1.tv_usec,
+                (int)avg->tv_sec, (int)avg->tv_usec,
+                (int)min->tv_sec, (int)min->tv_usec,
+                (int)max->tv_sec, (int)max->tv_usec,
+                (int)stdd->tv_sec, (int)stdd->tv_usec);
+    }
+    free(avg);
+    free(min);
+    free(max);
+    free(stdd);
+}
+
+void *worker(void *arg) {
+    /* int blocksize; int threads; int reads; int pos;*/
+    int fd, i;
+    long int r;
+    uint64_t location;
+    char *buffer;
+    struct worker_arg *w = (struct worker_arg *)arg;
+    struct timeval t0, t1;
+
+    gettimeofday(&t0, NULL);
+    /* TODO: make sure we don't try to read past the end of the device.. */
+    for(i=0; i < w->opts->reads; i++) {
+        /* we want to seek a random distance from the current position */
+        r = random();
+    }
+    delta(&t0, NULL, &t1);
+    if (w->opts->verbose) {
+        printf("%3d read %d bytes in %d.%06d (%d reads, offset %7llu MB)\n", w->num, 
+                w->opts->blocksize * w->opts->reads, (int)t1.tv_sec, (int)t1.tv_usec, 
+                w->opts->reads, w->pos / 1048576);
+    }
+    memcpy(&stats[w->num], &t1, sizeof(struct timeval));
+    free(arg);
+    return NULL;
+}
+
+/* help and options */
+
/*
 * Write the one-line usage summary to stdout; when `more` is non-zero,
 * follow it with the longer description of the program.
 */
void print_help(int more) {
    printf("options: -v/-h, -n <nthreads>, -b <blocksize>, -r <reads>, -p <port> <host>\n");
    if (more) {
        printf("\n"
        "Memcachedtest runs <reads> iterations in each of <nthreads> simultaneous\n"
        "threads against the memcached server at <host> (port <port>, default: 11211),\n"
        "as the networked-memory counterpart of filetest.\n"
        "This is mostly to illustrate the point that networked RAM is faster than Disk.\n"
        "By default, it outputs (total, avg(mean), min, max, standard_dev)\n");
    }
}
+
+/* defaults: 64 threads, 32 byte blocks, /dev/sda */
+
+void set_defaults(struct options *opts) {
+    opts->verbose = 0;
+    opts->blocksize = 128;
+    opts->threads = 64;
+    opts->reads = 100;
+    opts->host = NULL;
+    opts->port = 11211;
+}
+
+/* Handle the options for the main filtesting program. */
+
+void do_options(struct options *opts, int argc, char **argv) {
+    char c;
+    while((c = getopt(argc, argv, "hvb:r:n:p:")) != -1) {
+        switch(c) {
+            case 'h':
+                print_help(1);
+                exit(0);
+            case 'v':
+                opts->verbose = 1;
+                break;
+            case 'b':
+                opts->blocksize = strtol(optarg, (char **)NULL, 10);
+                break;
+            case 'r':
+                opts->reads = strtol(optarg, (char **)NULL, 10);
+                break;
+            case 'p':
+                opts->port = strtol(optarg, (char **)NULL, 11211);
+                break;
+            case 'n':
+                opts->threads = strtol(optarg, (char **)NULL, 10);
+                if (opts->threads > MAX_THREADS) {
+                    printf("Error: max threads is %d (attempted %d)\n", MAX_THREADS, opts->threads);
+                    exit(-1);
+                }
+                break;
+            default:
+                print_help(0);
+                exit(0);
+        }
+    }
+    if (optind < argc) {
+        opts->host = (char *)calloc(strlen(argv[optind])+1, sizeof(char));
+        strncpy(opts->host, argv[optind], strlen(argv[optind]));
+    }
+    if (opts->verbose) {
+        printf("verbose: %d\nblocksize: %d\nthreads: %d\nhost: %s\n",
+                opts->verbose, opts->blocksize, opts->threads, opts->host);
+    }
+}
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <string.h>
+#include <math.h>
+
+#include "test.h"
+
/*
 * Prints a timeval as "<seconds>.<microseconds>" (microseconds zero-padded
 * to six digits) followed by a newline.  If t is NULL, the current time is
 * printed instead.
 */

void print_time(struct timeval *t) {
    struct timeval now;

    if (t == NULL) {
        /* a stack variable avoids an allocation that could fail */
        gettimeofday(&now, NULL);
        t = &now;
    }
    printf("%d.%06d\n", (int)t->tv_sec, (int)t->tv_usec);
}
+
/* Prints t1 - t0 in the format used by print_time().  A NULL t1 is passed
 * straight to delta(), which then measures up to the current time.
 */

void print_delta(struct timeval *t0, struct timeval *t1) {
    struct timeval elapsed;

    delta(t0, t1, &elapsed);
    print_time(&elapsed);
}
+
+
/* Performs a delta of two timevals, storing the result in d.
 * If t1 is NULL, it uses current time.  Note that this is a fancy
 * way of saying d = t1 - t0.
 *
 * The result is normalised so that 0 <= d->tv_usec < 1000000; when t1 is
 * earlier than t0 the seconds field becomes negative.
 */

void delta(struct timeval *t0, struct timeval *t1, struct timeval *d) {
    struct timeval now;
    long secs, usecs;

    if (t1 == NULL) {
        /* a stack variable avoids an allocation that could fail */
        gettimeofday(&now, NULL);
        t1 = &now;
    }
    secs = (long)(t1->tv_sec - t0->tv_sec);
    usecs = (long)(t1->tv_usec - t0->tv_usec);

    if (usecs < 0) {
        /* borrow one second to keep the microsecond field non-negative */
        usecs += 1000000;
        secs -= 1;
    }
    d->tv_sec = secs;
    d->tv_usec = usecs;
}
+
/* Converts a timeval to seconds expressed as a double. */
double tv_to_double(struct timeval *tv) {
    const double fraction = tv->tv_usec / 1000000.0;

    return tv->tv_sec + fraction;
}
+
/* Calculate the total time for a list of timevals.
 *
 * Returns a newly allocated timeval that the caller must free(), holding the
 * sum of the first `len` entries of tv with 0 <= tv_usec < 1000000.  The
 * result is zero when len <= 0 and NULL when the allocation fails.
 */

struct timeval *total_time(struct timeval *tv, int len) {
    int i;
    struct timeval *ret;

    ret = calloc(1, sizeof *ret);
    if (ret == NULL) {
        return NULL;
    }

    for (i = 0; i < len; i++) {
        ret->tv_sec += tv[i].tv_sec;
        ret->tv_usec += tv[i].tv_usec;
        /* carry whole seconds out of the microsecond field */
        while (ret->tv_usec >= 1000000) {
            ret->tv_sec += 1;
            ret->tv_usec -= 1000000;
        }
    }
    return ret;
}
+
/* Calculate the average (mean) time for a list of timevals.
 *
 * Returns a newly allocated timeval that the caller must free(), holding the
 * mean of the first `len` entries of tv, truncated to whole microseconds.
 * The result is zero when tv is NULL or len <= 0, and NULL when the
 * allocation fails.
 */

struct timeval *average_time(struct timeval *tv, int len) {
    struct timeval *ret;
    long long total_usecs = 0;
    long long mean_usecs;
    int i;

    ret = calloc(1, sizeof *ret);
    if (ret == NULL || tv == NULL || len < 1) {
        /* nothing to average; this also avoids dividing by zero */
        return ret;
    }

    /* integer microseconds keep the sum exact, with no float rounding */
    for (i = 0; i < len; i++) {
        total_usecs += (long long)tv[i].tv_sec * 1000000 + tv[i].tv_usec;
    }
    mean_usecs = total_usecs / len;

    ret->tv_sec = mean_usecs / 1000000;
    ret->tv_usec = mean_usecs % 1000000;
    return ret;
}
+
/* Calculate the min time for a list of timevals.
 *
 * Returns a newly allocated copy of the smallest of the first `len` entries
 * of tv; the caller must free() it.  The result is zero when tv is NULL or
 * len <= 0, and NULL when the allocation fails.
 */

struct timeval *min_time(struct timeval *tv, int len) {
    int i;
    struct timeval *min;

    min = calloc(1, sizeof *min);
    if (min == NULL || tv == NULL || len < 1) {
        /* there is no first element to start from */
        return min;
    }
    /* init min to first timeval */
    *min = tv[0];

    for (i = 1; i < len; i++) {
        /* from second timeval, set min if tv is lower */
        if ((tv[i].tv_sec < min->tv_sec) ||
            (tv[i].tv_sec == min->tv_sec && tv[i].tv_usec < min->tv_usec)) {
            *min = tv[i];
        }
    }
    return min;
}
+
/* Calculate the max time for a list of timevals.
 *
 * Returns a newly allocated copy of the largest of the first `len` entries
 * of tv; the caller must free() it.  The result is zero when tv is NULL or
 * len <= 0, and NULL when the allocation fails.
 */

struct timeval *max_time(struct timeval *tv, int len) {
    int i;
    struct timeval *max;

    max = calloc(1, sizeof *max);
    if (max == NULL || tv == NULL || len < 1) {
        /* there is no first element to start from */
        return max;
    }
    /* init max to first timeval */
    *max = tv[0];

    for (i = 1; i < len; i++) {
        /* from second timeval, set max if tv is higher */
        if ((tv[i].tv_sec > max->tv_sec) ||
            (tv[i].tv_sec == max->tv_sec && tv[i].tv_usec > max->tv_usec)) {
            *max = tv[i];
        }
    }
    return max;
}
+
/* Calculate the standard deviation for a list of timevals.
 *
 * Returns a newly allocated timeval that the caller must free(), holding the
 * population standard deviation (divisor `len`) of the first `len` entries of
 * tv around the mean from average_time().  The result is zero when tv is
 * NULL or len <= 0, and NULL when the allocation fails.
 */

struct timeval *stddev_time(struct timeval *tv, int len) {
    int i;
    struct timeval diff, *mean, *stddev;
    /* the sum must start at zero; an uninitialised accumulator is garbage */
    double sum_sq = 0.0;
    double d, stddevf;

    stddev = calloc(1, sizeof *stddev);
    if (stddev == NULL || tv == NULL || len < 1) {
        return stddev;
    }
    mean = average_time(tv, len);
    if (mean == NULL) {
        return stddev;
    }

    /* see: http://en.wikipedia.org/wiki/Standard_deviation */

    for (i = 0; i < len; i++) {
        /* the deviation may be negative; squaring removes the sign */
        delta(mean, &tv[i], &diff);
        d = tv_to_double(&diff);
        sum_sq += d * d;
    }
    free(mean);

    stddevf = sqrt(sum_sq / len);

    stddev->tv_sec = (int)stddevf;
    stddev->tv_usec = (int)((stddevf - (int)stddevf) * 1000000);
    return stddev;
}
+
+
/* test.c interface */

#ifndef TEST_H
#define TEST_H

/* struct timeval; included here so the header compiles on its own */
#include <sys/time.h>

/* printing: a NULL argument stands for the current time */

void print_time(struct timeval *t);
void print_delta(struct timeval *t0, struct timeval *t1);

/* functions to take care of time & statistics */

/* d = t1 - t0; a NULL t1 stands for the current time */
void delta(struct timeval *t0, struct timeval *t1, struct timeval *d);
double tv_to_double(struct timeval *tv);

/* each of these returns a newly allocated timeval that the caller must free() */
struct timeval *total_time(struct timeval *tv, int len);
struct timeval *average_time(struct timeval *tv, int len);
struct timeval *min_time(struct timeval *tv, int len);
struct timeval *max_time(struct timeval *tv, int len);
struct timeval *stddev_time(struct timeval *tv, int len);

#endif /* TEST_H */
+
+#!/usr/bin/env python
+
+import sys
+import time
+import optparse
+import subprocess
+
def run_iteration(threads):
    """Runs an iteration of the test on `threads` threads.  Sleeps for a
    second afterwards to let the disk calm down.

    Returns the five numbers that ./filetest prints on one line
    (tot, avg, min, max, stddev) as a list of floats.  Raises ValueError
    when the output is not in that form, for example an error message."""
    p = subprocess.Popen(['./filetest', '-n', str(threads), '-b', '1024', '-r', '25'],
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
            universal_newlines=True)  # text output on Python 2 and 3
    # communicate() reads to EOF and also waits for the child to exit
    data, _ = p.communicate()
    time.sleep(1)
    return [float(o) for o in data.strip().split(', ')]
+
def main():
    """Runs the benchmark with 1, 2, ... [runs] threads and prints one line
    of results per run.  Prints the usage text when [runs] is missing."""
    parser = optparse.OptionParser(version='1.0', usage='%prog [runs]')
    opts, args = parser.parse_args()
    if not args:
        parser.print_help()
        return
    vals = []
    runs = int(args[0])
    for r in range(runs):
        ret = run_iteration(r+1)
        vals.append(ret)
        # progress goes to stderr so that stdout holds only the results
        sys.stderr.write(str(r) + ' \r')

    # print(...) with a single argument behaves the same on Python 2 and 3
    print("# (tot, avg, min, max, stddev)")
    for i,v in enumerate(vals):
        run = ('-n %d,' % (i+1)).rjust(6)
        print("%s %0.6f, %0.6f, %0.6f, %0.6f, %0.6f" % (run, v[0],
                v[1], v[2], v[3], v[4]))
+
+
+if __name__ == "__main__":
+    main()