Commits

Francesco Romani committed 03a3b80

[filter][vid.stab] bump vid.stab to 0.79

  • Participants
  • Parent commits 5a36e46
  • Branches transcode-1_1

Comments (0)

Files changed (5)

 AC_SUBST(CFLAGS)
 AC_SUBST(CC)
 AC_SUBST(LDFLAGS)
+AC_SUBST(SIMD_FLAGS)
 
 AC_CONFIG_FILES([
 	Makefile

filter/stabilize/Changelog

-0.76    BUGFIX in calcFieldTransYUV caused SEGFAULT
+0.79
+	speed optimizations using SSE by Alexey Osipov
+	search tree cut, spiral search and sse:
+	 together speedup factor ~8 of stabilize run
+	stepsize is increased stepwise
 
-0.75    two meta parameter introduces: shakiness and accuracy and
+0.77
+	interpolation routines improved a lot, thanks to hints on ffmpeg list
+	added bicubic interpolation that uses 4x4 pixel (useful for large zoom values)
+	bilinear interpolation is now the default
+	transform plugin uses last transform for the remaining frames
+	 -> this enables to use the transform plugin for constant transformations
+
+0.76    
+	BUGFIX in calcFieldTransYUV caused SEGFAULT
+
+0.75    
+	two meta parameters introduced: shakiness and accuracy and
 	removal of fieldnum, fielsize, maxshift, maxfieldnum
 	field placement changed: Now they fill the frame perfectly
 	added bluring such that stepsize can be much larger: much faster now!
-        linear interpolation is now the default
+	linear interpolation is now the default
 	uncertain angles are set to 0 (more robustness)
 
-0.70    selects a maximal number of fields according to their contrast
+0.70
+	selects a maximal number of fields according to their contrast
 	nicely distributed over the frame
 	Todo: remove fieldnum, allowmax (now false), and compressed parameter
 	Todo: add accuracy parameter instead of maxfields
-        Bugfix of contrast routine (error in the calculation of maxi)
-	
-0.62     
+	Bugfix of contrast routine (error in the calculation of maxi)
+
+0.62
 	different interpolation functions. No interpolation is default and
 	 seems fine in most applications.
 	field placement changed. Now they fill the frame better.
 	"compress" option moves them more to the center (vertically)
 	fields and their areas and transforms can be drawn on the frame.
-	
-0.6    
-	new field placement allows for any number of fields 
+
+0.6
+	new field placement allows for any number of fields
 	 and respects aspect ratio
 	fields with low contrast are ignored
-	non-symmetric field setups are possible	
+	non-symmetric field setups are possible
 
-0.5    
+0.5	
 	global and per-frame zooming supported
-	optimal zoom value is calculated to avoid moving border	effect	
+	optimal zoom value is calculated to avoid moving border	effect
 	add filter unsharp to sharpen the frames
 	write parameters to transforms file
 
 	finished RGB #support
 	transforms file format supports comments
 
-0.42
+0.42	
 	input filename and tc_list stuff used
-0.41    
+0.41
 	code style from transcode used
 0.4	
 	moved to CVS version we use a cleaned median to select the right
 	maxshift default 50
 	allowmax added
 	scan only every second shift and then make a second small
-          scan around the best match
+	  scan around the best match
 	removed black rectangle around transformed frame

filter/stabilize/Makefile.am

 AM_CPPFLAGS = \
 	$(PTHREAD_CFLAGS) \
 	$(BG) \
+	$(SIMD_FLAGS) \
 	-I$(top_srcdir) \
-	-I$(top_srcdir)/src
-	-C99
+	-I$(top_srcdir)/src 
+
+# SIMD_FLAGS should contain the SSE2 flags, but it is empty
+if HAVE_ASM_SSE2	
+AM_CPPFLAGS += -msse2 
+endif
+
 
 pkgdir = $(MOD_PATH)
 
 
 EXTRA_DIST = \
         transform.h 
-        

filter/stabilize/filter_stabilize.c

 /*
  *  filter_stabilize.c
  *
- *  Copyright (C) Georg Martius - June 2007
- *   georg dot martius at web dot de  
+ *  Copyright (C) Georg Martius - 2007 -- 2011
+ *   georg dot martius at web dot de
+ *   initial author
+ *
+ *  Copyright (C) Alexey Osipov - July 2011
+ *   simba at lerlan dot ru
+ *   speed optimizations including SSE2 code
  *
  *  This file is part of transcode, a video stream processing tool
- *      
+ *
  *  transcode is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2, or (at your option)
  *  any later version.
- *   
+ *
  *  transcode is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
- *   
+ *
  *  You should have received a copy of the GNU General Public License
  *  along with GNU Make; see the file COPYING.  If not, write to
- *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  */
 
 /* Typical call:
- *  transcode -V -J stabilize=shakiness=5:show=1,preview 
+ *  transcode -V -J stabilize=shakiness=5:show=1,preview
  *         -i inp.mpeg -y null,null -o dummy
  *  all parameters are optional
 */
 
 #define MOD_NAME    "filter_stabilize.so"
-#define MOD_VERSION "v0.76 (2011-02-01)"
+#define MOD_VERSION "v0.79 (2011-10-12)"
 #define MOD_CAP     "extracts relative transformations of \n\
     subsequent frames (used for stabilization together with the\n\
     transform filter in a second pass)"
     TC_MODULE_FEATURE_FILTER|TC_MODULE_FEATURE_VIDEO
 #define MOD_FLAGS  \
     TC_MODULE_FLAG_RECONFIGURABLE | TC_MODULE_FLAG_DELAY
-  
+
 #include "transcode.h"
 #include "filter.h"
 #include "libtc/libtc.h"
  * this is really just for debugging and development */
 // #define STABVERBOSE
 
+// #ifdef HAVE_SSE2 does not work, even though AC_SUBST(SIMD_FLAGS) is included
+#ifdef HAVE_ASM_SSE2
+/* use SSE2 for compareSubImg */
+#define USE_SSE2_CMP
+/* use SSE2 for the horizontal summation in compareSubImg as well;
+ * sometimes this may be slower.
+ * Enabling this also limits SSE2_CMP_SUM_ROWS to 8 */
+#define USE_SSE2_CMP_HOR
+
+/* how many 16-byte rows to sum up in the SSE2 register
+ * before flushing them to a regular variable,
+ * from 1 to 255.
+ * Bigger values are faster, but may cause register overflow,
+ * which leads to incorrect transformation data.
+ * Lower values are not much slower, but safer.
+ * If USE_SSE2_CMP_HOR is enabled, this must not be larger than 8 */
+#define SSE2_CMP_SUM_ROWS 8
+
+/* use SSE2 for contrastSubImg (only YUV version);
+ * may be used without USE_SSE2_CMP */
+#define USE_SSE2_YUV_CONTRAST
+#include <emmintrin.h>
+
+#endif
+
+
+#define MAXLONG ((unsigned long int)(-1))
+
 typedef struct _field {
     int x;     // middle position x
     int y;     // middle position y
     TCList* transs;
 
     Field* fields;
-    
+
 
     /* Options */
     /* maximum number of pixels we expect the shift of subsequent frames */
-    int maxshift; 
+    int maxshift;
     int stepsize; // stepsize of field transformation detection
     int allowmax; // 1 if maximal shift is allowed
     int algo;     // algorithm to use
     int field_size; // size    = min(sd->width, sd->height)/10;
     int field_rows; // number of rows
     /* if 1 and 2 then the fields and transforms are shown in the frames */
-    int show; 
+    int show;
     /* measurement fields with lower contrast are discarded */
-    double contrast_threshold;            
+    double contrast_threshold;
     /* maximal difference in angles of fields */
     double maxanglevariation;
     /* meta parameter for maxshift and fieldsize between 1 and 10 */
-    int shakiness;   
+    int shakiness;
     int accuracy;   // meta parameter for number of fields between 1 and 10
-  
+
     int t;
     char* result;
     FILE* f;
     char conf_str[TC_BUF_MIN];
 } StabData;
 
-/* type for a function that calculates the transformation of a certain field 
+/* type for a function that calculates the transformation of a certain field
  */
 typedef Transform (*calcFieldTransFunc)(StabData*, const Field*, int);
 
-/* type for a function that calculates the contrast of a certain field 
+/* type for a function that calculates the contrast of a certain field
  */
 typedef double (*contrastSubImgFunc)(StabData* sd, const Field* field);
 
     "Overview:\n"
     "    Generates a file with relative transform information\n"
     "     (translation, rotation) about subsequent frames."
-    " See also transform.\n" 
+    " See also transform.\n"
     "Options\n"
     "    'result'      path to the file used to write the transforms\n"
     "                  (def:inputfile.stab)\n"
     "    'help'        print this help message\n";
 
 int initFields(StabData* sd);
-double compareImg(unsigned char* I1, unsigned char* I2, 
-		  int width, int height,  int bytesPerPixel, int d_x, int d_y);
-double compareSubImg(unsigned char* const I1, unsigned char* const I2, 
-		     const Field* field, 
-		     int width, int height, int bytesPerPixel,int d_x,int d_y);
+unsigned long int compareImg(unsigned char* I1, unsigned char* I2,
+                             int width, int height,  int bytesPerPixel, 
+                             int d_x, int d_y, unsigned long int threshold);
+unsigned long int compareSubImg(unsigned char* const I1, unsigned char* const I2,
+                                const Field* field, int width, int height, 
+                                int bytesPerPixel,int d_x,int d_y, 
+                                unsigned long int threshold);
 double contrastSubImgYUV(StabData* sd, const Field* field);
+#ifdef USE_SSE2_YUV_CONTRAST
+double contrastSubImgYUVSSE(unsigned char* const I, const Field* field, int width, int height);
+#endif
 double contrastSubImgRGB(StabData* sd, const Field* field);
-double contrastSubImg(unsigned char* const I, const Field* field, 
+double contrastSubImg(unsigned char* const I, const Field* field,
                       int width, int height, int bytesPerPixel);
 int cmp_contrast_idx(const void *ci1, const void* ci2);
 TCList* selectfields(StabData* sd, contrastSubImgFunc contrastfunc);
 Transform calcShiftYUVSimple(StabData* sd);
 double calcAngle(StabData* sd, Field* field, Transform* t,
                  int center_x, int center_y);
-Transform calcFieldTransYUV(StabData* sd, const Field* field, 
+Transform calcFieldTransYUV(StabData* sd, const Field* field,
                             int fieldnum);
-Transform calcFieldTransRGB(StabData* sd, const Field* field, 
+Transform calcFieldTransRGB(StabData* sd, const Field* field,
                             int fieldnum);
 Transform calcTransFields(StabData* sd, calcFieldTransFunc fieldfunc,
                           contrastSubImgFunc contrastfunc);
 void drawFieldScanArea(StabData* sd, const Field* field, const Transform* t);
 void drawField(StabData* sd, const Field* field, const Transform* t);
 void drawFieldTrans(StabData* sd, const Field* field, const Transform* t);
-void drawBox(unsigned char* I, int width, int height, int bytesPerPixel, 
+void drawBox(unsigned char* I, int width, int height, int bytesPerPixel,
              int x, int y, int sizex, int sizey, unsigned char color);
 void addTrans(StabData* sd, Transform sl);
 
 
 
 /** initialise measurement fields on the frame.
-    The size of the fields and the maxshift is used to 
+    The size of the fields and the maxshift is used to
     calculate an optimal distribution in the frame.
 */
 int initFields(StabData* sd)
 {
-    int size     = sd->field_size;
+    int size = sd->field_size;
     int rows = TC_MAX(3,(sd->height - sd->maxshift*2)/size-1);
     int cols = TC_MAX(3,(sd->width  - sd->maxshift*2)/size-1);
     // make sure that the remaining rows have the same length
     sd->field_num  = rows*cols;
     sd->field_rows = rows;
-    // tc_log_msg(MOD_NAME, "field setup: rows: %i cols: %i Total: %i fields", 
+    // tc_log_msg(MOD_NAME, "field setup: rows: %i cols: %i Total: %i fields",
     //            rows, cols, sd->field_num);
 
     if (!(sd->fields = tc_malloc(sizeof(Field) * sd->field_num))) {
    \param d_x shift in x direction
    \param d_y shift in y direction
 */
-double compareImg(unsigned char* I1, unsigned char* I2, 
-                  int width, int height,  int bytesPerPixel, int d_x, int d_y)
+unsigned long int compareImg(unsigned char* I1, unsigned char* I2,
+                  int width, int height,  int bytesPerPixel, int d_x, int d_y, unsigned long int treshold)
 {
     int i, j;
     unsigned char* p1 = NULL;
     unsigned char* p2 = NULL;
-    long int sum = 0;  
+    unsigned long int sum = 0;
     int effectWidth = width - abs(d_x);
     int effectHeight = height - abs(d_y);
 
     for (i = 0; i < effectHeight; i++) {
         p1 = I1;
         p2 = I2;
-        if (d_y > 0 ){ 
+        if (d_y > 0 ){
             p1 += (i + d_y) * width * bytesPerPixel;
             p2 += i * width * bytesPerPixel;
         } else {
             p1 += i * width * bytesPerPixel;
             p2 += (i - d_y) * width * bytesPerPixel;
         }
-        if (d_x > 0) { 
+        if (d_x > 0) {
             p1 += d_x * bytesPerPixel;
         } else {
-            p2 -= d_x * bytesPerPixel; 
+            p2 -= d_x * bytesPerPixel;
         }
         // TODO: use some mmx or sse stuff here
         for (j = 0; j < effectWidth * bytesPerPixel; j++) {
             /* debugging code continued */
             /* fwrite(p1,1,1,pic1);fwrite(p1,1,1,pic1);fwrite(p1,1,1,pic1);
-               fwrite(p2,1,1,pic2);fwrite(p2,1,1,pic2);fwrite(p2,1,1,pic2); 
+               fwrite(p2,1,1,pic2);fwrite(p2,1,1,pic2);fwrite(p2,1,1,pic2);
              */
             sum += abs((int)*p1 - (int)*p2);
             p1++;
-            p2++;      
+            p2++;
         }
+        if (sum > treshold)
+            break;
     }
     /*  fclose(pic1);
-        fclose(pic2); 
+        fclose(pic2);
      */
-    return sum/((double) effectWidth * effectHeight * bytesPerPixel);
+    return sum;
 }
 
 /**
-   compares a small part of two given images 
+   compares a small part of two given images
    and returns the summed absolute difference (not averaged).
-   Field center, size and shift have to be choosen, 
+   Field center, size and shift have to be chosen,
    so that no clipping is required
-     
-   \param field Field specifies position(center) and size of subimage 
+
+   \param field Field specifies position(center) and size of subimage
    \param d_x shift in x direction
-   \param d_y shift in y direction   
+   \param d_y shift in y direction
+   \param threshold minimum difference so far (can stop summing up if exceeded)
 */
-double compareSubImg(unsigned char* const I1, unsigned char* const I2, 
-                     const Field* field, 
-                     int width, int height, int bytesPerPixel, int d_x, int d_y)
+#ifndef USE_SSE2_CMP
+unsigned long int compareSubImg(unsigned char* const I1, unsigned char* const I2,
+                                const Field* field, int width, int height, 
+                                int bytesPerPixel, int d_x, int d_y,
+                                unsigned long int threshold) {
+  int k, j;
+  unsigned char* p1 = NULL;
+  unsigned char* p2 = NULL;
+  int s2 = field->size / 2;
+  unsigned long int sum = 0;
+
+  p1 = I1 + ((field->x - s2) + (field->y - s2) * width) * bytesPerPixel;
+  p2 = I2 + ((field->x - s2 + d_x) + (field->y - s2 + d_y) * width)
+    * bytesPerPixel;
+  for (j = 0; j < field->size; j++) {
+    for (k = 0; k < field->size * bytesPerPixel; k++) {
+      sum += abs((int) *p1 - (int) *p2);
+      p1++;
+      p2++;
+    }
+    if( sum > threshold) // no need to calculate any longer: worse than the best match
+      break;
+    p1 += (width - field->size) * bytesPerPixel;
+    p2 += (width - field->size) * bytesPerPixel;
+  }
+  return sum;
+}
+
+#else // USE_SSE2_CMP
+unsigned long int compareSubImg(unsigned char* const I1, unsigned char* const I2,
+                                const Field* field,
+                                int width, int height, int bytesPerPixel, 
+                                int d_x, int d_y, unsigned long int threshold)
 {
     int k, j;
     unsigned char* p1 = NULL;
     unsigned char* p2 = NULL;
     int s2 = field->size / 2;
-    double sum = 0;
+    unsigned long int sum = 0;
+
+    static unsigned char mask[16] = {0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00};    
+    unsigned char row = 0;
+#ifndef USE_SSE2_CMP_HOR
+    unsigned char summes[16];
+    int i;
+#endif
+    __m128i xmmsum, xmmmask;
+    xmmsum = _mm_setzero_si128();
+    xmmmask = _mm_loadu_si128((__m128i const*)mask);
 
     p1=I1 + ((field->x - s2) + (field->y - s2)*width)*bytesPerPixel;
     p2=I2 + ((field->x - s2 + d_x) + (field->y - s2 + d_y)*width)*bytesPerPixel;
-    // TODO: use some mmx or sse stuff here
     for (j = 0; j < field->size; j++){
-        for (k = 0; k < field->size * bytesPerPixel; k++) {
-            sum += abs((int)*p1 - (int)*p2);
-            p1++;
-            p2++;     
+        for (k = 0; k < field->size * bytesPerPixel; k+=16){
+            {
+                __m128i xmm0, xmm1, xmm2;
+                xmm0 = _mm_loadu_si128((__m128i const *)p1);
+                xmm1 = _mm_loadu_si128((__m128i const *)p2);
+
+                xmm2 = _mm_subs_epu8(xmm0, xmm1);
+                xmm0 = _mm_subs_epu8(xmm1, xmm0);
+                xmm0 = _mm_adds_epu8(xmm0, xmm2);
+
+                xmm1 = _mm_and_si128(xmm0, xmmmask);
+                xmm0 = _mm_srli_si128(xmm0, 1);
+                xmm0 = _mm_and_si128(xmm0, xmmmask);
+
+                xmmsum = _mm_adds_epu16(xmmsum, xmm0);
+                xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+            }
+
+            p1+=16;
+            p2+=16;
+
+            row++;
+            if (row == SSE2_CMP_SUM_ROWS) {
+                row = 0;
+#ifdef USE_SSE2_CMP_HOR
+                {
+                    __m128i xmm1;
+
+                    xmm1 = _mm_srli_si128(xmmsum, 8);
+                    xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+                    xmm1 = _mm_srli_si128(xmmsum, 4);
+                    xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+                    xmm1 = _mm_srli_si128(xmmsum, 2);
+                    xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+                    sum += _mm_extract_epi16(xmmsum, 0);
+                }
+#else
+                _mm_storeu_si128((__m128i*)summes, xmmsum);
+                for(i = 0; i < 16; i+=2)
+                    sum += summes[i] + summes[i+1]*256;
+#endif
+                xmmsum = _mm_setzero_si128();
+            }
         }
+        if (sum > threshold)
+            break;
         p1 += (width - field->size) * bytesPerPixel;
         p2 += (width - field->size) * bytesPerPixel;
     }
-    return sum/((double) field->size *field->size* bytesPerPixel);
+
+#if (SSE2_CMP_SUM_ROWS != 1) && (SSE2_CMP_SUM_ROWS != 2) && (SSE2_CMP_SUM_ROWS != 4) \
+  && (SSE2_CMP_SUM_ROWS != 8) && (SSE2_CMP_SUM_ROWS != 16)
+    // process all data left unprocessed;
+    // this part can be safely ignored if
+    // SSE2_CMP_SUM_ROWS is one of {1, 2, 4, 8, 16}
+#ifdef USE_SSE2_CMP_HOR
+    {
+        __m128i xmm1;
+
+        xmm1 = _mm_srli_si128(xmmsum, 8);
+        xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+        xmm1 = _mm_srli_si128(xmmsum, 4);
+        xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+        xmm1 = _mm_srli_si128(xmmsum, 2);
+        xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+        sum += _mm_extract_epi16(xmmsum, 0);
+    }
+#else
+    _mm_storeu_si128((__m128i*)summes, xmmsum);
+    for(i = 0; i < 16; i+=2)
+       sum += summes[i] + summes[i+1]*256;
+#endif
+#endif
+
+    return sum;
 }
+#endif // USE_SSE2_CMP
 
 /** \see contrastSubImg called with bytesPerPixel=1*/
 double contrastSubImgYUV(StabData* sd, const Field* field){
+#ifdef USE_SSE2_YUV_CONTRAST
+    return contrastSubImgYUVSSE(sd->curr,field,sd->width,sd->height);
+#else
     return contrastSubImg(sd->curr,field,sd->width,sd->height,1);
+#endif
 }
 
-/** 
-    \see contrastSubImg three times called with bytesPerPixel=3 
-    for all channels   
+/**
+    \see contrastSubImg three times called with bytesPerPixel=3
+    for all channels
  */
 double contrastSubImgRGB(StabData* sd, const Field* field){
     unsigned char* const I = sd->curr;
-    return (  contrastSubImg(I,  field,sd->width,sd->height,3) 
+    return (  contrastSubImg(I,  field,sd->width,sd->height,3)
             + contrastSubImg(I+1,field,sd->width,sd->height,3)
             + contrastSubImg(I+2,field,sd->width,sd->height,3))/3;
 }
 
+
+#ifdef USE_SSE2_YUV_CONTRAST
+/**
+    \see contrastSubImg using SSE2 optimization, YUV only
+ */
+double contrastSubImgYUVSSE(unsigned char* const I, const Field* field,
+                     int width, int height)
+{
+    int k, j;
+    unsigned char* p = NULL;
+    int s2 = field->size / 2;
+
+    static unsigned char full[16] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
+
+    p = I + ((field->x - s2) + (field->y - s2)*width);
+
+    __m128i mmin, mmax;
+
+    mmin = _mm_loadu_si128((__m128i const*)full);
+    mmax = _mm_setzero_si128();
+
+    for (j = 0; j < field->size; j++){
+        for (k = 0; k < field->size; k += 16) {
+            __m128i xmm0;
+            xmm0 = _mm_loadu_si128((__m128i const*)p);
+            mmin = _mm_min_epu8(mmin, xmm0);
+            mmax = _mm_max_epu8(mmax, xmm0);
+            p += 16;
+        }
+        p += (width - field->size);
+    }
+
+    __m128i xmm1;
+    xmm1 = _mm_srli_si128(mmin, 8);
+    mmin = _mm_min_epu8(mmin, xmm1);
+    xmm1 = _mm_srli_si128(mmin, 4);
+    mmin = _mm_min_epu8(mmin, xmm1);
+    xmm1 = _mm_srli_si128(mmin, 2);
+    mmin = _mm_min_epu8(mmin, xmm1);
+    xmm1 = _mm_srli_si128(mmin, 1);
+    mmin = _mm_min_epu8(mmin, xmm1);
+    unsigned char mini = (unsigned char)_mm_extract_epi16(mmin, 0);
+
+    xmm1 = _mm_srli_si128(mmax, 8);
+    mmax = _mm_max_epu8(mmax, xmm1);
+    xmm1 = _mm_srli_si128(mmax, 4);
+    mmax = _mm_max_epu8(mmax, xmm1);
+    xmm1 = _mm_srli_si128(mmax, 2);
+    mmax = _mm_max_epu8(mmax, xmm1);
+    xmm1 = _mm_srli_si128(mmax, 1);
+    mmax = _mm_max_epu8(mmax, xmm1);
+    unsigned char maxi = (unsigned char)_mm_extract_epi16(mmax, 0);
+
+    return (maxi-mini)/(maxi+mini+0.1); // +0.1 to avoid division by 0
+}
+#endif
+
 /**
    calculates Michelson-contrast in the given small part of the given image
-     
-   \param I pointer to framebuffer 
-   \param field Field specifies position(center) and size of subimage 
+
+   \param I pointer to framebuffer
+   \param field Field specifies position(center) and size of subimage
    \param width width of frame
    \param height height of frame
    \param bytesPerPixel calc contrast only for first channel
 */
-double contrastSubImg(unsigned char* const I, const Field* field, 
+double contrastSubImg(unsigned char* const I, const Field* field,
                      int width, int height, int bytesPerPixel)
 {
     int k, j;
     unsigned char maxi = 0;
 
     p = I + ((field->x - s2) + (field->y - s2)*width)*bytesPerPixel;
-    // TODO: use some mmx or sse stuff here
+
     for (j = 0; j < field->size; j++){
         for (k = 0; k < field->size * bytesPerPixel; k++) {
             mini = (mini < *p) ? mini : *p;
 {
     int x = 0, y = 0;
     int i, j;
-    double minerror = 1e20;  
+    unsigned long int minerror = MAXLONG;
     for (i = -sd->maxshift; i <= sd->maxshift; i++) {
         for (j = -sd->maxshift; j <= sd->maxshift; j++) {
-            double error = compareImg(sd->curr, sd->prev, 
-                                      sd->width, sd->height, 3, i, j);
+            unsigned long int error = compareImg(sd->curr, sd->prev,
+                                      sd->width, sd->height, 3, i, j, minerror);
             if (error < minerror) {
                 minerror = error;
                 x = i;
                 y = j;
-           }	
+           }
         }
-    } 
+    }
     return new_transform(x, y, 0, 0, 0);
 }
 
 
-/** tries to register current frame onto previous frame. 
+/** tries to register current frame onto previous frame.
     (only the luminance is used)
     This is the most simple algorithm:
     shift images to all possible positions and calc summed error
 #endif
 
     // we only use the luminance part of the image
-    Y_c  = sd->curr;  
+    Y_c  = sd->curr;
     //  Cb_c = sd->curr + sd->width*sd->height;
     //Cr_c = sd->curr + 5*sd->width*sd->height/4;
-    Y_p  = sd->prev;  
+    Y_p  = sd->prev;
     //Cb_p = sd->prev + sd->width*sd->height;
     //Cr_p = sd->prev + 5*sd->width*sd->height/4;
 
-    double minerror = 1e20;  
+    unsigned long int minerror = MAXLONG;
     for (i = -sd->maxshift; i <= sd->maxshift; i++) {
         for (j = -sd->maxshift; j <= sd->maxshift; j++) {
-            double error = compareImg(Y_c, Y_p, 
-                                      sd->width, sd->height, 1, i, j);
+            unsigned long int error = compareImg(Y_c, Y_p,
+                                      sd->width, sd->height, 1, i, j, minerror);
 #ifdef STABVERBOSE
             fprintf(f, "%i %i %f\n", i, j, error);
 #endif
                 minerror = error;
                 x = i;
                 y = j;
-            }	
+            }
         }
-    }  
+    }
 #ifdef STABVERBOSE
     fclose(f);
     tc_log_msg(MOD_NAME, "Minerror: %f\n", minerror);
 
 
 
-/* calculates rotation angle for the given transform and 
+/* calculates rotation angle for the given transform and
  * field with respect to the given center-point
  */
-double calcAngle(StabData* sd, Field* field, Transform* t, 
+double calcAngle(StabData* sd, Field* field, Transform* t,
                  int center_x, int center_y)
 {
-    // we better ignore fields that are to close to the rotation center 
+    // we better ignore fields that are too close to the rotation center
     if (abs(field->x - center_x) + abs(field->y - center_y) < sd->maxshift) {
         return 0;
     } else {
-        // double r = sqrt(field->x*field->x + field->y*field->y);   
+        // double r = sqrt(field->x*field->x + field->y*field->y);
         double a1 = atan2(field->y - center_y, field->x - center_x);
-        double a2 = atan2(field->y - center_y + t->y, 
+        double a2 = atan2(field->y - center_y + t->y,
                           field->x - center_x + t->x);
         double diff = a2 - a1;
-        return (diff>M_PI) ? diff - 2*M_PI 
-            : ( (diff<-M_PI) ? diff + 2*M_PI : diff);    
+        return (diff>M_PI) ? diff - 2*M_PI
+            : ( (diff<-M_PI) ? diff + 2*M_PI : diff);
     }
 }
 
     // we only use the luminance part of the image
     int i, j;
 
-/*     // check contrast in sub image */
-/*     double contr = contrastSubImg(Y_c, field, sd->width, sd->height, 1); */
-/*     if(contr < sd->contrast_threshold) { */
-/*         t.extra=-1; */
-/*         return t; */
-/*     } */
 #ifdef STABVERBOSE
-    // printf("%i %i %f\n", sd->t, fieldnum, contr);    
+    // printf("%i %i %f\n", sd->t, fieldnum, contr);
     FILE *f = NULL;
     char buffer[32];
     tc_snprintf(buffer, sizeof(buffer), "f%04i_%02i.dat", sd->t, fieldnum);
     f = fopen(buffer, "w");
     fprintf(f, "# splot \"%s\"\n", buffer);
-#endif    
+#endif
 
-    double minerror = 1e10;  
-    double error = 1e10;
+    unsigned long int minerror = compareSubImg(Y_c, Y_p, field, sd->width, sd->height,
+                                               1, 0, 0, MAXLONG);
+
+    unsigned long int error = MAXLONG;
     for (i = -sd->maxshift; i <= sd->maxshift; i += sd->stepsize) {
         for (j = -sd->maxshift; j <= sd->maxshift; j += sd->stepsize) {
-            error = compareSubImg(Y_c, Y_p, field, 
-                                         sd->width, sd->height, 1, i, j);
+            if( i==0 && j==0 ) 
+                continue; //no need to check this since already done                  
+            error = compareSubImg(Y_c, Y_p, field,
+                                  sd->width, sd->height, 1, i, j, minerror);
 #ifdef STABVERBOSE
             fprintf(f, "%i %i %f\n", i, j, error);
-#endif 
+#endif
             if (error < minerror) {
                 minerror = error;
                 tx = i;
                 ty = j;
-            }	
+            }
         }
     }
 
-    if (sd->stepsize > 1) {    // make fine grain check around the best match
-        int txc=tx; // save the shifts
-        int tyc=ty;    
-        int r = sd->stepsize - 1;
-        for (i = txc - r; i <= txc + r; i += 1) {
-            for (j = tyc - r; j <= tyc + r; j += 1) {
-                if (i == txc && j == tyc) 
+    int stepSize = sd->stepsize;
+    while(stepSize > 1) {// make fine grain check around the best match
+        int txc = tx; // save the shifts
+        int tyc = ty;
+        int newStepSize = stepSize/2; 
+        int r = stepSize - newStepSize;
+        for (i = txc - r; i <= txc + r; i += newStepSize) {
+            for (j = tyc - r; j <= tyc + r; j += newStepSize) {
+                if (i == txc && j == tyc)
                     continue; //no need to check this since already done
-                error = compareSubImg(Y_c, Y_p, field, 
-                                      sd->width, sd->height, 1, i, j);
-#ifdef STABVERBOSE                
+                unsigned int error = compareSubImg(Y_c, Y_p, field, sd->width,
+                                                   sd->height, 1, i, j, minerror);
+#ifdef STABVERBOSE
                 fprintf(f, "%i %i %f\n", i, j, error);
-#endif 	
-                if (error < minerror){
+#endif
+                if (error < minerror) {
                     minerror = error;
                     tx = i;
                     ty = j;
-                }	
+                }
             }
         }
+        stepSize /= 2;
     }
-#ifdef STABVERBOSE 
-    fclose(f); 
+    
+#ifdef STABVERBOSE
+    fclose(f);
     tc_log_msg(MOD_NAME, "Minerror: %f\n", minerror);
 #endif
 
     if (!sd->allowmax && fabs(tx) >= sd->maxshift+sd->stepsize) {
-#ifdef STABVERBOSE 
+#ifdef STABVERBOSE
         tc_log_msg(MOD_NAME, "maximal x shift ");
 #endif
         tx = 0;
     }
     if (!sd->allowmax && fabs(ty) == sd->maxshift+sd->stepsize) {
-#ifdef STABVERBOSE 
+#ifdef STABVERBOSE
         tc_log_msg(MOD_NAME, "maximal y shift ");
 #endif
         ty = 0;
     return t;
 }
 
-/* calculates the optimal transformation for one field in RGB 
+/* calculates the optimal transformation for one field in RGB
  *   slower than the YUV version because it uses all three color channels
  */
 Transform calcFieldTransRGB(StabData* sd, const Field* field, int fieldnum)
     Transform t = null_transform();
     uint8_t *I_c = sd->curr, *I_p = sd->prev;
     int i, j;
-  
-    double minerror = 1e20;  
+
+    unsigned long int minerror = MAXLONG;
     for (i = -sd->maxshift; i <= sd->maxshift; i += 2) {
-        for (j=-sd->maxshift; j <= sd->maxshift; j += 2) {      
-            double error = compareSubImg(I_c, I_p, field, 
-                                         sd->width, sd->height, 3, i, j);
+        for (j=-sd->maxshift; j <= sd->maxshift; j += 2) {
+            unsigned long int error = compareSubImg(I_c, I_p, field,
+                                         sd->width, sd->height, 3, i, j, minerror);
             if (error < minerror) {
                 minerror = error;
                 t.x = i;
                 t.y = j;
-            }	
+            }
         }
     }
     for (i = t.x - 1; i <= t.x + 1; i += 2) {
         for (j = -t.y - 1; j <= t.y + 1; j += 2) {
-            double error = compareSubImg(I_c, I_p, field, 
-                                         sd->width, sd->height, 3, i, j);
+            unsigned long int error = compareSubImg(I_c, I_p, field,
+                                         sd->width, sd->height, 3, i, j, minerror);
             if (error < minerror) {
                 minerror = error;
                 t.x = i;
                 t.y = j;
-            }	
+            }
         }
     }
     if (!sd->allowmax && fabs(t.x) == sd->maxshift) {
     return t;
 }
 
-/* compares contrast_idx structures respect to the contrast 
-   (for sort function) 
+/* compares contrast_idx structures respect to the contrast
+   (for sort function)
 */
 int cmp_contrast_idx(const void *ci1, const void* ci2)
 {
     int i,j;
     TCList* goodflds = tc_list_new(0);
     contrast_idx *ci = tc_malloc(sizeof(contrast_idx) * sd->field_num);
-    
+
     // we split all fields into row+1 segments and take from each segment
     // the best fields
     int numsegms = (sd->field_rows+1);
     contrast_idx *ci_segms = tc_malloc(sizeof(contrast_idx) * sd->field_num);
     int remaining   = 0;
     // calculate contrast for each field
-    for (i = 0; i < sd->field_num; i++) {        
+    for (i = 0; i < sd->field_num; i++) {
         ci[i].contrast = contrastfunc(sd, &sd->fields[i]);
         ci[i].index=i;
         if(ci[i].contrast < sd->contrast_threshold) ci[i].contrast = 0;
         // else printf("%i %lf\n", ci[i].index, ci[i].contrast);
-    }   
+    }
 
     memcpy(ci_segms, ci, sizeof(contrast_idx) * sd->field_num);
     // get best fields from each segment
         //printf("Segment: %i: %i-%i\n", i, startindex, endindex);
 
         // sort within segment
-        qsort(ci_segms+startindex, endindex-startindex, 
-              sizeof(contrast_idx), cmp_contrast_idx);        
+        qsort(ci_segms+startindex, endindex-startindex,
+              sizeof(contrast_idx), cmp_contrast_idx);
         // take maxfields/numsegms
         for(j=0; j<sd->maxfields/numsegms; j++){
             if(startindex+j >= endindex) continue;
-            // printf("%i %lf\n", ci_segms[startindex+j].index, 
+            // printf("%i %lf\n", ci_segms[startindex+j].index,
             //                    ci_segms[startindex+j].contrast);
-            if(ci_segms[startindex+j].contrast > 0){                
+            if(ci_segms[startindex+j].contrast > 0){
                 tc_list_append_dup(goodflds, &ci[ci_segms[startindex+j].index],
                                    sizeof(contrast_idx));
                 // don't consider them in the later selection process
-                ci_segms[startindex+j].contrast=0; 
-            }                                                     
+                ci_segms[startindex+j].contrast=0;
+            }
         }
     }
     // check whether enough fields are selected
     // printf("Phase2: %i\n", tc_list_size(goodflds));
-    remaining = sd->maxfields - tc_list_size(goodflds); 
+    remaining = sd->maxfields - tc_list_size(goodflds);
     if(remaining > 0){
         // take the remaining from the leftovers
-        qsort(ci_segms, sd->field_num,                   
+        qsort(ci_segms, sd->field_num,
               sizeof(contrast_idx), cmp_contrast_idx);
         for(j=0; j < remaining; j++){
             if(ci_segms[j].contrast > 0){
-                tc_list_append_dup(goodflds, &ci_segms[j], sizeof(contrast_idx));                    
-            }                                                     
+                tc_list_append_dup(goodflds, &ci_segms[j], sizeof(contrast_idx));
+            }
         }
-    }     
+    }
     // printf("Ende: %i\n", tc_list_size(goodflds));
     tc_free(ci);
     tc_free(ci_segms);
 
 
 
-/* tries to register current frame onto previous frame. 
+/* tries to register current frame onto previous frame.
  *   Algorithm:
  *   check all fields for vertical and horizontal transformation
  *   use minimal difference of all possible positions
- *   discards fields with low contrast 
+ *   discards fields with low contrast
  *   select maxfields field according to their contrast
  *   calculate shift as cleaned mean of all remaining fields
  *   calculate rotation angle of each field in respect to center of fields
     file = fopen(buffer, "w");
     fprintf(file, "# plot \"%s\" w l, \"\" every 2:1:0\n", buffer);
 #endif
-    
+
     TCList* goodflds = selectfields(sd, contrastfunc);
 
-    // use all "good" fields and calculate optimal match to previous frame 
+    // use all "good" fields and calculate optimal match to previous frame
     contrast_idx* f;
     while((f = (contrast_idx*)tc_list_pop(goodflds,0)) != 0){
         int i = f->index;
         t =  fieldfunc(sd, &sd->fields[i], i); // e.g. calcFieldTransYUV
 #ifdef STABVERBOSE
-        fprintf(file, "%i %i\n%f %f %i\n \n\n", sd->fields[i].x, sd->fields[i].y, 
+        fprintf(file, "%i %i\n%f %f %i\n \n\n", sd->fields[i].x, sd->fields[i].y,
                 sd->fields[i].x + t.x, sd->fields[i].y + t.y, t.extra);
 #endif
         if (t.extra != -1){ // ignore if extra == -1 (unused at the moment)
     tc_list_fini(goodflds);
 
     t = null_transform();
-    num_trans = index; // amount of transforms we actually have    
+    num_trans = index; // amount of transforms we actually have
     if (num_trans < 1) {
         tc_log_warn(MOD_NAME, "too low contrast! No field remains.\n \
                     (no translations are detected in frame %i)", sd->t);
         return t;
     }
-        
+
     int center_x = 0;
     int center_y = 0;
     // calc center point of all remaining fields
     for (i = 0; i < num_trans; i++) {
         center_x += fs[i]->x;
-        center_y += fs[i]->y;            
-    } 
+        center_y += fs[i]->y;
+    }
     center_x /= num_trans;
-    center_y /= num_trans;        
-    
+    center_y /= num_trans;
+
     if (sd->show){ // draw fields and transforms into frame.
-        // this has to be done one after another to handle possible overlap 
+        // this has to be done one after another to handle possible overlap
         if (sd->show > 1) {
             for (i = 0; i < num_trans; i++)
-                drawFieldScanArea(sd, fs[i], &ts[i]);            
+                drawFieldScanArea(sd, fs[i], &ts[i]);
         }
         for (i = 0; i < num_trans; i++)
-            drawField(sd, fs[i], &ts[i]);            
+            drawField(sd, fs[i], &ts[i]);
         for (i = 0; i < num_trans; i++)
-            drawFieldTrans(sd, fs[i], &ts[i]);            
-    } 
+            drawFieldTrans(sd, fs[i], &ts[i]);
+    }
     /* median over all transforms
        t= median_xy_transform(ts, sd->field_num);*/
-    // cleaned mean    
+    // cleaned mean
     t = cleanmean_xy_transform(ts, num_trans);
 
     // subtract avg
     // figure out angle
     if (sd->field_num < 6) {
         // the angle calculation is inaccurate for 5 and less fields
-        t.alpha = 0; 
-    } else {      
+        t.alpha = 0;
+    } else {
         for (i = 0; i < num_trans; i++) {
             angles[i] = calcAngle(sd, fs[i], &ts[i], center_x, center_y);
         }
         t.alpha = -cleanmean(angles, num_trans, &min, &max);
         if(max-min>sd->maxanglevariation){
             t.alpha=0;
-            tc_log_info(MOD_NAME, "too large variation in angle(%f)\n", 
+            tc_log_info(MOD_NAME, "too large variation in angle(%f)\n",
                         max-min);
         }
     }
     double p_x = (center_x - sd->width/2);
     double p_y = (center_y - sd->height/2);
     t.x += (cos(t.alpha)-1)*p_x  - sin(t.alpha)*p_y;
-    t.y += sin(t.alpha)*p_x  + (cos(t.alpha)-1)*p_y;    
-    
+    t.y += sin(t.alpha)*p_x  + (cos(t.alpha)-1)*p_y;
+
 #ifdef STABVERBOSE
     fclose(file);
 #endif
 void drawFieldScanArea(StabData* sd, const Field* field, const Transform* t){
     /* Draws a box centered at (field->x, field->y) into sd->curr whose
      * side length is field->size + 2*sd->maxshift, using color value 80.
      * drawBox is invoked with bytesPerPixel = 1; t is not used here. */
     // `!` binds tighter than `==`: the former `!codec == CODEC_YUV` compared
     // a 0/1 value with CODEC_YUV instead of testing for "not YUV".
     if(sd->vob->im_v_codec != CODEC_YUV)
         return;
-    drawBox(sd->curr, sd->width, sd->height, 1, field->x, field->y, 
-            field->size+2*sd->maxshift, field->size+2*sd->maxshift, 80);   
+    drawBox(sd->curr, sd->width, sd->height, 1, field->x, field->y,
+            field->size+2*sd->maxshift, field->size+2*sd->maxshift, 80);
 }
 
 /**
  * draws the field: a box of field->size x field->size centered at
  * (field->x, field->y) in sd->curr (drawBox is called with
  * bytesPerPixel = 1). The color value is 100 when t->extra == -1 and
  * 40 otherwise. Returns without drawing when the codec is not YUV.
  */
 void drawField(StabData* sd, const Field* field, const Transform* t){
     // `!` binds tighter than `==`: the former `!codec == CODEC_YUV` compared
     // a 0/1 value with CODEC_YUV instead of testing for "not YUV".
     if(sd->vob->im_v_codec != CODEC_YUV)
         return;
-    drawBox(sd->curr, sd->width, sd->height, 1, field->x, field->y, 
+    drawBox(sd->curr, sd->width, sd->height, 1, field->x, field->y,
             field->size, field->size, t->extra == -1 ? 100 : 40);
 }
 
 /**
  * draws the detected translation of a field into sd->curr: a 5x5 box
  * (color 128) at the field center and an 8x8 box (color 250) at the
  * center shifted by (t->x, t->y). drawBox is called with
  * bytesPerPixel = 1. Returns without drawing when the codec is not YUV.
  */
 void drawFieldTrans(StabData* sd, const Field* field, const Transform* t){
     // `!` binds tighter than `==`: the former `!codec == CODEC_YUV` compared
     // a 0/1 value with CODEC_YUV instead of testing for "not YUV".
     if(sd->vob->im_v_codec != CODEC_YUV)
         return;
-    drawBox(sd->curr, sd->width, sd->height, 1, 
+    drawBox(sd->curr, sd->width, sd->height, 1,
             field->x, field->y, 5, 5, 128);     // draw center
-    drawBox(sd->curr, sd->width, sd->height, 1, 
+    drawBox(sd->curr, sd->width, sd->height, 1,
             field->x + t->x, field->y + t->y, 8, 8, 250); // draw translation
 }
 
 /**
  * draws a box at the given position x,y (center) in the given color
-   (the same for all channels) 
+   (the same for all channels)
  */
-void drawBox(unsigned char* I, int width, int height, int bytesPerPixel, 
+void drawBox(unsigned char* I, int width, int height, int bytesPerPixel,
              int x, int y, int sizex, int sizey, unsigned char color){
-    
-    unsigned char* p = NULL;     
+
+    unsigned char* p = NULL;
     int j,k;
     p = I + ((x - sizex/2) + (y - sizey/2)*width)*bytesPerPixel;
     for (j = 0; j < sizey; j++){
  */
 static int stabilize_configure(TCModuleInstance *self,
             			       const char *options, vob_t *vob)
-{    
+{
     StabData *sd = NULL;
     TC_MODULE_SELF_CHECK(self, "configure");
     char* filenamecopy, *filebasename;
 
     sd = self->userdata;
 
-    /*    sd->framesize = sd->vob->im_v_width * MAX_PLANES * 
+    /*    sd->framesize = sd->vob->im_v_width * MAX_PLANES *
           sizeof(char) * 2 * sd->vob->im_v_height * 2;     */
-    sd->framesize = sd->vob->im_v_size;    
-    sd->prev = tc_zalloc(sd->framesize);    
+    sd->framesize = sd->vob->im_v_size;
+    sd->prev = tc_zalloc(sd->framesize);
     if (!sd->prev) {
         tc_log_error(MOD_NAME, "malloc failed");
         return TC_ERROR;
 
     sd->hasSeenOneFrame = 0;
     sd->transs = 0;
-    
+
     // Options
-    sd->stepsize   = 6;
+    sd->stepsize   = 4;
     sd->allowmax   = 0;
     sd->result = tc_malloc(TC_BUF_LINE);
     filenamecopy = tc_strdup(sd->vob->video_in_file);
 //    sd->field_num   = 64;
     sd->accuracy    = 4;
     sd->shakiness   = 4;
-    sd->field_size  = TC_MIN(sd->width, sd->height)/12;
+    sd->field_size  = 32; // defined below
     sd->show        = 0;
-    sd->contrast_threshold = 0.3; 
+    sd->contrast_threshold = 0.3;
     sd->maxanglevariation = 1;
-    
-    if (options != NULL) {            
-        // for some reason this plugin is called in the old fashion 
+
+    if (options != NULL) {
+        // for some reason this plugin is called in the old fashion
         //  (not with inspect). Anyway we support both ways of getting help.
         if(optstr_lookup(options, "help")) {
             tc_log_info(MOD_NAME,stabilize_help);
     sd->shakiness = TC_MIN(10,TC_MAX(1,sd->shakiness));
     sd->accuracy  = TC_MIN(15,TC_MAX(1,sd->accuracy));
     if(sd->accuracy < sd->shakiness/2){
-        tc_log_info(MOD_NAME, "accuracy should not be lower than shakiness/2");
-        sd->accuracy = sd->shakiness/2; 
+        tc_log_info(MOD_NAME, "accuracy should not be lower than shakiness/2 - fixed");
+        sd->accuracy = sd->shakiness/2;
     }
-    
+    if (sd->accuracy > 9 && sd->stepsize > 4) {
+        tc_log_info(MOD_NAME, "for high accuracy use lower stepsize - set to 4 now");
+        sd->stepsize = 4;
+    }
+
     if (verbose) {
         tc_log_info(MOD_NAME, "Image Stabilization Settings:");
         tc_log_info(MOD_NAME, "     shakiness = %d", sd->shakiness);
         tc_log_info(MOD_NAME, "        result = %s", sd->result);
     }
 
-    // shift and size: shakiness 1: height/40; 10: height/4
-    sd->maxshift     = TC_MAX(4,(TC_MIN(sd->width, sd->height)*sd->shakiness)/40);
-    sd->field_size   = TC_MAX(4,(TC_MIN(sd->width, sd->height)*sd->shakiness)/40);
-  
-    tc_log_info(MOD_NAME, "Fieldsize: %i, Maximal translation: %i pixel", 
+    // shift: shakiness 1: height/40; 10: height/4 
+    int minDimension = TC_MIN(sd->width, sd->height);
+    sd->maxshift = TC_MAX(4, (minDimension * sd->shakiness)/40);
+    // size: shakiness 1: height/40; 10: height/6 (clipped) 
+    sd->field_size
+        = TC_MAX(4, TC_MIN(minDimension/6, (minDimension * sd->shakiness)/40));
+
+#if defined(USE_SSE2_CMP) || defined(USE_SSE2_YUV_CONTRAST)
+    //must be multiple of 16 pixels for SSE2
+    sd->field_size   = (sd->field_size / 16 + 1) * 16; 
+#endif
+    tc_log_info(MOD_NAME, "Fieldsize: %i, Maximal translation: %i pixel",
                 sd->field_size, sd->maxshift);
-    if (sd->algo==1) {        
+    if (sd->algo==1) {
         // initialize measurement fields. field_num is set here.
         if (!initFields(sd)) {
             return TC_ERROR;
         tc_log_info(MOD_NAME, "Number of used measurement fields: %i out of %i",
                     sd->maxfields, sd->field_num);
     }
+    
+#ifdef USE_SSE2_CMP
+    tc_log_info(MOD_NAME, "use SSE2 optimizations");   
+#endif
     sd->f = fopen(sd->result, "w");
     if (sd->f == NULL) {
         tc_log_error(MOD_NAME, "cannot open result file %s!\n", sd->result);
         return TC_ERROR;
-    }    
+    }
     if (sd->show)
         sd->currcopy = tc_zalloc(sd->framesize);
 
     /* load unsharp filter to smooth the frames. This allows larger stepsize.*/
     char unsharp_param[128];
     int masksize = TC_MIN(13,sd->stepsize*1.8); // only works up to 13.
-    sprintf(unsharp_param,"luma=-1:luma_matrix=%ix%i:pre=1", 
+    sprintf(unsharp_param,"luma=-1:luma_matrix=%ix%i:pre=1",
             masksize, masksize);
     if (!tc_filter_add("unsharp", unsharp_param)) {
         tc_log_warn(MOD_NAME, "cannot load unsharp filter!");
  * See tcmodule-data.h for function details.
  */
 
-static int stabilize_filter_video(TCModuleInstance *self, 
+static int stabilize_filter_video(TCModuleInstance *self,
                                   vframe_list_t *frame)
 {
     StabData *sd = NULL;
-  
+
     TC_MODULE_SELF_CHECK(self, "filter_video");
     TC_MODULE_SELF_CHECK(frame, "filter_video");
-  
-    sd = self->userdata;    
+
+    sd = self->userdata;
 
     if(sd->show)  // save the buffer to restore at the end for prev
         memcpy(sd->currcopy, frame->video_buf, sd->framesize);
-    
+
     if (sd->hasSeenOneFrame) {
         sd->curr = frame->video_buf;
         if (sd->vob->im_v_codec == CODEC_RGB) {
             if (sd->algo == 0)
                 addTrans(sd, calcShiftRGBSimple(sd));
             else if (sd->algo == 1)
-                addTrans(sd, calcTransFields(sd, calcFieldTransRGB, 
+                addTrans(sd, calcTransFields(sd, calcFieldTransRGB,
                                              contrastSubImgRGB));
         } else if (sd->vob->im_v_codec == CODEC_YUV) {
             if (sd->algo == 0)
         sd->hasSeenOneFrame = 1;
         addTrans(sd, null_transform());
     }
-    
+
     if(!sd->show) { // copy current frame to prev for next frame comparison
         memcpy(sd->prev, frame->video_buf, sd->framesize);
     } else { // use the copy because we changed the original frame
         struct iterdata ID;
         ID.counter = 0;
         ID.f       = sd->f;
-        // write parameters as comments to file 
+        // write parameters as comments to file
         fprintf(sd->f, "#      accuracy = %d\n", sd->accuracy);
         fprintf(sd->f, "#     shakiness = %d\n", sd->shakiness);
         fprintf(sd->f, "#      stepsize = %d\n", sd->stepsize);
         fprintf(sd->f, "# Transforms\n#C FrameNr x y alpha zoom extra\n");
         // and all transforms
         tc_list_foreach(sd->transs, stabilize_dump_trans, &ID);
-    
+
         fclose(sd->f);
         sd->f = NULL;
     }
 			     const char *param, const char **value)
 {
     StabData *sd = NULL;
-    
+
     TC_MODULE_SELF_CHECK(self, "inspect");
     TC_MODULE_SELF_CHECK(param, "inspect");
     TC_MODULE_SELF_CHECK(value, "inspect");
         *value = stabilize_help;
     }
     CHECKPARAM("shakiness","shakiness=%d", sd->shakiness);
-    CHECKPARAM("accuracy", "accuracy=%d",  sd->accuracy); 
+    CHECKPARAM("accuracy", "accuracy=%d",  sd->accuracy);
     CHECKPARAM("stepsize", "stepsize=%d",  sd->stepsize);
     CHECKPARAM("allowmax", "allowmax=%d",  sd->allowmax);
     CHECKPARAM("algo",     "algo=%d",      sd->algo);
     return TC_OK;
 }
 
-static const TCCodecID stabilize_codecs_in[] = { 
-    TC_CODEC_YUV420P, TC_CODEC_YUV422P, TC_CODEC_RGB, TC_CODEC_ERROR 
+static const TCCodecID stabilize_codecs_in[] = {
+    TC_CODEC_YUV420P, TC_CODEC_YUV422P, TC_CODEC_RGB, TC_CODEC_ERROR
 };
-static const TCCodecID stabilize_codecs_out[] = { 
-    TC_CODEC_YUV420P, TC_CODEC_YUV422P, TC_CODEC_RGB, TC_CODEC_ERROR 
+static const TCCodecID stabilize_codecs_out[] = {
+    TC_CODEC_YUV420P, TC_CODEC_YUV422P, TC_CODEC_RGB, TC_CODEC_ERROR
 };
-TC_MODULE_FILTER_FORMATS(stabilize); 
+TC_MODULE_FILTER_FORMATS(stabilize);
 
 TC_MODULE_INFO(stabilize);
 

filter/stabilize/filter_transform.c

 /*
  *  filter_transform.c
  *
- *  Copyright (C) Georg Martius - June 2007
+ *  Copyright (C) Georg Martius - 2007 -- 2011
  *   georg dot martius at web dot de  
  *
  *  This file is part of transcode, a video stream processing tool
 */
 
 #define MOD_NAME    "filter_transform.so"
-#define MOD_VERSION "v0.75 (2009-10-31)"
+#define MOD_VERSION "v0.79 (2011-10-12)"
 #define MOD_CAP     "transforms each frame according to transformations\n\
  given in an input file (e.g. translation, rotate) see also filter stabilize"
 #define MOD_AUTHOR  "Georg Martius"