Ronald Oussoren avatar Ronald Oussoren committed e135a8f

Some more work on scanframeworks. We're getting closer to a working version, but
are not there yet (although the script should be perfectly usable for most
frameworks, it is just not good enough yet to fully wrap Foundation and AppKit).

- Scan dependent frameworks for types
- Add a hinting facility
- Add hints for Foundation
- Add a script that reports the differences between the old and new wrappers
(first try, it's really trivial)
- Recognize 'typedef struct _Foo Foo' and wrap pointers to those as handles
- Various bugfixes
- Drop 'enum_generator.py': it is not relevant for scanframework

Comments (0)

Files changed (5)

sandbox/parsing/Foundation_hints.py

+#
+# Scanframework hints for the framework 'Foundation'
+#
+# See scanframework.py for a description of the contents of this file.
+#
+# Names of extra modules that the generated __init__.py should try to import.
+ADDITIONAL_IMPORTS=(
+    '_Foundation',
+)
+
+# Explicit direction hints for pointer arguments.
+# Key:   (function name, argument index as passed to argument_kind() below)
+# Value: source text of the objc constant describing the direction.
+_ARGUMENT_KINDS={
+    ('NSGetSizeAndAlignment', 1):   'objc._C_OUT',
+    ('NSGetSizeAndAlignment', 2):   'objc._C_OUT',
+    ('NSDivideRect', 1):            'objc._C_OUT',
+    ('NSDivideRect', 2):            'objc._C_OUT',
+    ('NSJavaClassesFromPath', 3):   'objc._C_OUT',
+    ('NSJavaClassesForBundle', 2):  'objc._C_OUT',
+}
+
+def argument_kind(name, idx, nm, tp):
+    try:
+        return _ARGUMENT_KINDS[(name, idx)]
+    except KeyError:
+        if name.startswith('NSDecimal'):
+            return 'objc._C_OUT'
+        return None
+
+def should_create_struct_wrapper(name, fieldnames, fieldtypes):
+    if name == 'NSDecimal':
+        # NSDecimal is wrapped using a custom wrapper, to give
+        # it a full set of numeric methods.
+        return False
+
+    return True
+
+
+# Items that should not be wrapped. The key is the C object that shouldn't
+# be wrapped, the value should describe why it isn't wrapped.
+IGNORES={
+    'NSLogv': 'Varargs function, manually wrapped',
+    'NSLog':  'Varargs function, manually wrapped',
+
+    # If these turn out to be useful after all we should write a custom
+    # buffer-like type to represent memory.
+    'NSZoneFromPointer': 'Memory management, not useful in Python',
+    'NSZoneCalloc':      'Memory management, not useful in Python',
+    'NSZoneMalloc':      'Memory management, not useful in Python',
+    'NSZoneRealloc':     'Memory management, not useful in Python',
+    'NSZoneFree':        'Memory management, not useful in Python',
+    'NSAllocateMemoryPages':   'More memory management',
+    'NSDeallocateMemoryPages': 'More memory management',
+    'NSCopyMemoryPages':       'More memory management',
+    'NSAllocateCollectable':    'Yet more memory management',
+    'NSReallocateCollectable':  'Yet more memory management',
+
+    # Low-level exception handling; these are not useful in Python.
+    'NSGetUncaughtExceptionHandler': 'Low-level exception handling',
+    'NSSetUncaughtExceptionHandler': 'Low-level exception handling',
+
+    # Hashtables use callback functions and are not used in the high-level
+    # APIs.
+    'NSCreateHashTableWithZone':            'NSHashtable are like dicts',
+    'NSCreateHashTable':                    'NSHashtable are like dicts',
+    'NSHashGet':                            'NSHashtable are like dicts',
+    'NSHashInsert':                         'NSHashtable are like dicts',
+    'NSHashInsertKnownAbsent':              'NSHashtable are like dicts',
+    'NSHashInsertIfAbsent':                 'NSHashtable are like dicts',
+    'NSHashRemove':                         'NSHashtable are like dicts',
+    'NSEnumerateHashTable':                 'NSHashtable are like dicts',
+    'NSNextHashEnumeratorItem':             'NSHashtable are like dicts',
+    'NSEndHashTableEnumeration':            'NSHashtable are like dicts',
+    'NSIntHashCallBacks':                   'NSHashtable are like dicts',
+    'NSNonOwnedPointerHashCallBacks':       'NSHashtable are like dicts',
+    'NSNonRetainedObjectHashCallBacks':     'NSHashtable are like dicts',
+    'NSObjectHashCallBacks':                'NSHashtable are like dicts',
+    'NSOwnedObjectIdentityHashCallBacks':   'NSHashtable are like dicts',
+    'NSOwnedPointerHashCallBacks':          'NSHashtable are like dicts',
+    'NSPointerToStructHashCallBacks':       'NSHashtable are like dicts',
+
+    # Maptables use callback functions and are not used in the high-level
+    # APIs.
+    'NSCreateMapTableWithZone':             'NSMaptables are like dicts',
+    'NSCreateMapTable':                     'NSMaptables are like dicts',
+    'NSMapMember':                          'NSMaptables are like dicts',
+    'NSMapGet':                             'NSMaptables are like dicts',
+    'NSMapInsert':                          'NSMaptables are like dicts',
+    'NSMapInsertKnownAbsent':               'NSMaptables are like dicts',
+    'NSMapInsertIfAbsent':                  'NSMaptables are like dicts',
+    'NSMapRemove':                          'NSMaptables are like dicts',
+    'NSEnumerateMapTable':                  'NSMaptables are like dicts',
+    'NSNextMapEnumeratorPair':              'NSMaptables are like dicts',
+    'NSEndMapTableEnumeration':             'NSMaptables are like dicts',
+    'NSIntMapKeyCallBacks':                 'NSMaptables are like dicts',
+    'NSNonOwnedPointerMapKeyCallBacks':     'NSMaptables are like dicts',
+    'NSNonOwnedPointerOrNullMapKeyCallBacks': 'NSMaptables are like dicts',
+    'NSNonRetainedObjectMapKeyCallBacks':   'NSMaptables are like dicts',
+    'NSObjectMapKeyCallBacks':              'NSMaptables are like dicts',
+    'NSOwnedPointerMapKeyCallBacks':        'NSMaptables are like dicts',
+    'NSIntMapValueCallBacks':               'NSMaptables are like dicts',
+    'NSNonOwnedPointerMapValueCallBacks':   'NSMaptables are like dicts',
+    'NSNonRetainedObjectMapValueCallBacks': 'NSMaptables are like dicts',
+    'NSOwnedPointerMapValueCallBacks':      'NSMaptables are like dicts',
+}

sandbox/parsing/diffWrappers.py

+#!/usr/bin/env python
+"""
+Script for checking if scanframeworks creates wrappers for at least as much
+stuff as the old scripts.
+"""
+import sys
+
+if len(sys.argv) != 3:
+    print "Usage: diffWrappers.py Foundation.old Foundation.new"
+    sys.exit(1)
+
+old = __import__(sys.argv[1])
+new = __import__(sys.argv[2])
+
+for k, v_old in old.__dict__.iteritems():
+    if not hasattr(new, k):
+        print "In OLD but not in NEW: %s (%r)" % (k, v_old)
+        continue
+
+    v_new = getattr(new, k)
+
+    if isinstance(v_old, (str, int, unicode)):
+        if v_old != v_new:
+            print v_new
+            print "OLD != NEW for %s (%r != %r)" % (k, v_old, v_new)
+
+    elif callable(v_old):
+        if not callable(v_new):
+            print "OLD is callable, NEW isn't for %s (%r, %r)" % (
+                    k, v_old, v_new)
+
+    elif callable(v_new):
+        if not callable(v_old):
+            print "OLD isn't callable, NEW is for %s (%r, %r)" % (
+                    k, v_old, v_new)
+
+    else:
+        print k, type(v_old), type(v_new)
+
+for k, v in new.__dict__.iteritems():
+    if not hasattr(old, k):
+        print "Not in OLD: %s (%r)" % (k, v)

sandbox/parsing/enum_generator.py

-from pyparsing import *
-from itertools import *
-import re
-try:
-    set
-except NameError:
-    from sets import Set as set
-
-_enum_parser = None
-def enum_parser():
-    global _enum_parser
-    if _enum_parser is not None:
-        return _enum_parser
-
-    singleLineComment = "//" + restOfLine
-    anyComment = singleLineComment | cStyleComment
-
-    ident = Word(alphas + "_", alphanums + "_")
-
-    anyInteger = Word( '+-'+nums, nums)
-    anyDecimal = Combine( anyInteger
-        + Optional( Literal('.') + Optional( Word( nums ) ) )
-        + Optional( CaselessLiteral('e') + anyInteger )
-        + Optional( CaselessLiteral('f') )
-    )
-    anyHex = Literal('0x') + Word(nums + 'abcdefABCDEF')
-    anyOctal = Word('0', '01234567', min=2)
-    anyNumber = anyOctal | anyDecimal | anyHex
-    
-    lineContinuation = Literal("\\") + LineEnd()
-
-    KEYWORDS = "extern volatile void const typedef static const"
-    defineValue = (
-        ~oneOf(KEYWORDS) +
-        ( ident | anyNumber | quotedString )
-    )
-    validDefineValue = ( defineValue
-        | Combine( Literal('(')
-            + defineValue
-            + Literal(')')
-            , adjacent=False )
-    )
-    define = Combine( Literal("#")
-        + Literal("define")
-        + ident.setResultsName("define")
-        + validDefineValue
-        + LineEnd()
-        , adjacent=False
-    ).setName('define').setResultsName('DEFINE')
-    
-    enumValue = ZeroOrMore( CharsNotIn(',}') ).setName('enumValue')
-    enumValue.ignore( anyComment )
-
-    enumPair = ident + Suppress( enumValue )
-    enumPair.ignore( anyComment )
-    
-    enum = Combine( Optional( Literal("typedef") )
-        + Literal("enum")
-        + Optional( ident )
-        + Literal("{")
-        + delimitedList( enumPair ).setResultsName("enum")
-        + Literal("}")
-        , adjacent=False
-    ).setName('enum').setResultsName('ENUM')
-    
-    bnf = define | enum
-    bnf.ignore( anyComment )
-    bnf.ignore( lineContinuation )
-    _enum_parser = bnf
-    return _enum_parser
-    
-
-HEADERSTUB = r"""
-#import <Foundation/Foundation.h>
-#import <Python/Python.h>
-#import <stdio.h>
-#import <unistd.h>
-
-#define _C_ID       '@'
-#define _C_CLASS    '#'
-#define _C_SEL      ':'
-#define _C_CHR      'c'
-#define _C_UCHR     'C'
-#define _C_SHT      's'
-#define _C_USHT     'S'
-#define _C_INT      'i'
-#define _C_UINT     'I'
-#define _C_LNG      'l'
-#define _C_ULNG     'L'
-#define _C_LNG_LNG  'q'
-#define _C_ULNG_LNG 'Q'
-#define _C_FLT      'f'
-#define _C_DBL      'd'
-#define _C_BFLD     'b'
-#define _C_VOID     'v'
-#define _C_UNDEF    '?'
-#define _C_PTR      '^'
-#define _C_CHARPTR  '*'
-#define _C_ATOM     '%'
-#define _C_ARY_B    '['
-#define _C_ARY_E    ']'
-#define _C_UNION_B  '('
-#define _C_UNION_E  ')'
-#define _C_STRUCT_B '{'
-#define _C_STRUCT_E '}'
-#define _C_VECTOR   '!'
-
-static void printPythonRepresentation(char *name, char *typecode, void *data) {
-    PyObject *obj = NULL;
-    int typelen = strlen(typecode);
-    if (typecode[1] == '\0') {
-        switch (typecode[0]) {
-            case _C_INT:
-            case _C_LNG:
-                obj = PyInt_FromLong(*((long *)data));
-                break;
-            case _C_UINT:
-            case _C_ULNG:
-                obj = PyLong_FromUnsignedLong(*((unsigned long *)data));
-                break;
-            case _C_LNG_LNG:
-                obj = PyLong_FromLongLong(*((long long *)data));
-                break;
-            case _C_ULNG_LNG:
-                obj = PyLong_FromUnsignedLongLong(*((unsigned long long *)data));
-                break;
-            case _C_FLT:
-                obj = PyFloat_FromDouble((double)(*(float *)data));
-                break;
-            case _C_DBL:
-                obj = PyFloat_FromDouble(*((double *)data));
-                break;
-        }
-    } else {
-        if (typelen > 3 && typecode[0] == '[' && typecode[typelen-1] == ']' && typecode[typelen-2] == 'c') {
-            obj = PyString_FromString((const char *)(*((char **)data)));
-        }
-    }
-    if (obj == NULL) { 
-        printf("# unrecognized type code for %s : \"%s\"\n", name, typecode);
-    } else { 
-        PyObject *repr = PyObject_Repr(obj); 
-        Py_DECREF(obj); 
-        if (repr == NULL) { 
-            printf("# exception during repr for %s\n", name); 
-        } else { 
-            printf("%s = %s\n", name, PyString_AsString(repr)); 
-            Py_DECREF(repr); 
-        } 
-    } 
-}
-#define PRINT_DECLARATION(name) do { \
-    typeof(name) name##copy = name; \
-    printPythonRepresentation(#name, @encode(typeof(name)), (void *)&(name##copy)); \
-} while (0)
-
-"""
-
-MAINBEGIN = """
-int main(int argc, char **argv)
-{
-    Py_Initialize();
-"""
-
-MAINEND = """
-    Py_Finalize();
-    return 1;
-}
-"""
-
-def emit_enum(names):
-    for name in names:
-        yield "    PRINT_DECLARATION(%s);" % (name,)
-
-def emit_define(name):
-    yield "#if defined(%s)" % (name,)
-    yield "    PRINT_DECLARATION(%s);" % (name,)
-    yield "#endif"
-
-def scanForImports(fn, framework):
-    data = file(fn).read()
-    return re.compile(r'#\s*import\s+<' + framework + r'/([^>]+)>').findall(data)
-
-import os
-import sys
-import glob
-
-def codeGeneratorForFramework(frameworkname, basepath, fileobj=None):
-    if fileobj is None:
-        fileobj = sys.stdout
-    if fileobj == sys.stdout:
-        flush = fileobj.flush
-    else:
-        flush = lambda: None
-    frameworkpath = os.path.join(basepath, frameworkname + '.framework', 'Headers')
-    files = [frameworkname + '.h']
-    processed = []
-    while files:
-        fn = files.pop(0)
-        if fn in processed:
-            continue
-        processed.append(fn)
-        files.extend(scanForImports(os.path.join(frameworkpath, fn), frameworkname))
-
-    print '#import <%s/%s.h>' % (frameworkname, frameworkname)
-    print HEADERSTUB
-    print MAINBEGIN
-    for fn in processed:
-        fn = os.path.join(frameworkpath, fn)
-        print r'    printf("# %s\n");' % (fn.replace('"', r'\"'),)
-        sys.stdout.flush()
-        for obj, s, e in enum_parser().scanString(file(fn).read()):
-            print r'    printf("# POS(%s:%s)\n");' % (s, e)
-            names = ()
-            if obj.ENUM:
-                emitter = emit_enum(obj.ENUM.enum)
-            elif obj.DEFINE:
-                emitter = emit_define(obj.DEFINE.define)
-            for line in emitter:
-                print line
-            sys.stdout.flush()
-    print MAINEND
-
-if __name__ == '__main__':
-    codeGeneratorForFramework('Foundation', '/System/Library/Frameworks')

sandbox/parsing/scanframework.py

 # TODO
 # - documentation
-# - scan other frameworks for type definitions
-#   (fall back to compiling test programs if all else fails)
+# - scan headers outside of frameworks (/usr/include/foo.h)
 # - generate wrappers for 'static inline' functions
 # - parse protocols and categories -> generate informal_protocol definitions
 # - deal with embedded frameworks
-# - add hinting facility: which definitions should not be wrapped, hints
-#   about input/output arguments, ... . 
 # - compare results of scanframeworks with those of the 'old' generator
 #   scripts
 # - test, test and even more testing
+# - refactor script: make it possible to create wrappers for multiple 
+#   frameworks in one go, without rescanning headers over and over again.
 # - integrate into build process
+# - Scanframework of CoreFoundation seems to indicate that the tokenizer 
+#   isn't good enough yet (or that I broke it).
+# - How can we wrap CoreFoundation types (e.g. CFURLRef)? How can we 
+#   find CFRef-style types?
 import os
 try:
     set
 from tokenize_header import *
 from macholib.dyld import framework_find
 from itertools import *
+from textwrap import dedent
+
+class Hinter (object):
+    """
+    A hinter can be used to specify additional information about
+    a framework. 
+
+    NOTE: The hinter should only be used to insert information that
+    cannot be deduced from the header files, don't use the hinter to
+    work around limitations in the scanner!
+    """
+    def __init__(self, path):
+        self._globals = {}
+        execfile(path, self._globals)
+    
+        self._ignores = self._globals.get('IGNORES', {})
+        self._ignorefunc = self._globals.get('should_ignore', None)
+
+    def should_ignore(self, name):
+        if name in self._ignores:
+            return True
+        elif self._ignorefunc is not None and self._ignorefunc(name):
+            return True
+
+        return False
+
+    def write_additional_imports(self, fp):
+        """
+        Write additional import statements in the __init__.py. This
+        uses the 'ADDITIONAL_IMPORTS' in the hints file, if that exists.
+        """
+        imports = self._globals.get('ADDITIONAL_IMPORTS', ())
+
+        for imp in imports:
+            print >>fp, dedent("""\
+                    try:
+                        import %(imp)s
+                    except ImportError:
+                        pass
+                    """%locals())
+
+    def argument_kind(self, name, idx, nm, tp):
+        """
+        Determine the kind (in, out or inout) of an argument, using
+        the argument_kind function in the hints file. 
+        
+        The hint function should return None if it doesn't know the kind
+        of the argument.
+        """
+        fun = self._globals.get('argument_kind', None)
+        if fun is None:
+            return None
+
+        return fun(name, idx, nm, tp)
+
+    def should_create_struct_wrapper(self, name, fieldnames, fieldtypes):
+        fun = self._globals.get('should_create_struct_wrapper', None)
+        if fun is None:
+            return None
+
+        return fun(name, fieldnames, fieldtypes)
+
+
+
 
 def update_fallback(env, framework):
     frameworks = os.path.join(framework, 'Frameworks')
             except ValueError:
                 i = (value, 0)
 
+            if hinter is not None and hinter.should_ignore(name):
+                continue
+
             yield '\n%s = %s' % (name, value)
         elif isinstance(token, EnumBareMember):
             name = token['name']
             if isinstance(i, tuple):
                 i = (i[0], i[1] + 1)
+
+                if hinter is not None and hinter.should_ignore(name):
+                    continue
+
                 yield '\n%s = %s + %s'%(name, i[0], i[1])
 
             else:
                 i += 1
                 value = i
+
+                if hinter is not None and hinter.should_ignore(name):
+                    continue
+
                 yield '\n%s = %s' % (name, value)
         elif isinstance(token, (EnumEnd, NamedEnumEnd)):
             yield '\n'
         elif isinstance(token, SingleLineComment):
             yield ' # ' + token['comment']
 
-def do_struct(token, types, structs):
+def do_opaque_struct(token, types, opaque_pointers, hinter):
+    name = token['name']
+    tag = token['label']
+    indirect = token['indirection'].strip() == '*'
+
+    if hinter is not None and hinter.should_ignore(name):
+        return
+
+    encoded = 'objc._C_PTR + objc._C_STRUCT_B + %r + objc._C_STRUCT_E'%(tag,)
+    types[name +  '*'] = encoded
+
+    if not indirect:
+        name += 'Ptr'
+
+    opaque_pointers.append(
+        '%(name)s = objc.createOpaquePointerType(%(name)r, %(encoded)r, "typedef struct %(tag)s* %(name)s")\n'% locals() )
+
+
+def do_struct(token, types, structs, hinter):
+    # Skip structs containing function pointers, those cannot be
+    # wrapped automatically (yet?)
+    if contains_instances_of(token.matches(), FunctionStructMember): return
+
     externalname = token.matches()[-1]['name']
+
+    if hinter is not None and hinter.should_ignore(externalname):
+        return
+
     structname = token['structname']
     body = token.matches()[:-1]
     fieldnames = []
+    fields = []
     elems = [
         'objc._C_STRUCT_B',
     ]
         elems.append('"="')
         for tk in body:
             if isinstance(tk, StructMember):
-                tp = normalize_whitespace(tk['type'])
+                tp = normalize_typename(tk['type'])
                 nm = tk['name']
                 nm = [ n.strip() for n in nm.split(',') ]
                 fieldnames.extend(nm)
+                for x in nm:
+                    fields.append('    %s\t%s;'%(tp, x))
 
                 try:
                     etp = types[tp]
     encoded = "''.join(("+ ', '.join(elems) + ",))"
     types[externalname] = '_%s_encoded'%(externalname,)
 
+    doc = 'typedef struct {\n%s\n} %s;'%('\n'.join(fields), externalname)
+
     structs.append('_%s_encoded = %s\n'%(externalname, encoded))
-    structs.append('%s = objc.createStructType("%s", _%s_encoded, %s, "")\n'%(
-        externalname, externalname, externalname, fieldnames))
 
-def normalize_whitespace(value):
+    if hinter is not None and not hinter.should_create_struct_wrapper(
+            externalname, fieldnames, fields):
+        return
+
+    structs.append('%s = objc.createStructType("%s", _%s_encoded, %s, %r)\n'%(
+        externalname, externalname, externalname, fieldnames, doc))
+
+def normalize_typename(value):
     value = value.strip().replace('\t', ' ')
     while '  ' in value:
         value = value.replace('  ', ' ')
+    value = value.replace(' *', '*')
     return value
 
-def do_function(token, types, functions):
-    returns = normalize_whitespace(token['returns'])
+def do_function(token, types, functions, hinter=None):
+    returns = normalize_typename(token['returns'])
     name = token['name'].strip()
 
+    # Ignore  private functions
+    if name.startswith('_'): return
+
+    if hinter is not None and hinter.should_ignore(name):
+        return
+
     if isinstance(token, ExportVoidFunction):
         if returns in types:
             functions.append("\n        (%r, %s, '%s %s(void)'),"%(
                     name, 
                     types[returns],
-                    returns.replace(' *', '*'), name))
+                    returns, name))
         else:
             print "Ignore function %s (retval) %r"%(name, returns)
         return
 
+    if contains_instances_of(token.matches(), FunctionElipsisParameter):
+        print "Ignore varargs function %s"%(name,)
+        return
+
     if returns not in types and returns != 'void':
         print "Ignore function %s (retval) %r"%(name, returns)
         return
 
     arglist = []
     if returns == 'void':
-        argtypes = ['_objc._C_VOID',]
+        argtypes = ['objc._C_VOID',]
     else:
         argtypes = [types[returns],]
 
-    for arg in token.matches()[:-1]:
-        tp = normalize_whitespace(arg['type'])
+    for idx, arg in enumerate(token.matches()[:-1]):
+        tp = normalize_typename(arg['type'])
         nm = arg['name']
         if nm is None:
-            arglist.append('%s'%(tp.replace(' *', '*'),))
+            arglist.append('%s'%(tp,))
         else:
             nm = nm.strip()
-            arglist.append('%s %s'%(tp.replace(' *', '*'), nm))
-        if tp not in types:
+            arglist.append('%s %s'%(tp, nm))
+
+        if tp in types:
+            encoded = types[tp]
+        elif tp[-1] == '*':
+            kind = None
+            if tp.startswith('const '):
+                kind = 'objc._C_IN'
+                c = tp[5:].strip()
+            else:
+                c = tp
+
+
+            ptr = 0
+            while c[-1] == '*':
+                c = c[:-1]
+                ptr += 1
+            if c not in types:
+                print 'Ignore function %s (arg) nm: %r, tp: %r'%(name, nm, tp)
+                return
+            
+            # XXX: we really need a hinting mechanism here: someone needs to
+            # tell us if the argument is an input or output argument.
+            if kind is None:
+                if hinter is not None:
+                    kind = hinter.argument_kind(name, idx, nm, tp)
+                    if kind is None:
+                        print 'Ignore function %s (arg) nm: %r, tp: %r'%(
+                            name, nm, tp)
+                        return
+                else:
+                    print 'Ignore function %s (arg) nm: %r, tp: %r'%(
+                        name, nm, tp)
+                    return
+
+            if ptr != 1:
+                print 'Ignore function %s (arg) nm: %r, tp: %r'%(name, nm, tp)
+                return
+                #encoded = kind + ' + (objc._C_PTR * %d ) + '%(ptr,) + types[c]
+            else:
+                encoded = kind + ' + objc._C_PTR + ' + types[c]
+        else:
             print 'Ignore function %s (arg) nm: %r, tp: %r'%(name, nm, tp)
             return
-        argtypes.append(types[tp])
+
+        argtypes.append(encoded)
 
     encoded = "''.join(("+ ', '.join(argtypes) + ",))"
     functions.append("\n        (%r, %s, '%s %s(%s)'),"%(
-        name, encoded, returns.replace(' *', '*'), name, ', '.join(arglist)))
+        name, encoded, returns, name, ', '.join(arglist)))
 
-def makeInit(framework, out):
+def do_uninteresting_typedef(token, types):
+    if ',' in token['body']: return
+    ptr = 0
+    body = token['body'].split()
+
+    alias = ' '.join(body[:-1])
+    target = body[-1]
+
+    if hinter is not None and hinter.should_ignore(target):
+        return
+
+    while alias[-1] == '*':
+        alias = alias[:-1]
+        ptr += 1
+    while target[0] == '*':
+        target = target[1:]
+        ptr += 1
+    if alias in types:
+        if ptr == 1:
+            prefix = 'objc._C_PTR+'
+        elif ptr > 1:
+            prefix = '(objc._C_PTR*%s)+'%(ptr,)
+        else:
+            prefix = ''
+        types[target] = prefix + types[alias]
+    else:
+        print "Ignore simple typedef: %s"%(target,)
+
+def extractTypes(framework, types, dependencies):
+    """
+    Extract type definitions from a framework. 
+
+    This function only parses type definitions and updates the type table.
+    """
+    f = FrameworkScanner()
+
+    for token in ifilter(None, f.scanframework(framework)):
+        if isinstance(token, NamedEnum):
+            nm = token.matches()[-1]['name'].strip()
+            types[nm] = types['int']
+
+        elif isinstance(token, Dependency):
+            # TODO: also parse system headers
+            dep = os.path.dirname(token.infoTuple()[0])
+            if dep and dep not in dependencies:
+                dependencies.add(dep)
+                extractTypes(dep, types, dependencies)
+
+        elif isinstance(token, NamedStruct):
+            do_struct(token, types, [], None)
+
+        elif isinstance(token, OpaqueNamedStruct):
+            do_opaque_struct(token, types, [], None)
+
+        elif isinstance(token, UninterestingTypedef):
+            do_uninteresting_typedef(token, types)
+
+        elif isinstance(token, (Interface, ForwardClassReference)):
+            types[token['name'] + '*'] = types['id']
+
+def makeInit(framework, out, hinter = None):
     framework_name = filter(None, os.path.split(framework))[0]
     framework_path = unicode(os.path.dirname(framework_find(framework_name)), sys.getfilesystemencoding())
     f = FrameworkScanner()
         'NSString*': 'objc._C_ID',
         'CFStringRef': 'objc._C_ID',
         'SEL': 'objc._C_SEL',
-        'BOOL': 'objc._C_BOOL',
-        'Class': 'objc._C_CLS',
+        'BOOL': 'objc._C_NSBOOL',
+        'bool': 'objc._C_BOOL',
+        'Class': 'objc._C_CLASS',
 
         'char': 'objc._C_CHR',
         'unsigned char': 'objc._C_UCHR',
 
         'float': 'objc._C_FLT',
         'double': 'objc._C_DBL',
+        
+        'char*': 'objc._C_CHARPTR',
     }
+
+    # Some aliases
+    # XXX: need to improve this script, this information can be 
+    # extracted from system headers
+    types['int8_t'] = types['char']
+    types['uint8_t'] = types['unsigned char']
+    types['int16_t'] = types['short']
+    types['uint16_t'] = types['unsigned short']
+    types['int32_t'] = types['long']
+    types['uint32_t'] = types['unsigned long']
+    types['int64_t'] = types['long long']
+    types['uint64_t'] = types['unsigned long long']
+    types['const char*'] = types['char*']
+
     ignores = set([
         CompilerDirective,
         BlockComment,
     imports = []
     structs = []
     functions = []
+    opaque_pointers = []
     for token in ifilter(None, f.scanframework(framework)):
         if isinstance(token, GlobalThing):
+            name = token['name'].strip()
+
+            if hinter and hinter.should_ignore(name):
+                continue
 
             # Skip private variables
-            if token['name'][0] == '_': continue
+            if name[0] == '_': continue
 
-            tp = normalize_whitespace(token['type'])
+            tp = normalize_typename(token['type'])
             if tp not in types:
                 print 'ignore', token
                 continue
 
-            globthings.append('\n        (%r, %s),' % (unicode(token['name']), types[tp]))
+            globthings.append('\n        (%r, %s),' %(unicode(name), types[tp]))
         elif isinstance(token, (Enum, NamedEnum)):
             if isinstance(token, NamedEnum):
                 nm = token.matches()[-1]['name'].strip()
             dep = os.path.dirname(token.infoTuple()[0])
             if dep and dep not in dependencies:
                 dependencies.add(dep)
+                extractTypes(dep, types, dependencies)
                 imports.append('\ntry:\n    from %s import *\nexcept ImportError:\n    pass\n' % (dep,))
         elif isinstance(token, SimpleDefine):
+            if token['name'].startswith('_'): continue
+            if token['name'] in ('NULL',): continue
+            if token['value'].startswith('AVAILABLE_MAC_OS_X_VERSION_'): continue
+            if token['value'] in ('SHRT_MAX',): continue # XXX
             simple_defines.append('%s=%s\n'%(token['name'], token['value']))
 
         
         elif isinstance(token, NamedStruct):
+            do_struct(token, types, structs, hinter)
 
-            # Skip structs containing function pointers, those cannot be
-            # wrapped automaticly (yet?)
-            if contains_instances_of(token.matches(), FunctionStructMember): continue
-
-            do_struct(token, types, structs)
+        elif isinstance(token, OpaqueNamedStruct):
+            do_opaque_struct(token, types, opaque_pointers, hinter)
 
         elif isinstance(token, UninterestingTypedef):
-            ptr = 0
-            body = token['body'].split()
-            if len(body) == 2:
-                alias, target = body
-                while alias[-1] == '*':
-                    alias = alias[:-1]
-                    ptr += 1
-                while target[0] == '*':
-                    target = target[1:]
-                    ptr += 1
-                if alias in types:
-                    if ptr == 1:
-                        prefix = 'objc._C_PTR+'
-                    elif ptr > 1:
-                        prefix = '(objc._C_PTR*%s)+'%(ptr,)
-                    else:
-                        prefix = ''
-                    types[target] = prefix + types[alias]
-
+            do_uninteresting_typedef(token, types)
 
         elif isinstance(token, Protocol):
             # TODO: generate informal-protocol definition. 
         elif isinstance(token, (Interface, ForwardClassReference)):
             # Class definition: make the type known to the type table
             types[token['name'] + '*'] = types['id']
-            types[token['name'] + ' *'] = types['id']
 
         elif isinstance(token, (ExportFunction, ExportVoidFunction)):
-            do_function(token, types, functions)
+            do_function(token, types, functions, hinter)
 
         elif isinstance(token, StaticInlineFunction):
             # TODO: emit wrapper inside a C file.
                 import pdb
                 pdb.Pdb().set_trace()
     print >>out, '# Imports'
+    out.write('import objc\n')
     out.writelines(imports)
+    if hinter is not None:
+        hinter.write_additional_imports(out)
+        
     if enums:
         print >>out, '\n# Enumerations'
         out.writelines(enums)
         print >>out, '\n# struct definitions'
         out.writelines(structs)
 
+    if opaque_pointers:
+        print >>out, '\n# opaque pointers'
+        out.writelines(opaque_pointers)
+
     bundle_variables = ''.join(globthings)
     bundle_functions = ''.join(functions)
     print >>out, """
 
 def _initialize():
-    from Foundation import NSBundle
     import objc
     p = objc.pathForFramework(%(framework_path)r)
     objc.loadBundle(%(framework_name)r, globals(), bundle_path=p)
 
     if bundle_variables or bundle_functions:
         print >>out, """\
-    b = NSBundle.bundleWithPath_(p)
+    b = objc.lookUpClass('NSBundle').bundleWithPath_(p)
 """
 
     if bundle_variables:
 """ % locals()
     print >>out, """
 _initialize()
+del objc
 """
 
-def makeWrapper(fmwk):
+def makeWrapper(fmwk, hinter):
     # Generate the wrapper package for framework `fmwk`: make sure a
     # directory named after the framework exists, then let makeInit write
     # its __init__.py. `hinter` is handed straight to makeInit; the
     # command-line entry point passes None when no hints file is given.
     try:
         os.makedirs(fmwk)
     except OSError:
         # Any OSError from makedirs (e.g. the directory already exists)
         # is deliberately ignored.
         pass
-    makeInit(fmwk, file(os.path.join(fmwk, '__init__.py'), 'w'))
+    makeInit(fmwk, file(os.path.join(fmwk, '__init__.py'), 'w'), hinter)
 
 if __name__ == '__main__':
     # Command line: scanframeworks [Framework [hinter]]
     #   no arguments  -> wrap 'PreferencePanes' with no hinter
     #   one argument  -> wrap the named framework with no hinter
     #   two arguments -> wrap the named framework using Hinter(<2nd arg>)
     # Any other argument count prints the usage line to stderr and exits
     # with status 1.
-    fmwk = (sys.argv[1:] or ['PreferencePanes'])[0]
-    makeWrapper(fmwk)
+    if len(sys.argv) == 1:
+        fmwk = 'PreferencePanes'
+        hinter = None
+    elif len(sys.argv) == 2:
+        fmwk = sys.argv[1]
+        hinter = None
+    elif len(sys.argv) == 3:
+        fmwk = sys.argv[1]
+        hinter = Hinter(sys.argv[2])
+    else:
+        print >> sys.stderr, "Usage: scanframeworks [Framework [hinter]]"
+        sys.exit(1)
+
+    makeWrapper(fmwk, hinter)

sandbox/parsing/tokenize_header.py

 SUBPATTERNS = dict(
     AVAILABLE=r'([A-Z][A-Z0-9_]+)',
     PROTOCOLS=r'(<[^>]+>)',
-    KEYWORD=r'((double|float|int|unsigned|long|char|extern|volatile|void|inline|__(\w+?)__|const|typedef|static|const))',
-    IDENTIFIER=r'([A-Za-z_]\w*)',
+    TYPE_KEYWORD=r'((unsigned|long|char|volatile|inline|__(\w+?)__|const))',
+    KEYWORD=r'((double|float|int|unsigned|long|char|extern|volatile|void|inline|__(\w+?)__|const|typedef|static))',
+    IDENTIFIER=r'((?!const|volatile)[A-Za-z_]\w*)',
     SIZEOF=r'(sizeof\(([^)]+)\))',
     DECIMAL=r'([+\-]?((\.\d+)|(\d+(\.\d*)?))([eE]\d+)?[fF]?)',
     INTEGER=r'([+\-]?\d+[uU]?[lL]?)',
     HEX=r'(0[xX][0-9a-fA-F]+[lL]?)',
     EXTERN=r'((([A-Z-a-z_]\w*?_)?(EXTERN|EXPORT)|extern))',
     EXPORT=r'((([A-Z-a-z_]\w*?_)?(EXPORT|EXTERN)|extern))',
-    STATIC_INLINE=r'((([A-Z-a-z_]\w*?_)?INLINE|static\sinline|static\s__inline__))',
+    STATIC_INLINE=r'((([A-Z-a-z_]\w*?_)?INLINE|static\s+inline|static\s+__inline__))',
     BRACES=r'(([^\n}]*|([^}][^\n]*\n)*))',
     INDIRECTION=r'(\s*\*)',
     BOL=r'(\s*^\s*)',
     pattern = pattern(r'//(?P<comment>[^\n]*)(\n|$)')
     example = example(r'// this is a single line comment')
 
+class OpaqueNamedStruct (Token):
     # Token for an opaque struct typedef of the form
     # `typedef struct <label> <name>;`, optionally with a single `*` in
     # front of <name>. The `indirection` group captures either that `*`
     # or one whitespace character.
     # NOTE(review): the pattern text relies on embedded whitespace being
     # ignored (verbose-style regex); presumably pattern() compiles it that
     # way -- confirm against its definition.
+    pattern = pattern(r'''
+    typedef\s+struct\s+
+    (?P<label>%(IDENTIFIER)s)\s*
+    (?P<indirection>(\s|\*))
+    \s*
+    (?P<name>%(IDENTIFIER)s)\s*
+    ;
+    ''')
+    
+    example = example('''
+    typedef struct _Foo Foo;
+    typedef struct _Bar *BarRef;
+    ''')
+
 class UninterestingTypedef(Token):
     pattern = pattern(r'''
     typedef
 class FunctionParameter(Token):
     # Token for one parameter inside a C function parameter list.
     # The `type` group is: optional leading keywords, an identifier, then
     # any mix of `*` indirections and trailing keywords; the lookbehind
     # requires the type text to end in `*` or whitespace.
     # The `name` group is optional, and so is the trailing comma.
     pattern = pattern(r'''
     \s*(?P<type>
-        (%(KEYWORD)s\s*)*
+        (%(KEYWORD)s\s+)*
         %(IDENTIFIER)s\s*
         (%(INDIRECTION)s|\s+%(KEYWORD)s)*
         \s*
         (?<=\*|\s)
     )\s*
-    (?P<name>%(IDENTIFIER)s\s*)?\s*,?\s*
+    (?P<name>%(IDENTIFIER)s\s*)?\s*,?
     ''')
     example = example(r'''
     NSString* foo
     NSString *foo, NSString *bar
+    CFDataRef ref
     ''')
 
+class FunctionElipsisParameter (Token):
     # Token for a literal `...` (with optional surrounding whitespace),
     # i.e. the varargs marker in a C function parameter list.
+    pattern = pattern(r'''\s*\.\.\.\s*''')
+    example = example(r'''...''')
+
 class ExportVoidFunction (Token):
     pattern = pattern(r'''
     %(EXPORT)s?
     \s*(?P<returns>
-        (%(KEYWORD)s(\s+%(KEYWORD)s)*)?
-        %(IDENTIFIER)s
-        (%(INDIRECTION)s|\s+%(KEYWORD)s)*
+        (%(TYPE_KEYWORD)s(\s+%(TYPE_KEYWORD)s)*)??
+        \s*%(IDENTIFIER)s
+        ((\s|\*)(\s*(\*|%(KEYWORD)s))*\s*(?<=\s|\*))?
     )
-    (\s*(?P<protocols>%(PROTOCOLS)s))?
-    \s*(\s%(IDENTIFIER)s\s)?
-    \s*(\/\*.*?\*\/)?
+    (\s*(?P<protocols>%(PROTOCOLS)s)\s*)?
+    \s*(\/\*.*?\*\/)?\s*
+    (\s%(IDENTIFIER)s\s)?
     \s*(?P<name>%(IDENTIFIER)s)
     \s*\(\s*void\s*\)
     (\s*(?P<available>%(AVAILABLE)s))?;''')
     pattern = pattern(r'''
     %(EXPORT)s?
     \s*(?P<returns>
-        (%(KEYWORD)s(\s+%(KEYWORD)s)*)?
-        %(IDENTIFIER)s
-        (%(INDIRECTION)s|\s+%(KEYWORD)s)*
+        (%(TYPE_KEYWORD)s(\s+%(TYPE_KEYWORD)s)*)??
+        \s*%(IDENTIFIER)s
+        ((\s|\*)(\s*(\*|%(KEYWORD)s))*\s*(?<=\s|\*))?
     )
-    (\s*(?P<protocols>%(PROTOCOLS)s))?
-    \s*(\s%(IDENTIFIER)s\s)?
-    \s*(\/\*.*?\*\/)?
+    (\s*(?P<protocols>%(PROTOCOLS)s)\s*)?
+    \s*(\/\*.*?\*\/)?\s*
+    (\s%(IDENTIFIER)s\s)?
     \s*(?P<name>%(IDENTIFIER)s)
     \s*\(
     ''')
-    #(\s*\/\*.*?\*\/\s*)?
     endtoken = FunctionEnd
     lexicon = [
         InsignificantWhitespace,
+        FunctionElipsisParameter,
         FunctionParameter,
     ]
     example = example(r'''
     FOUNDATION_EXPORT SomeResult <NSObject> SomeName(const Foo *, const Foo *Bar);
     FOUNDATION_EXPORT SomeResult **SomeName(const Foo *, const Foo *Bar);
     FOUNDATION_EXPORT SomeResult SomeName(int,float);
+    CFArrayRef /* of SCFoo's */ SCNetworkInterfaceCopyAll    (void) AVALABLE_FOO;
+    CF_EXPORT
+    const UInt8 *CFDataGetBytePtr(CFDataRef theData);
     NPError NP_LOADSS   NP_New(void);
-    CFArrayRef /* of SCFoo's */ SCNetworkInterfaceCopyAll    (void) AVALABLE_FOO;
     ''')
 
 #!# class ExportFunction(Token):
     CPPDecls,
     CPPCrap,
     CompilerDirective,
+    OpaqueNamedStruct,
+    UninterestingTypedef,
     StaticInlineFunction,
     ExportVoidFunction,
     ExportFunction,
-    UninterestingTypedef,
     UninterestingStruct,
 ]
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.