Bob Ippolito avatar Bob Ippolito committed aa62097

Lots of hacking on the scanner, still unfinished.. too many changes to list

Comments (0)

Files changed (3)

sandbox/parsing/CoreFoundation_hints.py

+#
+# Scanframework hints for the framework 'CoreFoundation'
+#
+# See scanframework.py for a description of the contents of this file.
+#
+# Extra module names associated with this framework's wrapper.
+# NOTE(review): how scanframework.py consumes this tuple is not visible
+# here -- confirm against the scanner before relying on it.
+ADDITIONAL_IMPORTS=(
+    '_CoreFoundation',
+)
+
+# Explicit argument-kind overrides, keyed by (function name, argument index)
+# and looked up by argument_kind() below.  Every entry is currently commented
+# out; the NS-prefixed names look like leftovers from the Foundation hints
+# file (assumption -- confirm before deleting them).
+_ARGUMENT_KINDS={
+    #('NSGetSizeAndAlignment', 1):   'objc._C_OUT',
+    #('NSGetSizeAndAlignment', 2):   'objc._C_OUT',
+    #('NSDivideRect', 1):            'objc._C_OUT',
+    #('NSDivideRect', 2):            'objc._C_OUT',
+    #('NSJavaClassesFromPath', 3):   'objc._C_OUT',
+    #('NSJavaClassesForBundle', 2):  'objc._C_OUT',
+}
+
+def argument_kind(name, idx, nm, tp):
+    """
+    Return the argument-kind override registered in _ARGUMENT_KINDS for
+    argument ``idx`` of function ``name``, or None when there is no entry.
+
+    ``nm`` and ``tp`` are not used by this implementation.
+    """
+    # The Foundation-style fallback stays disabled for CoreFoundation:
+    #if name.startswith('NSDecimal'):
+    #    return 'objc._C_OUT'
+    return _ARGUMENT_KINDS.get((name, idx))
+
+def should_create_struct_wrapper(name, fieldnames, fieldtypes):
+    """
+    Tell the scanner whether a struct wrapper should be generated for the
+    struct ``name``.  This hints file excludes nothing, so the answer is
+    always True; ``fieldnames`` and ``fieldtypes`` are not inspected.
+    """
+    # Disabled example of an exclusion (a struct that gets a custom wrapper
+    # with a full set of numeric methods instead of a generated one):
+    #if name == 'NSDecimal':
+    #    return False
+    return True
+
+
+# Items that should not be wrapped. The key is the name of the C object that
+# shouldn't be wrapped, the value should describe why it isn't wrapped.
+# Hinter.should_ignore() in scanframework.py only tests whether a name is a
+# key of this table, so as far as that check is concerned the value is a
+# human-readable note.
+IGNORES={
+    u'kCFAllocatorUseContext': "Not valid for CFTypeID() for some reason",
+}

sandbox/parsing/scanframework.py

     set
 except NameError:
     from sets import Set as set
+import pprint
 import glob
 import sys
 from tokenize_header import *
 from macholib.dyld import framework_find
 from itertools import *
 from textwrap import dedent
+from itertools import imap
+import re
+
+# Types treated as toll-free bridged to Objective-C classes (collected by
+# searching Xcode for "toll free").  Each entry is the type name without its
+# 'Ref' suffix; makeInit() registers name + 'Ref' as objc._C_ID.  The comment
+# above each group names the corresponding Objective-C class.
+#
+# Every entry ends with a comma on purpose: adjacent string literals without
+# a comma are silently concatenated into one bogus name.
+TOLL_FREE = [
+    # NSObject
+    'CFType',
+    # NSArray
+    'CFArray', 'CFMutableArray',
+    # NSCharacterSet
+    'CFCharacterSet', 'CFMutableCharacterSet',
+    # NSData
+    'CFData', 'CFMutableData',
+    # NSDate
+    'CFDate',
+    # NSDictionary
+    'CFDictionary', 'CFMutableDictionary',
+    # NSTimer
+    'CFRunLoopTimer',
+    # NSSet
+    'CFSet', 'CFMutableSet',
+    # NSString
+    'CFString', 'CFMutableString',
+    # NSURL
+    'CFURL',
+    # NSTimeZone
+    'CFTimeZone',
+    # NSInputStream, NSOutputStream
+    'CFReadStream', 'CFWriteStream',
+    # NSLocale
+    'CFLocale',
+    # NSNumber
+    'CFNumber',
+    # NSAttributedString
+    'CFAttributedString', 'CFMutableAttributedString',
+    # NSNumberFormatter
+    'CFNumberFormatter',
+    # NSCalendar
+    'CFCalendar',
+    # NSDateFormatter
+    'CFDateFormatter',
+
+    # ABSearchElement, ABRecord
+    'ABSearchElement', 'ABRecord',
+    # ABMutableMultiValue, ABMultiValue
+    'ABMutableMultiValue', 'ABMultiValue',
+    # ABGroup
+    'ABGroup',
+    # ABAddressBook
+    'ABAddressBook',
+    # ABPerson
+    'ABPerson',
+]
+
+class cleanfile(file):
+    """
+    File object that passes everything written to it through
+    cleanup_text() before handing it to the underlying file.
+    """
+    def write(self, s):
+        cleaned = cleanup_text(s)
+        file.write(self, cleaned)
+
+    def writelines(self, lines):
+        # imap keeps the cleanup lazy, one line at a time.
+        cleaned_lines = imap(cleanup_text, lines)
+        file.writelines(self, cleaned_lines)
+
+class typedict(object):
+    """
+    Type table whose keys are type names.  Every key is passed through
+    normalize_type() on store, lookup and membership test, so differently
+    spelled versions of the same type name share one entry.
+    """
+    def __init__(self):
+        # Backing store; keys are always normalized type names.
+        self.dct = {}
+
+    def update(self, dct):
+        """
+        Store every key/value pair of ``dct``, which may be a dict or
+        anything dict() accepts.
+        """
+        if isinstance(dct, dict):
+            source = dct
+        else:
+            source = dict(dct)
+        for typename, value in source.iteritems():
+            # Go through __setitem__ so the key gets normalized.
+            self[typename] = value
+
+    def __len__(self):
+        return len(self.dct)
+
+    def __iter__(self):
+        return iter(self.dct)
+
+    def __contains__(self, key):
+        normalized = normalize_type(key)
+        return normalized in self.dct
+
+    def __setitem__(self, key, item):
+        normalized = normalize_type(key)
+        self.dct[normalized] = item
+        # Trace every registration on stdout.
+        print '[type] %s -> %s' % (normalized, item)
+
+    def __getitem__(self, key):
+        return self.dct[normalize_type(key)]
 
 class Hinter (object):
     """
     cannot be deduced from the header files, don't use the hinter to
     work around limitations in the scanner!
     """
-    def __init__(self, path):
+    def __init__(self, path, types):
         self._globals = {}
+        self._types = types
         execfile(path, self._globals)
     
         self._ignores = self._globals.get('IGNORES', {})
         self._ignorefunc = self._globals.get('should_ignore', None)
 
+        update_types = self._globals.get('update_types', None)
+        if update_types is not None:
+            update_types(types)
+        types.update(self._globals.get('TYPES', {}))
+
     def should_ignore(self, name):
         if name in self._ignores:
             return True
         elif self._ignorefunc is not None and self._ignorefunc(name):
+            self._ignores[name] = 'ignored by hinter'
             return True
 
         return False
             print
             import pdb
             pdb.Pdb().set_trace()
+        print "scanning:", fn
         return self._scanner.iterscan(file(fn).read(), dead=deadraise)
 
 def contains_instances_of(iterator, types):
 def do_opaque_struct(token, types, opaque_pointers, hinter):
     name = token['name']
     tag = token['label']
-    indirect = token['indirection'].strip() == '*'
+    const = token['const']
+    indirect = normalize_type(token['indirection'])
 
+    if name in types:
+        return
+
+    print '[OPAQUE STRUCT] %(name)s %(label)s %(indirection)s' % token
+ 
     if hinter is not None and hinter.should_ignore(name):
         return
 
-    encoded = 'objc._C_PTR + objc._C_STRUCT_B + %r + objc._C_STRUCT_E'%(tag,)
-    types[name +  '*'] = encoded
+    if const:
+        const = 'objc._C_CONST + '
+    else:
+        const = ''
+    encoded = ('objc._C_PTR + ' * len(indirect)) + const + 'objc._C_STRUCT_B + %r + objc._C_STRUCT_E'%(tag,)
+    types[name] = encoded
 
     if not indirect:
+        encoded = 'objc._C_PTR + ' + encoded
+        types[name+'*'] = encoded
         name += 'Ptr'
 
-    opaque_pointers.append(
-        '%(name)s = objc.createOpaquePointerType(%(name)r, %(encoded)r, "typedef struct %(tag)s* %(name)s")\n'% locals() )
+    # This should be good enough to match CF types but hopefully nothing else
+    # Unfortunately, not all CF types can be used as Objective-C types, so
+    # we wrap them with CF stuff
+    if name.endswith('Ref') and tag.startswith('__') and indirect == '*':
+        opaque_pointers.append(
+            '# %(name)s\nobjc.RegisterCFSignature(%(encoded)s)\n' % locals())
+    else:
+        opaque_pointers.append(
+            '%(name)s = objc.createOpaquePointerType(%(name)r, %(encoded)r, "typedef struct %(tag)s* %(name)s")\n'% locals() )
 
 
 def do_struct(token, types, structs, hinter):
 
     if hinter is not None and hinter.should_ignore(externalname):
         return
-
+    
     structname = token['structname']
     body = token.matches()[:-1]
     fieldnames = []
 
     doc = 'typedef struct {\n%s\n} %s;'%('\n'.join(fields), externalname)
 
-    structs.append('_%s_encoded = %s\n'%(externalname, encoded))
+    structdef = '_%s_encoded = %s\n' % (externalname, encoded)
+    print '[STRUCT]', structdef.strip()
+    structs.append(structdef)
 
     if hinter is not None and not hinter.should_create_struct_wrapper(
             externalname, fieldnames, fields):
+        print '[STRUCT] SKIPPING %s' % (externalname,)
         return
 
     structs.append('%s = objc.createStructType("%s", _%s_encoded, %s, %r)\n'%(
     ptr = 0
     body = token['body'].split()
 
-    alias = ' '.join(body[:-1])
+    alias = normalize_type(' '.join(body[:-1]))
     target = body[-1]
 
+    if target in types:
+        return
+
     if hinter is not None and hinter.should_ignore(target):
         return
 
+   
     while alias[-1] == '*':
         alias = alias[:-1]
         ptr += 1
             prefix = ''
         types[target] = prefix + types[alias]
     else:
-        print "Ignore simple typedef: %s"%(target,)
+        print "Ignore simple typedef: %s -> %s"%(target, alias)
 
 def extractTypes(framework, types, dependencies):
     """
 
     This function only parses type definitions and updates the type table.
     """
+    print "[extractTypes]", framework
     f = FrameworkScanner()
 
     for token in ifilter(None, f.scanframework(framework)):
             do_uninteresting_typedef(token, types)
 
         elif isinstance(token, (Interface, ForwardClassReference)):
-            types[token['name'] + '*'] = types['id']
+            for name in token['name'].split(','):
+                types[name.strip() + '*'] = types['id']
 
-def makeInit(framework, out, hinter = None):
+def makeInit(framework, out, hinter = None, types=None):
     framework_name = filter(None, os.path.split(framework))[0]
     framework_path = unicode(os.path.dirname(framework_find(framework_name)), sys.getfilesystemencoding())
     f = FrameworkScanner()
-    types = {
+    if types is None:
+        types = typedict()
+
+    types.update({
         'id': 'objc._C_ID',
-        'NSString *': 'objc._C_ID',
-        'NSString*': 'objc._C_ID',
-        'CFStringRef': 'objc._C_ID',
+
         'SEL': 'objc._C_SEL',
         'BOOL': 'objc._C_NSBOOL',
         'bool': 'objc._C_BOOL',
         'Class': 'objc._C_CLASS',
 
         'char': 'objc._C_CHR',
+        'signed char': 'objc._C_CHR',
         'unsigned char': 'objc._C_UCHR',
 
         'int': 'objc._C_INT',
         'double': 'objc._C_DBL',
         
         'char*': 'objc._C_CHARPTR',
-    }
+
+        'void': 'objc._C_VOID',
+
+    })
+
+    for name in TOLL_FREE:
+        types[name+'Ref'] = 'objc._C_ID'
+
+    types.update({
+        'OSType': 'objc._C_UINT',
+        'LangCode': 'objc._C_SHT',
+        'RegionCode': 'objc._C_SHT',
+    })
 
     # Some aliases
     # XXX: need to improve this script, this information can be 
     types['uint32_t'] = types['unsigned long']
     types['int64_t'] = types['long long']
     types['uint64_t'] = types['unsigned long long']
-    types['const char*'] = types['char*']
 
     ignores = set([
         CompilerDirective,
         CPPCrap,
 
         # Stuff below here might be interesting
-        UninterestingStruct, 
+        UninterestingStruct,
         MacroDefine,
+        FunctionCallDefine,
     ])
     dependencies = set()
     globthings = []
         elif isinstance(token, Protocol):
             # TODO: generate informal-protocol definition. 
             # Need to enhance the tokenizer for that
+            #
+            # XXX: It can basically be assumed that the
+            #      protocol can be found in the framework
+            #      if the protocol is defined in a header
+            #      so objc.protocolNamed(name) should work.
+            #      Also note that putting them in the namespace
+            #      would be bad because many of them conflict
+            #      with class names.  i.e. NSObject!
             pass
 
         elif isinstance(token, (Interface, ForwardClassReference)):
             # Class definition: make the type known to the type table
-            types[token['name'] + '*'] = types['id']
+            for name in token['name'].split(','):
+                types[name.strip() + '*'] = types['id']
 
         elif isinstance(token, (ExportFunction, ExportVoidFunction)):
             do_function(token, types, functions, hinter)
 del objc
 """
 
-def makeWrapper(fmwk, hinter):
+def makeWrapper(fmwk, hinter, types):
     try:
         os.makedirs(fmwk)
     except OSError:
         pass
-    makeInit(fmwk, file(os.path.join(fmwk, '__init__.py'), 'w'), hinter)
+    makeInit(fmwk, cleanfile(os.path.join(fmwk, '__init__.py'), 'w'), hinter, types)
+    typesfile = file(os.path.join(fmwk, '_types.py'), 'w')
+    typesfile.write('TYPES = ')
+    pprint.pprint(types.dct, typesfile)
+    print >>typesfile, ''
 
 if __name__ == '__main__':
+    types = typedict()
     if len(sys.argv) == 1:
         fmwk = 'PreferencePanes'
         hinter = None
     elif len(sys.argv) == 2:
         fmwk = sys.argv[1]
         hinter = None
-    elif len(sys.argv) == 3:
+    elif len(sys.argv) >= 3:
         fmwk = sys.argv[1]
-        hinter = Hinter(sys.argv[2])
+        # bring in 3rd party types first
+        for fn in sys.argv[3:]:
+            Hinter(fn, types)
+        hinter = Hinter(sys.argv[2], types)
     else:
-        print >> sys.stderr, "Usage: scanframeworks [Framework [hinter]]"
+        print >> sys.stderr, "Usage: scanframeworks [Framework [hinter [typemap...]]]"
         sys.exit(1)
 
-    makeWrapper(fmwk, hinter)
+    makeWrapper(fmwk, hinter, types)

sandbox/parsing/tokenize_header.py

 from scanner import *
 from textwrap import dedent
+import re
 
+# Whitespace in front of a '*' (so 'char *' becomes 'char*').
+TYP_PTR = re.compile(r'\s+\*')
+# Any run of whitespace (spaces, tabs, newlines).
+SPACE_NORMALIZE = re.compile(r'\s+')
+
+#type mapping 'const unsigned char *   ConstStringPtr' -> 'unsigned char* ConstStringPtr'
+#type mapping 'unsigned char           Str255[256]' -> 'unsigned char Str255[256]'
+#type mapping 'const unsigned char *   ConstStr255Param' -> 'unsigned char* ConstStr255Param'
+def normalize_type(typ, _memoize={}):
+    """
+    Return a canonical spelling of the C type string ``typ``.
+
+    A leading or space-delimited ``const`` is dropped, whitespace in front
+    of ``*`` is removed, every other whitespace run becomes a single space
+    and the result is stripped.
+
+    ``_memoize`` is a deliberately shared mutable default: it caches
+    results across calls, keyed by the original input string.
+    """
+    try:
+        return _memoize[typ]
+    except KeyError:
+        pass
+    orig = typ
+    # Collapse whitespace first so that 'const' followed or preceded by a
+    # tab or newline is found by the plain-space checks below.
+    typ = SPACE_NORMALIZE.sub(' ', typ)
+    # XXX: Technically const could be turned into objc._C_CONST but
+    #      I can't imagine how that would make a difference.
+    for skip in ('const',):
+        if typ.startswith(skip + ' '):
+            typ = typ[len(skip)+1:]
+        typ = typ.replace(' ' + skip + ' ', ' ')
+    typ = TYP_PTR.sub('*', typ)
+    typ = typ.strip()
+    #if orig != typ:
+    #    print 'type mapping %r -> %r' % (orig, typ)
+    _memoize[orig] = typ
+    return typ
 
 SUBPATTERNS = dict(
     AVAILABLE=r'([A-Z][A-Z0-9_]+)',
     IDENTIFIER=r'((?!const|volatile)[A-Za-z_]\w*)',
     SIZEOF=r'(sizeof\(([^)]+)\))',
     DECIMAL=r'([+\-]?((\.\d+)|(\d+(\.\d*)?))([eE]\d+)?[fF]?)',
-    INTEGER=r'([+\-]?\d+[uU]?[lL]?)',
+    INTEGER=r'([+\-]?\d+[uU]?[lL]?[lL]?)',
     CHARS=r"('([^\\'\n]|\\')*')",
     STRING=r'("([^\\"\n]|\\")*")',
     CFSTRING=r'(CFSTR\("([^\\"\n]|\\")*"\))',
-    HEX=r'(0[xX][0-9a-fA-F]+[lL]?)',
+    HEX=r'(0[xX][0-9a-fA-F]+[uU]?[lL]?)',
     EXTERN=r'((([A-Z-a-z_]\w*?_)?(EXTERN|EXPORT)|extern))',
     EXPORT=r'((([A-Z-a-z_]\w*?_)?(EXPORT|EXTERN)|extern))',
     STATIC_INLINE=r'((([A-Z-a-z_]\w*?_)?INLINE|static\s+inline|static\s+__inline__))',
     INDIRECTION=r'(\s*\*)',
     BOL=r'(\s*^\s*)',
     EOL=r'(\s*$\n?)',
+    OPERATORS=r'(\||\+|-|\*|&|>>|<<|\^)',
 )
 
+# A hex or decimal integer literal followed by any run of u/U/l/L suffixes.
+WEIRD_INTEGER = re.compile(r'(0[xX][0-9a-fA-F]+|[0-9]+)[uUlL]+')
+def cleanup_text(s):
+    """
+    Return ``s`` with each tab expanded to eight spaces and every
+    C-style integer suffix (u, l, ul, ull, ...) rewritten to a single
+    ``L``.
+    """
+    expanded = s.replace('\t', '        ')
+    return WEIRD_INTEGER.sub(r'\1L', expanded)
+
 def deadspace(string, begin, end):
     return 'NO MATCH FOR [%d:%d] %r' % (begin, end, string[begin:end])
 
 
 class OpaqueNamedStruct (Token):
     pattern = pattern(r'''
-    typedef\s+struct\s+
+    typedef\s+(?P<const>const\s+)?struct\s+
     (?P<label>%(IDENTIFIER)s)\s*
-    (?P<indirection>(\s|\*))
+    (?P<indirection>(\s|\*)+)
     \s*
     (?P<name>%(IDENTIFIER)s)\s*
     ;
     example = example('#import "Foo/Bar.h"')
 
 class SimpleDefine(Token):
-    # XXX foo << bar
-    # XXX foo | bar | baz
-    # XXX ((type)foo)
+    #XXX ((type)foo)
     pattern = pattern(r'''
     \#\s*define\s*
         (?P<name>%(IDENTIFIER)s)\s+
                     %(CFSTRING)s
                     | %(CHARS)s
                     | %(STRING)s
-                    | %(HEX)s
+                    | %(SIZEOF)s
                     | %(DECIMAL)s
-                    | %(INTEGER)s
-                    | %(SIZEOF)s
-                    | %(IDENTIFIER)s
+                    | (
+                        %(HEX)s
+                        | %(INTEGER)s
+                        | %(IDENTIFIER)s
+                        | %(OPERATORS)s
+                        | [ \t]+
+                      )+
                 )
             )
         \)?
         )
     ''')
     example = example(r'''
+    #define foo bar | baz
+    #define foo bar << 1234
+    #define foo (bar | baz)
+    #define foo (bar | 0xFFF)
     #define foo 'foo!'
     #define foo bar
     #define foo 0x200
     #define foo (8)
     ''')
     
+class FunctionCallDefine(Token):
+    # Matches a single-line ``#define NAME callee(args)`` whose replacement
+    # text is one function-style call.
+    #   name -- the macro being defined
+    #   body -- the call text, from the callee identifier through the
+    #           closing parenthesis plus trailing spaces/tabs
+    # The argument list is matched with [^)]*, so it stops at the first ')'
+    # and calls with nested parentheses are not matched by this token.
+    # scanframework.makeInit() lists this token in its ``ignores`` set.
+    pattern = pattern(r'''
+    \#\s*define\s*
+        (?P<name>%(IDENTIFIER)s)
+        [ \t]*
+        (?P<body>
+            %(IDENTIFIER)s
+            [ \t]*
+            \(
+                [^)]*
+            \)
+            [ \t]*
+        )
+    (\n|$)
+    ''')
+    example = example(r'''
+    #define IUnknownUUID CFUUIDGetConstantUUIDWithBytes(NULL, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46)
+    ''')
+
+
 class MacroDefine(Token):
     pattern = pattern(r'''
     \#\s*define\s*
     \s*(?P<name>%(IDENTIFIER)s)
     \s*=
     \s*(?P<value>(
-        %(HEX)s
-        | %(INTEGER)s
-        | %(CHARS)s
-        | %(IDENTIFIER)s
+        \(?
+        (
+            %(CHARS)s
+            | (
+                %(HEX)s
+                | %(INTEGER)s
+                | %(IDENTIFIER)s
+                | %(OPERATORS)s
+                | \s+
+              )+
+        )
+        \)?
         ))
     \s*,?
     ''')
     example = example(r'''
+    NSXMLNodePreserveQuotes = (NSXMLNodeUseSingleQuotes | NSXMLNodeUseDoubleQuotes),
+    NSXMLNodePreserveCharacterReferences = 1 << 27,
     Foo = 12,
     Foo = 2
     ''')
     Protocol,
     AngleImport,
     StringImport,
+    FunctionCallDefine,
     SimpleDefine,
     GlobalThing,
     ForwardClassReference,
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.