Source

pyyaml-legacy / yaml / ypath.py

Full commit
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
from types import ListType, StringType, IntType, DictType, InstanceType
import re
from urllib import quote
from timestamp import unquote

# Unique sentinel used as the default `target` of ypath().  It is compared
# by identity, so any real value (including None) can be passed as a target.
noTarget = object()

def escape(node):
    """
        summary: >
            Render a mapping key or sequence index as one
            ypath segment.  Strings are URI-quoted with an
            empty set of extra safe characters (so '/' is
            quoted as well), which lets a ypath expression
            double as a valid URI expression.  Integers are
            rendered with str().
        args:
            node: the key to escape; its type must be exactly
                  StringType or IntType
        returns: the escaped key as a string
        raises: ValueError for any other key type
    """
    kind = type(node)
    if kind is StringType:
        return quote(node, '')
    if kind is IntType:
        return str(node)
    raise ValueError("TODO: Support more than just string and integer keys.")

class context:
    """
        summary: >
            A ypath visit context through a YAML rooted graph.
            This is implemented as a 3-tuple including the parent
            node, the current key/index and the value.  This is
            an immutable object so it can be cached.
        properties: 
            key:    mapping key or index within the parent collection
            value:  current value within the parent's range
            parent: the parent context (None for the root)
            root:   the very top of the yaml graph
            path:   a tuple of the escaped domain keys, root first
        notes: >
            The context class doesn't yet handle going down the
            domain side of the tree...  Hashing and comparison
            use only `path`, so two contexts with the same path
            are considered equal.
    """
    def __init__(self,parent,key,value):
        """
            args:
                parent: parent context (or None if this is the root)
                key:    mapping key or index for this context
                value:  value of current location...
        """
        self.parent = parent
        self.key    = key
        self.value  = value
        if parent:
            assert parent.__class__ is self.__class__
            self.path = parent.path + (escape(key),)
            self.root = parent.root
        else:
            assert not key
            self.path = tuple()
            self.root = self

    def __setattr__(self,attname,attval):
        """
            summary: >
                Allow each of parent/key/value to be assigned
                exactly once (by __init__); any later assignment
                raises ValueError.  Other attributes are not
                restricted.
        """
        # Test for presence, not truthiness: a legitimately falsy value
        # (parent None for the root, index 0, an empty string) must be
        # just as read-only as any other.
        if attname in ('parent','key','value') and attname in self.__dict__:
            raise ValueError("context is read-only")
        self.__dict__[attname] = attval

    def __hash__(self):
        return hash(self.path)

    def __cmp__(self,other):
        # Anything without a `path` attribute sorts as "not equal".
        try:
            return cmp(self.path,other.path)
        except AttributeError:
            return -1

    def __str__(self):
        # The root has an empty path and is rendered as a lone slash.
        if self.path:
            return "/".join(('',)+self.path)
        else:
            return '/'

def to_context(target):
    """
        summary: >
            Return target unchanged when it is already a context
            instance; otherwise wrap it in a new root context
            (no parent, no key).
    """
    is_context = type(target) is InstanceType and target.__class__ is context
    if not is_context:
        target = context(None,None,target)
    return target

def context_test():
    """
        summary: >
            Self-check for the context class: builds a three level
            chain (mapping -> list -> string) and verifies keys,
            values, parents, roots, hashing and string rendering.
    """
    seq  = ['value']
    top  = {'key':seq}
    x = context(None,None,top)
    y = context(x,'key',seq)
    z = context(y,0,'value')

    # middle of the chain: the list stored under 'key'
    assert ('key',) == y.path
    assert 'key'    == y.key
    assert seq      == y.value
    assert x        == y.parent
    assert x        == y.root

    # bottom of the chain: first element of the list
    assert 0        == z.key
    assert 'value'  == z.value
    assert y        == z.parent
    assert x        == z.root

    # every level must be hashable with a non-zero hash
    for level in (x, y, z):
        assert hash(level)

    # string form is the slash-joined path
    assert '/' == str(x)
    assert '/key' == str(y)
    assert '/key/0' == str(z)

class null_seg:
    """
        summary: >
            The trivial path segment and the base class of every
            other segment.  It never produces a result and does
            not look at the context it is bound to.
        notes: >
            Segments are their own iterators; subclasses select
            behaviour by rebinding the `next` attribute.
    """
    def __iter__(self):
        return self

    def next_null(self):
        # Always exhausted.
        raise StopIteration

    # Default iteration step: nothing to return.
    next = next_null

    def bind(self,cntx):
        """
            summary: >
                Called whenever the parent context has changed;
                records the new context on the segment.
        """
        assert(cntx.__class__ is context)
        self.cntx = cntx

    def apply(self,target):
        """
            summary: >
                Bind to target (wrapped in a context if needed)
                and return an iterator over the results.
        """
        cntx = to_context(target)
        self.bind(cntx)
        return iter(self)

    def exists(self,cntx):
        """
            summary: >
                Bind to cntx and report (1 or 0) whether the
                segment yields at least one result.
        """
        try:
            self.bind(cntx)
            self.next()
        except StopIteration:
            return 0
        return 1
 
class self_seg(null_seg):
    """
        summary: >
            Path segment '.': yields the bound context exactly
            once per binding.
    """
    def __str__(self):
        return '.'

    def bind(self,cntx):
        # Arm the one-shot step each time a new context arrives.
        null_seg.bind(self,cntx)
        self.next = self.next_self

    def next_self(self):
        # Hand back the bound context, then behave as exhausted.
        result = self.cntx
        self.next = self.next_null
        return result

class root_seg(self_seg):
    """
        summary: >
            Path segment '/' at the start of an absolute path:
            yields the root of whatever context it is bound to,
            exactly once.
    """
    def __str__(self):
        return '/'

    def bind(self,cntx):
        top = cntx.root
        self_seg.bind(self,top)

class parent_seg(self_seg):
    """
        summary: >
            Path segment '..': yields the parent of the bound
            context exactly once.  A context without a parent
            yields itself.
    """
    def __str__(self):
        return '..'

    def bind(self,cntx):
        upward = cntx.parent or cntx
        self_seg.bind(self,upward)

class wild_seg(null_seg):
    """
        summary: >
            Path segment '*': yields one child context for every
            entry of the bound value when that value is exactly
            a list (by index) or a dict (by key).  Any other
            value simply has no children; that is not an error.
    """
    def __str__(self):
        return '*'

    def next_wild(self):
        child_key = self.keys.next()
        child_val = self.values[child_key]
        return context(self.cntx,child_key,child_val)

    def bind(self,cntx):
        null_seg.bind(self,cntx)
        container = cntx.value
        kind = type(container)
        if kind is ListType:
            # lists are walked by position
            keys = iter(xrange(0,len(container)))
        elif kind is DictType:
            # dicts are walked in their own key order
            keys = iter(container)
        else:
            # scalars and other objects have nothing underneath
            self.next = self.next_null
            return
        self.keys   = keys
        self.values = container
        self.next   = self.next_wild

class trav_seg(null_seg):
    """
        summary: >
            This is a recursive traversal of the range, preorder:
            the bound context is yielded first, followed by each
            of its descendants.  It is a recursive combination of
            self and wild.
        notes: >
            State is a stack of (context, wild_seg-or-None) pairs.
            A None second element marks a context that has not
            been yielded yet.
    """
    def __str__(self): return '/'

    def next_trav(self):
        while 1:
            (cntx,seg) = self.stk[-1]
            if seg is None:
                # First visit: yield the node itself and remember an
                # iterator over its children for later visits.
                seg = wild_seg()
                seg.bind(cntx)
                self.stk[-1] = (cntx,seg)
                return cntx
            try:
                cntx = seg.next()
            except StopIteration:
                # Children exhausted: back up one level.
                self.stk.pop()
                if not(self.stk):
                    self.next = self.next_null
                    raise StopIteration
            else:
                self.stk.append((cntx,None))

    next = next_trav

    def bind(self,cntx):
        null_seg.bind(self,cntx)
        self.stk = [(cntx,None)]
        # A finished traversal swaps `next` to next_null; re-arm it here,
        # otherwise a segment that has been exhausted once would yield
        # nothing for every later binding.
        self.next = self.next_trav

class match_seg(self_seg):
    """
        summary: >
            Matches a particular key within the
            current context.  Kinda boring.
        notes: >
            The key is converted to an int when int() accepts it,
            so '0' addresses index 0.  When the bound value has no
            such key the segment yields nothing.
    """
    def __str__(self): return str(self.key)

    def __init__(self,key):
        #TODO: Do better implicit typing
        try:
            key = int(key)
        except (TypeError, ValueError):
            # not a number; keep the key as given
            pass
        self.key = key

    def bind(self,cntx):
        try:
            mtch = cntx.value[self.key]
            child = context(cntx,self.key,mtch)
        except (LookupError, TypeError, AttributeError, ValueError):
            # Missing key/index, a value that cannot be subscripted, or a
            # key that cannot be escaped: no match.  Explicitly disarm the
            # step so a match left over from a previous binding can never
            # be reported against this context.
            null_seg.bind(self,cntx)
            self.next = self.next_null
        else:
            self_seg.bind(self,child)
        
class conn_seg(null_seg):
    """
        summary: >
            When two segments are connected via a slash,
            this is a composite.  For each context of the
            parent, it binds the child, and returns each
            context of the child.
    """
    def __str__(self):
        if self.parent.__class__ == root_seg:
            return "/%s" % self.child
        return "%s/%s" % (self.parent, self.child)

    def __init__(self,parent,child):
        self.parent = parent
        self.child  = child

    def next_conn(self):
        while 1:
            try:
                return self.child.next()
            except StopIteration:
                # Child is done for this parent context; advance the
                # parent (StopIteration from here ends the composite).
                cntx = self.parent.next()
                self.child.bind(cntx)

    next = next_conn

    def bind(self,cntx):
        null_seg.bind(self,cntx)
        self.parent.bind(cntx)
        try:
            cntx = self.parent.next()
        except StopIteration:
            # The parent produced nothing, so the child was never bound
            # for this context.  Yield nothing rather than consulting a
            # child that is unbound or still holds an earlier binding.
            self.next = self.next_null
            return
        self.child.bind(cntx)
        self.next = self.next_conn


class pred_seg(null_seg):
    """
        summary: >
            A segment followed by a [predicate]: yields only
            those contexts of the parent segment for which the
            filter's exists() reports a result.
    """
    def __init__(self,parent,filter):
        self.parent = parent
        self.filter = filter

    def __str__(self):
        return "%s[%s]" % (self.parent, self.filter)

    def bind(self,cntx):
        null_seg.bind(self,cntx)
        self.parent.bind(cntx)

    def next(self):
        # Skip parent results until one passes the filter; the parent's
        # StopIteration ends this segment too.
        candidate = self.parent.next()
        while not self.filter.exists(candidate):
            candidate = self.parent.next()
        return candidate

class or_seg(null_seg):
    """
        summary: >
            Union of two paths (lhs|rhs): yields every result of
            lhs, then every result of rhs that lhs did not
            already produce.
        notes: >
            Results of lhs are remembered in `unq`; contexts hash
            and compare by path, so "already produced" means
            "same path".
    """
    def __str__(self): return "%s|%s" % (self.lhs,self.rhs)

    def __init__(self,lhs,rhs):
        self.rhs = rhs
        self.lhs = lhs
        self.unq = {}

    def next(self):
        try:
            nxt = self.lhs.next()
        except StopIteration:
            pass
        else:
            self.unq[nxt] = nxt
            return nxt
        # lhs is exhausted: drain rhs, skipping what lhs already gave.
        while 1:
            nxt = self.rhs.next()
            if nxt not in self.unq:
                return nxt

    def bind(self,cntx):
        null_seg.bind(self,cntx)
        # Start each binding with a clean slate; otherwise paths seen in
        # an earlier binding would wrongly suppress rhs results here.
        self.unq = {}
        self.lhs.bind(cntx)
        self.rhs.bind(cntx)

class scalar:
    """
        summary: >
            Wrapper for a literal value appearing inside a
            predicate.
        properties:
            val: the wrapped value, exactly as given
    """
    def __init__(self,val):
        self.val = val

    def value(self):
        # the wrapped value, unchanged
        return self.val

    def __str__(self):
        return str(self.val)

class equal_pred:
    """
        summary: >
            Predicate for `lhs=rhs`.  The constructor picks the
            implementation of exists() from the kinds of its two
            operands.
        notes: >
            scalar = scalar is decided once, at construction.
            scalar = path (either order) is true when any result
            of the path has a value whose str() equals the str()
            of the scalar.  path = path is not implemented.
    """
    def exists_true(self,cntx): return 1
    def exists_false(self,cntx): return 0

    def exists_scalar(self,cntx):
        self.rhs.bind(cntx)
        try:
            while 1:
                cntx = self.rhs.next()
                if str(cntx.value) == self.lhs:  #TODO: Remove type hack
                     return 1
        except StopIteration: pass
        return 0

    def exists_segment(self,cntx):
        raise NotImplementedError()

    def __init__(self,lhs,rhs):
        lhs_is_scalar = lhs.__class__ == scalar
        rhs_is_scalar = rhs.__class__ == scalar
        if not (lhs_is_scalar or rhs_is_scalar):
            # Two path operands.  Keep them as they are: segments have no
            # value() method, so stringifying lhs here would fail with an
            # AttributeError before exists_segment could ever report that
            # the comparison is not implemented.
            self.exists = self.exists_segment
            self.lhs = lhs
            self.rhs = rhs
            return
        if lhs_is_scalar and rhs_is_scalar:
            if rhs.value() == lhs.value():
                self.exists = self.exists_true
            else:
                self.exists = self.exists_false
        else:
            if rhs_is_scalar:
                # normalise so the scalar is always on the left
                (lhs,rhs) = (rhs,lhs)
            self.exists = self.exists_scalar
        self.lhs = str(lhs.value())  #TODO: Remove type hack
        self.rhs = rhs
 
# Token that may start a path segment: a run of word characters, or a
# single '/', '.', '*', double quote or single quote.
matchSegment = re.compile(r"""^(\w+|/|\.|\*|\"|\')""")

def parse_segment(expr):
    """
        summary: >
            Parse the single segment found at the start of expr
            (segments occur between the slashes).
        returns: >
            A (segment, remaining_expr) pair.  The segment is None
            when expr does not start with a segment token.  For a
            leading '/' a trav_seg is returned and expr is handed
            back unconsumed.
    """
    found = matchSegment.search(expr)
    if not found:
        return (None,expr)
    tok = found.group()
    if tok == '/':
        return (trav_seg(),expr)
    used = len(tok)
    if tok == '.':
        # '..' is the parent, a lone '.' is self
        if expr[1:2] == '.':
            seg  = parent_seg()
            used = 2
        else:
            seg = self_seg()
    elif tok == '*':
        seg = wild_seg()
    elif tok in ('"', "'"):
        # quoted key: unquote reports the key and how much it consumed
        (cur,used) = unquote(expr)
        seg = match_seg(cur)
    else:
        seg = match_seg(tok)
    return (seg,expr[used:])

# Token that may start a predicate term: a run of word characters, or a
# single '/', '.', '(', double quote or single quote.
matchTerm = re.compile(r"""^(\w+|/|\.|\(|\"|\')""")

def parse_term(expr):
    """
        summary: >
            Parse one term of a predicate from the start of expr:
            a path (starting with '/' or '.'), a parenthesised
            predicate, a quoted string or a bare word.
        returns: >
            A (term, remaining_expr) pair; term is None when expr
            does not start with a term token.  Words and quoted
            strings are returned wrapped in scalar.
        raises: SyntaxError when a '(' group is not closed by ')'
    """
    mtch = matchTerm.search(expr)
    if not(mtch): return (None,expr)
    tok = mtch.group()
    if '/' == tok or '.' == tok:
        return parse(expr)
    if '(' == tok:
        # Step over the '(' before descending; parsing the unchanged
        # expression would match the same '(' again and recurse forever.
        (term,expr) = parse_predicate(expr[1:])
        if not expr.startswith(')'):
            raise SyntaxError("ypath: expecting ')' at '%s'" % expr)
        return (term,expr[1:])
    if '"' == tok or "'" == tok:
        (val,siz) = unquote(expr)
    else:
        val = tok; siz = len(tok)
    return (scalar(val),expr[siz:])

def parse_predicate(expr):
    """
        summary: >
            Parse the inside of a [...] or (...) predicate: either
            a single term, or `term=term`.
        returns: >
            A (predicate, remaining_expr) pair.  remaining_expr
            still starts with the closing ']' or ')', which the
            caller is expected to check and consume.  A lone
            scalar term is turned into a match_seg.
        raises: >
            SyntaxError when a term is missing, when the term is
            followed by anything other than '=', ']' or ')'
            (including the end of the expression), or when a
            function call is attempted.
    """
    (term,expr) = parse_term(expr)
    if term is None: raise SyntaxError("term expected: '%s'" % expr)
    # expr[:1] is '' at the end of input, which falls through to the
    # "expecting operator" error below instead of raising IndexError.
    tok = expr[:1]
    if '=' == tok:
        (rhs,expr) = parse_term(expr[1:])
        if rhs is None: raise SyntaxError("term expected: '%s'" % expr)
        return (equal_pred(term,rhs),expr)
    if '(' == tok:
        # A string is not a valid exception object; raise a real one.
        raise SyntaxError("No functions allowed... yet!")
    if ']' == tok or ')' == tok:
        if term.__class__ is scalar:
            term = match_seg(str(term))
        return (term,expr)
    raise SyntaxError("ypath: expecting operator '%s'" % expr)

def parse_start(expr):
    """
        summary: >
            Validate the expression and decide whether it is
            absolute or relative.
        returns: >
            A (segment, expr) pair.  An expression starting with
            '/' gets a root_seg and is returned unchanged; any
            other expression gets a self_seg and a '/' prepended.
        raises: TypeError unless expr is a non-empty StringType
    """
    is_string = type(expr) == StringType
    if not is_string or not len(expr) >= 1:
        raise TypeError("string required: " + repr(expr))
    if expr[0] == '/':
        return (root_seg(),expr)
    return (self_seg(),'/' + expr)

def parse(expr):
    """
        summary: >
            This the parser entry point, the top level node
            is always a root or self segment.  The self isn't
            strictly necessary, but it keeps things simple.
        returns: >
            A (segment, remaining_expr) pair.  Parsing stops at
            the first character that cannot continue a path, so
            remaining_expr is non-empty when the expression was
            only partly understood.
        raises: >
            TypeError (from parse_start) when expr is not a
            non-empty string; SyntaxError when a '[' or '(' group
            is not closed by the matching bracket.
    """
    (ypth,expr) = parse_start(expr)
    while expr:
        tok = expr[0]
        if '/' == tok:
            (child, expr) = parse_segment(expr[1:])
            if child: ypth = conn_seg(ypth,child)
            continue
        if '[' == tok:
            (pred, expr) = parse_predicate(expr[1:])
            # Checked explicitly rather than with assert: asserts are
            # removed under -O, which would silently swallow whatever
            # character follows the predicate.
            if not expr.startswith(']'):
                raise SyntaxError("ypath: expecting ']' at '%s'" % expr)
            expr = expr[1:]
            ypth = pred_seg(ypth,pred)
            continue
        if '|' == tok:
            (rhs, expr) = parse(expr[1:])
            ypth = or_seg(ypth,rhs)
            continue
        if '(' == tok:
            (child,expr) = parse(expr[1:])
            if not expr.startswith(')'):
                raise SyntaxError("ypath: expecting ')' at '%s'" % expr)
            expr = expr[1:]
            ypth = conn_seg(ypth,child)
            continue
        break
    return (ypth,expr)

class convert_to_value(null_seg):
    """
        summary: >
            Adapter around another segment that yields the value
            of each context the wrapped segment produces, rather
            than the context itself.
    """
    def __init__(self,itr):
        self.itr = itr

    def bind(self,cntx):
        # binding is delegated entirely to the wrapped segment
        self.itr.bind(cntx)

    def next(self):
        found = self.itr.next()
        return found.value

def ypath(expr,target=noTarget,cntx=0):
    """
        summary: >
            Compile a ypath expression and, when a target is
            given, apply it.
        args:
            expr:   the ypath expression
            target: object to search; when omitted the compiled
                    path is returned so the caller can apply it
            cntx:   when true results are context objects, when
                    false (the default) they are the values
        returns: >
            The compiled path if no target was given, otherwise
            the iterator returned by applying it to target.
        raises: SyntaxError when part of expr is left unparsed
    """
    (path,leftover) = parse(expr)
    if leftover:
        raise SyntaxError("ypath parse error `%s`" % leftover)
    if not cntx:
        path = convert_to_value(path)
    if target is not noTarget:
        return path.apply(target)
    return path