Commits

JanKanis  committed dd012ad Draft

lots of changes, implement a new path resolution algorithm

  • Participants
  • Parent commits 054b003
  • Branches pathwatcher

Comments (0)

Files changed (3)

File inotify/newwatcher.py

 import array
 import errno
 import fcntl
-import os
+import os, sys
 from os import path
 import termios
 from collections import namedtuple, defaultdict
+from pathlib import PosixPath
 
 
 # Inotify flags that can be specified on a watch and can be returned in an event
         self._reread_required = None
 
     def add(self, pth, mask):
+        pth = PosixPath(pth)
         if pth in self._paths:
             self._paths[pth].update_mask(mask)
             return
         self._paths[pth] = _Watch(self, pth, mask)
 
     def _createwatch(self, pth, name, mask, callback):
-        wd = inotify.add_watch(self.fd, pth, mask | inotify.IN_MASK_ADD)
+        wd = inotify.add_watch(self.fd, str(pth), mask | inotify.IN_MASK_ADD)
         if not wd in self._watchdescriptors:
             self._watchdescriptors[wd] = _Descriptor(self, wd)
         desc = self._watchdescriptors[wd]
-        desc.add_callback(pth, mask, name, callback)
+        desc.add_callback(mask, name, callback)
         return desc
 
     def _removewatch(self, descriptor):
 
 
 class _Watch (object):
+    root = PosixPath('/')
+    cwd = PosixPath('.')
+    parentdir = PosixPath('..')
+    
     def __init__(self, watcher, pth, mask):
         self.watcher = watcher
-        self.path = self._normpath(pth)
-        self.cwd = os.getcwd()
+        self.path = PosixPath(pth)
+        self.cwd = PosixPath.cwd()
         self.mask = mask
         self.links = []
+        self.complete_watch = False
         # self.inode = None
-        self.add(pth)
+        self.reconnect()
 
-    def _normpath(self, pth):
-        split = [p for p in pth.split(path.sep) if p not in ('', '.')]
-        if pth.startswith('/'):
-            split.insert(0, '/')
-        return split
 
-    def _nonrel(self, pth):
-        '''Return the path joined with the working directory at the time this watch was
-        created.
-        '''
-        return path.join(self.cwd, pth)
 
-    def add(self, pth):
-        # Register symlinks in a non-racy way
+
+            
+        
+
+    @staticmethod
+    def paths(path):
+        '''Yield the successive steps taken while resolving path.
+
+        Every yielded tuple starts with (dir, name, rest): the directory
+        reached so far, the next path element to look up in it, and the
+        elements that remain after that one. They are followed by a step type
+        ('path', 'symlink', 'error' or 'target'); an 'error' step carries the
+        errno name ('ENOENT' or 'ENOTDIR') as one extra element.
+
+        path must be a path object offering is_absolute(), relative() and a
+        sliceable parts attribute.
+        '''
+        # An empty path and the current directory are represented the same
+        # way in pathlib, so '.' doubles as the "nothing left" marker.
+        none = _Watch.cwd
+
+        if path.is_absolute():
+            directory = _Watch.root
+            path = path.relative()
+        else:
+            directory = _Watch.cwd
+        name = path.parts[0:1]
+        rest = path.parts[1:]
+
+        yield (directory, name, rest, 'path')
+
+        # NOTE(review): an 'error' step leaves name unchanged, so this loop
+        # keeps yielding the same error for as long as the caller iterates.
+        while name != none:
+            directory, name, rest, *kind = _Watch.nextpath(directory, name, rest)
+            if name == _Watch.parentdir:
+                # Step out of the current directory and move on to the
+                # element that follows the '..'.
+                directory = directory.parent()
+                name = rest.parts[0:1]
+                rest = rest.parts[1:]
+            if name == none:
+                # Must stay a sequence: tuple('target') would split the
+                # string into single characters.
+                kind = ['target']
+            yield (directory, name, rest) + tuple(kind)
+        
+
+    @staticmethod
+    def nextpath(dir, name, rest):
+        '''Take a single resolution step by looking up name inside dir.
+
+        dir: the directory reached so far.
+        name: the path element to examine in dir.
+        rest: the path elements that follow name.
+
+        Returns a tuple (dir, name, rest, type) describing the state after
+        the step, where type is 'path' when name was not a symlink and
+        'symlink' when it was. If the entry cannot be read the arguments are
+        returned unchanged as (dir, name, rest, 'error', errname) with errname
+        'ENOENT' or 'ENOTDIR'. Any other OSError is propagated.
+        '''
+        # readlink() doubles as the "is this a symlink" test.
+        try:
+            link = os.readlink(str(dir[name]))
+        except OSError as e:
+            if e.errno == errno.EINVAL:
+                # The entry is not a symbolic link, assume it is a normal file
+                # or directory
+                return (dir[name], rest.parts[0:1], rest.parts[1:], 'path')
+            if e.errno == errno.ENOENT:
+                # The entry does not exist, or the path is not valid
+                return (dir, name, rest, 'error', 'ENOENT')
+            if e.errno == errno.ENOTDIR:
+                # A directory along the path changed, path is no longer
+                # valid. We should have received an event about this so abort
+                # now and re-establish when we receive the event.
+                return (dir, name, rest, 'error', 'ENOTDIR')
+            raise
+
+        # It is a link: its contents replace name in front of what was left.
+        rest = PosixPath(link)[rest]
+        if rest.is_absolute():
+            dir = _Watch.root
+            rest = rest.relative()
+        # else dir remains the current dir
+        return (dir, rest.parts[0:1], rest.parts[1:], 'symlink')
+
+
+    def reconnect(self):
+        # seen_links = set()
+        
+        # Register symlinks and path elements in a non-racy way
+        pth = self.path
         linkdepth = 0
         while True:
             try:
-                link = os.readlink(pth)
+                link = os.readlink(str(pth))
             except OSError as e:
                 if e.errno == os.errno.EINVAL:
                     # The entry is not a symbolic link
                     return
                 raise
             self.add_symlink(pth)
-            pth = path.join(path.dirname(pth), link)
+            pth = pth.parent()[link]
             linkdepth += 1
 
         self.add_leaf(pth)
     def add_leaf(self, pth):
         mask = self.mask | inotify.IN_MOVE_SELF | inotify.IN_DELETE_SELF
         self.links.append(_Link(len(self.links), 'leaf', self, mask, pth, None))
+        self.complete_watch = True
         # st = os.stat(pth)
         # self.inode = (st.st_dev, st.st_ino)
 
     def add_symlink(self, pth):
-        pth, name = path.split(pth)
-        if not pth:
-            pth = '.'
+        name = pth.parts[-1:]
+        pth = pth.parent()
         mask = inotify.IN_MOVE | inotify.IN_DELETE | inotify.IN_CREATE | inotify.IN_ONLYDIR
         self.links.append(_Link(len(self.links), 'symlink', self, mask, pth, name))
         
-    def handle_event(self, event, pth):
-        if pth.idx == len(self.links) - 1:
+    def handle_event(self, event, link):
+        if self.complete_watch and link.idx == len(self.links) - 1:
             assert event.mask & self.mask
-            yield Event(event, path.join(*self.path))
+            yield Event(event, str(self.path))
         else:
-            for p in self.links[pth.idx:]:
+            for p in self.links[link.idx:]:
                 p.remove()
-            del self.links[pth.idx:]
-            yield Event(mediumevent(mask=inotify.IN_LINK_CHANGED, cookie=0, name=None, wd=event.wd), path.join(*self.path)), False
+            del self.links[link.idx:]
+            self.complete_watch = False
+            yield Event(mediumevent(mask=inotify.IN_LINK_CHANGED, cookie=0, name=None, wd=event.wd), str(self.path))
+
+    def __str__(self):
+        '''Return a short description naming the watched path.'''
+        watched = str(self.path)
+        return '<_Watch for {}>'.format(watched)
              
 
 mediumevent = namedtuple('mediumevent', 'mask cookie name wd')
 
     def remove(self):
         self.wd.remove_callback(self.name, self.handle_event)
+
+    def _fullname(self):
+        '''Return this link's location as a string: path joined with name
+        when a name is set, otherwise path alone.
+        '''
+        target = self.path[self.name] if self.name else self.path
+        return str(target)
+
+    def __str__(self):
+        '''Return a short description built from the link's full name.'''
+        description = self._fullname()
+        return '<_Link for {}>'.format(description)
     
 
 class _Descriptor (object):
         # callbacks is indexed by name to improve speed and because we
         # can. Indexing by name and mask would be faster but would be more
         # cumbersome to implement.
-        self.callbacks = defaultdict(list)
+        self.callbacks = {}
 
-    def add_callback(self, pth, mask, name, callback):
+    def add_callback(self, mask, name, callback):
         # If the callback is to a path link element, mask will include
         # IN_ONLYDIR so we could remove that here. However the IN_ONLYDIR flag
         # can not be returned by inotify events so keeping it in does no harm.
         self.mask |= mask
-        self.callbacks[name].append((mask, callback))
+        self.callbacks.setdefault(name, []).append((mask, callback))
 
     def remove_callback(self, name, callback):
         idx = [c == callback for m,c in self.callbacks[name]].index(True)
             self.watcher._signal_empty_descriptor(self)
 
     def handle_event(self, event):
-        for m, c in self.callbacks[event.name]:
+        name = PosixPath(event.name) if not event.name is None else None
+        for m, c in self.callbacks.get(name, ()):
             if event.mask & m:
                 yield from c(event)
         if event.mask & inotify.IN_IGNORED:
+            assert not self.callbacks
             self.watcher._removewatch(self)
       
+    def __str__(self):
+        '''Return a description listing the watch descriptor and the full
+        name of every object that has a callback registered on it.
+        '''
+        # callbacks maps a name to a list of (mask, callback) pairs. The
+        # callbacks are bound methods, so __self__ is the object they belong
+        # to; that object is expected to provide _fullname().
+        names = ', '.join(callback.__self__._fullname()
+                          for entries in self.callbacks.values()
+                          for mask, callback in entries)
+        return '<_Descriptor for wd {}: {}>'.format(self.wd, names)
+
+
+class InvalidPathException(Exception):
+    '''Common base class for the "Path not valid" errors.'''
+
+class NoEntryException(InvalidPathException):
+    '''Error stating that a path does not exist.'''
+
+    def __init__(self, pth, *args):
+        text = "Path not valid: '{}' does not exist".format(pth)
+        super().__init__(text, *args)
+
+class NotDirectoryException(InvalidPathException):
+    '''Error stating that a path is not a directory.'''
+
+    def __init__(self, pth, *args):
+        text = "Path not valid: '{}' is not a directory".format(pth)
+        super().__init__(text, *args)
+
+class ConcurrentFilesystemModificationException(InvalidPathException):
+    '''Error stating that the filesystem changed while a path was being
+    traversed.
+    '''
+
+    def __init__(self, pth, *args):
+        text = "Path not valid: A concurrent change was detected while traversing '{}'".format(pth)
+        super().__init__(text, *args)
+
+class SymlinkLoopException(InvalidPathException):
+    '''Error stating that a symlink is part of a symlink loop.'''
+
+    def __init__(self, pth, *args):
+        # The stray opening parenthesis that used to precede this string
+        # made the whole module fail to compile.
+        msg = "Path not valid: The symlink at '{}' forms a symlink loop".format(pth)
+        super().__init__(msg, *args)

File inotify/pathresolver.py

+# pathresolver.py - This module contains an iterator that iterates over all
+# elements of a path including any symlinks. 
+
+# Copyright 2012-2013 Jan Kanis <jan.code@jankanis.nl>
+
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+
+__author__ = "Jan Kanis <jan.code@jankanis.nl>"
+
+
+import errno
+import os
+from pathlib import PosixPath
+
+
+_curdir = PosixPath('.')
+_root = PosixPath('/')
+_parentdir = PosixPath('..')
+
+
+def resolve_path(path):
+    '''Iterate over every filesystem location visited while resolving path.
+
+    Each yielded value is a two-element tuple. The first element is the
+    symlink-free path reached so far; the second is the part of the path,
+    relative to the first element, that still has to be traversed and that
+    may contain further symlinks.
+
+    There is no limit on the number of symlinks that is followed, but a
+    symlink loop that keeps the path from resolving is still detected.
+
+    path may be a string or a pathlib object; the yielded values are
+    pathlib.PosixPath objects.
+
+    '''
+    # Start in the current directory with no symlink being resolved, an
+    # empty cache of resolved links and the link counter at zero.
+    steps = resolve_symlink(_curdir, PosixPath(path), set(), {}, [0])
+    yield from steps
+
+
+def resolve_symlink(location, link_contents, active_links, known_links, linkcounter):
+    '''Recursively resolve a symlink to the file or directory it ultimately
+    points to, yielding a (location, remaining path) tuple for every step.
+
+    This function handles an unlimited number of symlinks, and correctly
+    detects symlink loops. All path parameters should be given as
+    pathlib.PosixPath instances.
+
+    location: The directory in which the currently to be resolved link resides.
+
+    link_contents: The path stored in the symlink as returned by readlink().
+
+    active_links: A set of symlinks that is currently being resolved. Meeting
+    one of these again means the path loops.
+
+    known_links: A dictionary of link location -> resolved target path. This
+    cache prevents this function from having to resolve the same symlink
+    twice. (Note that having to traverse the same symlink multiple times
+    does not necessarily mean that the path does not resolve to anything.)
+
+    linkcounter: A list containing a single number. (We use a list so that the
+    value can be passed by reference.) This number is updated to indicate the
+    total number of symlinks that has been traversed.
+
+    Raises NoEntryException, NotDirectoryException,
+    ConcurrentFilesystemModificationException or SymlinkLoopException when
+    the path cannot be resolved. Any other OSError from readlink() is
+    propagated unchanged.
+
+    '''
+
+    while True:
+        if link_contents.is_absolute():
+            location = _root
+            link_contents = link_contents.relative()
+
+        yield location, link_contents
+        if link_contents == _curdir:
+            # Nothing left to traverse: location is the final answer.
+            return
+
+        if link_contents.parts[0:1] == _parentdir:
+            # We need to choose here if we allow traversing of a path above
+            # the root or above the current directory. Going above CWD
+            # should be allowed as long as we don't go above / by doing
+            # so. The OS allows going to /.. (which just ends up at /
+            # again), so for consistency with that we also allow it,
+            # although a path that requires us to do this is probably a bug
+            # somewhere.
+            if not all(p in ('/', '..') for p in location.parts):
+                location = location.parent()
+            else:
+                location = location['..']
+            # Strip the first part of link_contents off
+            link_contents = link_contents.parts[1:]
+            continue
+
+        # Pure path arithmetic, kept outside the try block so that nextpath
+        # is always bound when an error is handled.
+        nextpath = location[link_contents.parts[0]]
+        try:
+            newlink = PosixPath(os.readlink(str(nextpath)))
+        except OSError as e:
+            if e.errno == errno.EINVAL:
+                # The entry is not a symbolic link, assume it is a normal file
+                # or directory
+                location = nextpath
+                link_contents = link_contents.parts[1:]
+                continue
+            if e.errno == errno.ENOENT:
+                # The entry does not exist
+                raise NoEntryException(nextpath)
+            if e.errno == errno.ENOTDIR:
+                if not location.is_dir():
+                    raise NotDirectoryException(location)
+                # We should not be able to get here, unless there is a bug
+                # or some relevant part of the file system was changed
+                # concurrently while we were resolving this link.
+                raise ConcurrentFilesystemModificationException(nextpath)
+            # Any other error is not ours to interpret. Without this
+            # re-raise execution would continue with newlink unbound.
+            raise
+
+        # It is a symlink!
+        if nextpath in active_links:
+            raise SymlinkLoopException(nextpath)
+        # We have not yet attempted traversing this symlink during the
+        # current call or any of its parents.
+        if nextpath in known_links:
+            location = known_links[nextpath]
+            link_contents = link_contents.parts[1:]
+            continue
+
+        # An unknown link, resolve it recursively
+        linkcounter[0] += 1
+        # Don't yield the very last result of this recursive call immediately,
+        # we still want to process that further, so yielding runs one step
+        # behind the recursive generator.
+        lastloc, lastlink = None, None
+        for loc, link in resolve_symlink(location, newlink,
+                          active_links.union((nextpath,)), known_links, linkcounter):
+            if lastloc is not None:
+                yield lastloc, lastlink
+            lastloc, lastlink = loc, link
+        # The last yielded location is the final resolution of the symlink. The
+        # last yielded link_contents is always '.' so we can ignore that.
+        known_links[nextpath] = loc
+        location = loc
+        link_contents = link_contents.parts[1:]
+
+
+class InvalidPathException(Exception):
+    '''Common base class for the "Path not valid" errors.'''
+
+class NoEntryException(InvalidPathException):
+    '''Error stating that a path does not exist.'''
+
+    def __init__(self, pth, *args):
+        text = "Path not valid: '{}' does not exist".format(pth)
+        super().__init__(text, *args)
+
+class NotDirectoryException(InvalidPathException):
+    '''Error stating that a path is not a directory.'''
+
+    def __init__(self, pth, *args):
+        text = "Path not valid: '{}' is not a directory".format(pth)
+        super().__init__(text, *args)
+
+class ConcurrentFilesystemModificationException(InvalidPathException):
+    '''Error stating that the filesystem changed while a path was being
+    traversed.
+    '''
+
+    def __init__(self, pth, *args):
+        text = "Path not valid: A concurrent change was detected while traversing '{}'".format(pth)
+        super().__init__(text, *args)
+
+class SymlinkLoopException(InvalidPathException):
+    '''Error stating that a symlink is part of a symlink loop.'''
+
+    def __init__(self, pth, *args):
+        text = "Path not valid: The symlink at '{}' forms a symlink loop".format(pth)
+        super().__init__(text, *args)

File test/newtest.py

 
 # from __future__ import print_function
 
-import sys, os, shutil, tempfile, inspect
+import sys, os, shutil, tempfile, itertools
 import pytest
+from pathlib import PosixPath as P
 
 if not sys.platform.startswith('linux'): raise Exception("This module will only work on Linux")
 
-# find the build dir
+# Find the package to test. We first try an inotify in the current directory,
+# then try to find one in the build directory of this package, and else we
+# import from the default path.
 un = os.uname()
 ver = '.'.join(str(x) for x in sys.version_info[:2])
 testdir = os.path.dirname(os.path.abspath(__file__))
 def test_open(w):
   mask = inotify.IN_OPEN | inotify.IN_CLOSE
   w.add('testfile', mask)
-  watch = w._paths['testfile']
+  watch = w._paths[P('testfile')]
 
   assert len(watch.links) == 1
-  assert watch.path == ['testfile']
+  assert watch.path == P('testfile')
   assert watch.watcher == w
   st = os.stat('testfile')
   # assert watch.inode == (st.st_dev, st.st_ino)
   assert watch.mask == mask
   link = watch.links[0]
   assert link.idx == 0
-  assert link.path == 'testfile'
+  assert link.path == P('testfile')
   linkmask = mask | inotify.IN_MOVE_SELF | inotify.IN_DELETE_SELF
   assert link.mask == linkmask
   assert link.watch == watch
   assert wd.watcher == w
   watchdesc = wd.wd
   assert w._watchdescriptors[watchdesc] == wd
-  assert w._paths['testfile'] == watch
+  assert w._paths[P('testfile')] == watch
   
   open('testfile').close()
   ev1, ev2 = w.read(block=False)
   os.symlink('link3', 'link2')
   os.symlink('link2', 'link1')
   w.add('link1', inotify.IN_OPEN)
-  watch = w._paths['link1']
+  watch = w._paths[P('link1')]
   assert len(watch.links) == 4
   w1, w2, w3, wt  = watch.links
-  assert [w.path+'/'+w.name for w in (w1, w2, w3)] == './link1 ./link2 ./link3'.split()
-  assert (wt.path, wt.name) == ('testfile', None)
+  assert [str(w.path[w.name]) for w in (w1, w2, w3)] == 'link1 link2 link3'.split()
+  assert (wt.path, wt.name) == (P('testfile'), None)
   assert w1.wd == w2.wd == w3.wd
   desc = w1.wd
   linkmask = inotify.IN_MOVE | inotify.IN_DELETE | inotify.IN_CREATE | inotify.IN_ONLYDIR
-  assert desc.callbacks['link1'] == [(linkmask, w1.handle_event)]
-  assert desc.callbacks['link2'] == [(linkmask, w2.handle_event)]
-  assert desc.callbacks['link3'] == [(linkmask, w3.handle_event)]
+  assert desc.callbacks[P('link1')] == [(linkmask, w1.handle_event)]
+  assert desc.callbacks[P('link2')] == [(linkmask, w2.handle_event)]
+  assert desc.callbacks[P('link3')] == [(linkmask, w3.handle_event)]
+
+  os.rename('link2', 'link2new')
+  e = w.read()
+  assert len(e) == 1
+  e1 = e[0]
+  assert e1.link_changed
+  assert len(w._watchdescriptors) == 1
+  assert len(watch.links) == 1
+  assert len(list(itertools.chain(*watch.links[0].wd.callbacks.values()))) == 1
 
   os.rename('link1', 'link1new')
-  e1, = w.read()
-  assert not w._watchdescriptors
+  e = w.read()
+  assert len(e) == 1
+  e1 = e[0]
+  assert e1.link_changed
+  assert len(w._watchdescriptors) == 0
+  assert len(watch.links) == 0
+
   # ipythonembed()
 
 # def test_move(w):