| #!/usr/bin/env python |
| # |
| # tree.py: tools for comparing directory trees |
| # |
| # Subversion is a tool for revision control. |
| # See http://subversion.tigris.org for more information. |
| # |
| # ==================================================================== |
| # Copyright (c) 2001 Sam Tobin-Hochstadt. All rights reserved. |
| # |
| # This software is licensed as described in the file COPYING, which |
| # you should have received as part of this distribution. The terms |
| # are also available at http://subversion.tigris.org/license-1.html. |
| # If newer versions of this license are posted there, you may use a |
| # newer version instead, at your option. |
| # |
| ###################################################################### |
| |
| # This file was modified by Vladimir Prus to store modification times in tree |
| # nodes. |
| |
| import re |
| import string |
| import os.path |
| import os |
| import stat |
| |
| |
| #======================================================================== |
| |
| # ===> Overview of our Datastructures <=== |
| |
| # The general idea here is that many, many things can be represented by |
| # a tree structure: |
| |
| # - a working copy's structure and contents |
| # - the output of 'svn status' |
| # - the output of 'svn checkout/update' |
| # - the output of 'svn commit' |
| |
| # The idea is that a test function creates a "expected" tree of some |
| # kind, and is then able to compare it to an "actual" tree that comes |
| # from running the Subversion client. This is what makes a test |
| # automated; if an actual and expected tree match exactly, then the test |
| # has passed. (See compare_trees() below.) |
| |
| # The SVNTreeNode class is the fundamental data type used to build tree |
| # structures. The class contains a method for "dropping" a new node |
| # into an ever-growing tree structure. (See also create_from_path()). |
| |
| # We have four parsers in this file for the four use cases listed above: |
| # each parser examines some kind of input and returns a tree of |
| # SVNTreeNode objects. (See build_tree_from_checkout(), |
| # build_tree_from_commit(), build_tree_from_status(), and |
| # build_tree_from_wc()). These trees are the "actual" trees that result |
| # from running the Subversion client. |
| |
| # Also necessary, of course, is a convenient way for a test to create an |
| # "expected" tree. The test *could* manually construct and link a bunch |
| # of SVNTreeNodes, certainly. But instead, all the tests are using the |
| # build_generic_tree() routine instead. |
| |
| # build_generic_tree() takes a specially-formatted list of lists as |
| # input, and returns a tree of SVNTreeNodes. The list of lists has this |
| # structure: |
| |
| # [ ['/full/path/to/item', 'text contents', {prop-hash}, {att-hash}], |
| # [...], |
| # [...], |
| # ... ] |
| |
| # You can see that each item in the list essentially defines an |
| # SVNTreeNode. build_generic_tree() instantiates a SVNTreeNode for each |
| # item, and then drops it into a tree by parsing each item's full path. |
| |
| # So a typical test routine spends most of its time preparing lists of |
| # this format and sending them to build_generic_tree(), rather than |
| # building the "expected" trees directly. |
| |
| # ### Note: in the future, we'd like to remove this extra layer of |
| # ### abstraction. We'd like the SVNTreeNode class to be more |
| # ### directly programmer-friendly, providing a number of accessor |
| # ### routines, so that tests can construct trees directly. |
| |
| # The first three fields of each list-item are self-explanatory. It's |
| # the fourth field, the "attribute" hash, that needs some explanation. |
| # The att-hash is used to place extra information about the node itself, |
| # depending on the parsing context: |
| |
| # - in the 'svn co/up' use-case, each line of output starts with two |
| # characters from the set of (A, D, G, U, C, _). This status code |
| # is stored in a attribute named 'status'. |
| |
| # - in the 'svn ci/im' use-case, each line of output starts with one |
| # of the words (Adding, Deleting, Sending). This verb is stored in |
| # an attribute named 'verb'. |
| |
| # - in the 'svn status' use-case (which is always run with the -v |
| # (--verbose) flag), each line of output contains a working revision |
| # number and a two-letter status code similar to the 'svn co/up' |
| # case. The repository revision is also printed. All of this |
| # information is stored in attributes named 'wc_rev', 'status', and |
| # 'repos_rev', respectively. |
| |
| # - in the working-copy use-case, the att-hash is ignored. |
| |
| |
| # Finally, one last explanation: the file 'actions.py' contain a number |
| # of helper routines named 'run_and_verify_FOO'. These routines take |
| # one or more "expected" trees as input, then run some svn subcommand, |
| # then push the output through an appropriate parser to derive an |
| # "actual" tree. Then it runs compare_trees() and returns the result. |
| # This is why most tests typically end with a call to |
| # run_and_verify_FOO(). |
| |
| |
| |
| |
| # A node in a tree. |
| # |
| # If CHILDREN is None, then the node is a file. Otherwise, CHILDREN |
| # is a list of the nodes making up that directory's children. |
| # |
| # NAME is simply the name of the file or directory. CONTENTS is a |
| # string that contains the file's contents (if a file), PROPS are |
| # properties attached to files or dirs, and ATTS is a dictionary of |
| # other metadata attached to the node. |
| |
| class SVNTreeNode: |
| |
| def __init__(self, name, children=None, contents=None, props={}, atts={}): |
| self.name = name |
| self.mtime = 0 |
| self.children = children |
| self.contents = contents |
| self.props = props |
| self.atts = atts |
| self.path = name |
| |
| # TODO: Check to make sure contents and children are mutually exclusive |
| |
| def add_child(self, newchild): |
| if self.children is None: # if you're a file, |
| self.children = [] # become an empty dir. |
| child_already_exists = 0 |
| for a in self.children: |
| if a.name == newchild.name: |
| child_already_exists = 1 |
| break |
| if child_already_exists == 0: |
| self.children.append(newchild) |
| newchild.path = os.path.join (self.path, newchild.name) |
| |
| # If you already have the node, |
| else: |
| if newchild.children is None: |
| # this is the 'end' of the chain, so copy any content here. |
| a.contents = newchild.contents |
| a.props = newchild.props |
| a.atts = newchild.atts |
| a.path = os.path.join (self.path, newchild.name) |
| else: |
| # try to add dangling children to your matching node |
| for i in newchild.children: |
| a.add_child(i) |
| |
| |
| def pprint(self): |
| print " * Node name: ", self.name |
| print " Path: ", self.path |
| print " Contents: ", self.contents |
| print " Properties:", self.props |
| print " Attributes:", self.atts |
| ### FIXME: I'd like to be able to tell the difference between |
| ### self.children is None (file) and self.children == [] (empty |
| ### diretory), but it seems that most places that construct |
| ### SVNTreeNode objects don't even try to do that. --xbc |
| if self.children is not None: |
| print " Children: ", len(self.children) |
| else: |
| print " Children: is a file." |
| |
| # reserved name of the root of the tree |
| |
| root_node_name = "__SVN_ROOT_NODE" |
| |
| # Exception raised if you screw up in this module. |
| |
| class SVNTreeError(Exception): pass |
| |
| # Exception raised if two trees are unequal |
| |
| class SVNTreeUnequal(Exception): pass |
| |
| # Exception raised if one node is file and other is dir |
| |
| class SVNTypeMismatch(Exception): pass |
| |
| # Exception raised if get_child is passed a file. |
| |
| class SVNTreeIsNotDirectory(Exception): pass |
| |
| |
| # Some attributes 'stack' on each other if the same node is added |
| # twice to a tree. Place all such special cases in here. |
| def attribute_merge(orighash, newhash): |
| "Merge the attributes in NEWHASH into ORIGHASH." |
| |
| if orighash.has_key('verb') and newhash.has_key('verb'): |
| # Special case: if a commit reports a node as "deleted", then |
| # "added", it's a replacment. |
| if orighash['verb'] == "Deleting": |
| if newhash['verb'] == "Adding": |
| orighash['verb'] = "Replacing" |
| |
| # Add future stackable attributes here... |
| |
| return orighash |
| |
| |
| # helper func |
| def add_elements_as_path(top_node, element_list): |
| """Add the elements in ELEMENT_LIST as if they were a single path |
| below TOP_NODE.""" |
| |
| # The idea of this function is to take a list like so: |
| # ['A', 'B', 'C'] and a top node, say 'Z', and generate a tree |
| # like this: |
| # |
| # Z -> A -> B -> C |
| # |
| # where 1 -> 2 means 2 is a child of 1. |
| # |
| |
| prev_node = top_node |
| for i in element_list: |
| new_node = SVNTreeNode(i, None) |
| prev_node.add_child(new_node) |
| prev_node = new_node |
| |
| |
| # Sorting function -- sort 2 nodes by their names. |
| def node_is_greater(a, b): |
| "Sort the names of two nodes." |
| # Interal use only |
| if a.name == b.name: |
| return 0 |
| if a.name > b.name: |
| return 1 |
| else: |
| return -1 |
| |
| |
| # Helper for compare_trees |
| def compare_file_nodes(a, b): |
| """Compare two nodes' names, contents, and properties, ignoring |
| children. Return 0 if the same, 1 otherwise.""" |
| if a.name != b.name: |
| return 1 |
| if a.contents != b.contents: |
| return 1 |
| if a.props != b.props: |
| return 1 |
| if a.atts != b.atts: |
| return 1 |
| |
| |
| # Internal utility used by most build_tree_from_foo() routines. |
| # |
| # (Take the output and .add_child() it to a root node.) |
| |
| def create_from_path(path, contents=None, props={}, atts={}): |
| """Create and return a linked list of treenodes, given a PATH |
| representing a single entry into that tree. CONTENTS and PROPS are |
| optional arguments that will be deposited in the tail node.""" |
| |
| # get a list of all the names in the path |
| # each of these will be a child of the former |
| elements = path.split("/") |
| if len(elements) == 0: |
| raise SVNTreeError |
| |
| root_node = SVNTreeNode(elements[0], None) |
| |
| add_elements_as_path(root_node, elements[1:]) |
| |
| # deposit contents in the very last node. |
| node = root_node |
| while 1: |
| if node.children is None: |
| node.contents = contents |
| node.props = props |
| node.atts = atts |
| break |
| node = node.children[0] |
| |
| return root_node |
| |
| |
| # helper for handle_dir(), which is a helper for build_tree_from_wc() |
| def get_props(path): |
| "Return a hash of props for PATH, using the svn client." |
| |
| # It's not kosher to look inside SVN/ and try to read the internal |
| # property storage format. Instead, we use 'svn proplist'. After |
| # all, this is the only way the user can retrieve them, so we're |
| # respecting the black-box paradigm. |
| |
| props = {} |
| output, errput = main.run_svn(1, "proplist", path, "--verbose") |
| |
| for line in output: |
| name, value = line.split(' : ') |
| name = string.strip(name) |
| value = string.strip(value) |
| props[name] = value |
| |
| return props |
| |
| |
| # helper for handle_dir(), which helps build_tree_from_wc() |
| def get_text(path): |
| "Return a string with the textual contents of a file at PATH." |
| |
| # sanity check |
| if not os.path.isfile(path): |
| return None |
| |
| fp = open(path, 'r') |
| contents = fp.read() |
| fp.close() |
| return contents |
| |
| |
| # main recursive helper for build_tree_from_wc() |
| def handle_dir(path, current_parent, load_props, ignore_svn): |
| |
| # get a list of all the files |
| all_files = os.listdir(path) |
| files = [] |
| dirs = [] |
| |
| # put dirs and files in their own lists, and remove SVN dirs |
| for f in all_files: |
| f = os.path.join(path, f) |
| if (os.path.isdir(f) and os.path.basename(f) != 'SVN'): |
| dirs.append(f) |
| elif os.path.isfile(f): |
| files.append(f) |
| |
| # add each file as a child of CURRENT_PARENT |
| for f in files: |
| fcontents = get_text(f) |
| if load_props: |
| fprops = get_props(f) |
| else: |
| fprops = {} |
| c = SVNTreeNode(os.path.basename(f), None, |
| fcontents, fprops) |
| c.mtime = os.stat(f)[stat.ST_MTIME] |
| current_parent.add_child(c) |
| |
| # for each subdir, create a node, walk its tree, add it as a child |
| for d in dirs: |
| if load_props: |
| dprops = get_props(d) |
| else: |
| dprops = {} |
| new_dir_node = SVNTreeNode(os.path.basename(d), [], None, dprops) |
| handle_dir(d, new_dir_node, load_props, ignore_svn) |
| new_dir_node.mtime = os.stat(f)[stat.ST_MTIME] |
| current_parent.add_child(new_dir_node) |
| |
| def get_child(node, name): |
| """If SVNTreeNode NODE contains a child named NAME, return child; |
| else, return None. If SVNTreeNode is not a directory, raise a |
| SVNTreeIsNotDirectory exception""" |
| if node.children == None: |
| raise SVNTreeIsNotDirectory |
| for n in node.children: |
| if (name == n.name): |
| return n |
| return None |
| |
| |
| # Helper for compare_trees |
| def default_singleton_handler(a, baton): |
| "Printing SVNTreeNode A's name, then raise SVNTreeUnequal." |
| print "Got singleton", a.name |
| a.pprint() |
| raise SVNTreeUnequal |
| |
| |
| ########################################################################### |
| ########################################################################### |
| # EXPORTED ROUTINES ARE BELOW |
| |
| |
| # Main tree comparison routine! |
| |
| def compare_trees(a, b, |
| singleton_handler_a = None, |
| a_baton = None, |
| singleton_handler_b = None, |
| b_baton = None): |
| """Compare SVNTreeNodes A and B, expressing differences using FUNC_A |
| and FUNC_B. FUNC_A and FUNC_B are functions of two arguments (a |
| SVNTreeNode and a context baton), and may raise exception |
| SVNTreeUnequal. Their return value is ignored. |
| |
| If A and B are both files, then return 0 if their contents, |
| properties, and names are all the same; else raise a SVNTreeUnequal. |
| If A is a file and B is a directory, raise a SVNTypeMismatch; same |
| vice-versa. If both are directories, then for each entry that |
| exists in both, call compare_trees on the two entries; otherwise, if |
| the entry exists only in A, invoke FUNC_A on it, and likewise for |
| B with FUNC_B.""" |
| |
| def display_nodes(a, b): |
| 'Display two nodes, expected and actual.' |
| print "=============================================================" |
| print "Expected", b.name, "and actual", a.name, "are different!" |
| print "=============================================================" |
| print "EXPECTED NODE TO BE:" |
| print "=============================================================" |
| b.pprint() |
| print "=============================================================" |
| print "ACTUAL NODE FOUND:" |
| print "=============================================================" |
| a.pprint() |
| |
| # Setup singleton handlers |
| if (singleton_handler_a is None): |
| singleton_handler_a = default_singleton_handler |
| if (singleton_handler_b is None): |
| singleton_handler_b = default_singleton_handler |
| |
| try: |
| # A and B are both files. |
| if ((a.children is None) and (b.children is None)): |
| if compare_file_nodes(a, b): |
| display_nodes(a, b) |
| raise main.SVNTreeUnequal |
| # One is a file, one is a directory. |
| elif (((a.children is None) and (b.children is not None)) |
| or ((a.children is not None) and (b.children is None))): |
| display_nodes(a, b) |
| raise main.SVNTypeMismatch |
| # They're both directories. |
| else: |
| # First, compare the directories' two hashes. |
| if (a.props != b.props) or (a.atts != b.atts): |
| display_nodes(a, b) |
| raise main.SVNTreeUnequal |
| |
| accounted_for = [] |
| # For each child of A, check and see if it's in B. If so, run |
| # compare_trees on the two children and add b's child to |
| # accounted_for. If not, run FUNC_A on the child. Next, for each |
| # child of B, check and see if it's in accounted_for. If it is, |
| # do nothing. If not, run FUNC_B on it. |
| for a_child in a.children: |
| b_child = get_child(b, a_child.name) |
| if b_child: |
| accounted_for.append(b_child) |
| compare_trees(a_child, b_child, |
| singleton_handler_a, a_baton, |
| singleton_handler_b, b_baton) |
| else: |
| singleton_handler_a(a_child, a_baton) |
| for b_child in b.children: |
| if (b_child not in accounted_for): |
| singleton_handler_b(b_child, b_baton) |
| return 0 |
| except SVNTypeMismatch: |
| print 'Unequal Types: one Node is a file, the other is a directory' |
| raise SVNTreeUnequal |
| except SVNTreeIsNotDirectory: |
| print "Error: Foolish call to get_child." |
| sys.exit(1) |
| except IndexError: |
| print "Error: unequal number of children" |
| raise SVNTreeUnequal |
| except SVNTreeUnequal: |
| if a.name == root_node_name: |
| return 1 |
| else: |
| print "Unequal at node %s" % a.name |
| raise SVNTreeUnequal |
| return 0 |
| |
| |
| |
| |
| # Visually show a tree's structure |
| |
| def dump_tree(n,indent=""): |
| "Print out a nice representation of the tree's structure." |
| |
| # Code partially stolen from Dave Beazley. |
| if n.children is None: |
| tmp_children = [] |
| else: |
| tmp_children = n.children |
| |
| if n.name == root_node_name: |
| print "%s%s" % (indent, "ROOT") |
| else: |
| print "%s%s" % (indent, n.name) |
| |
| indent = indent.replace("-", " ") |
| indent = indent.replace("+", " ") |
| for i in range(len(tmp_children)): |
| c = tmp_children[i] |
| if i == len(tmp_children) - 1: |
| dump_tree(c,indent + " +-- ") |
| else: |
| dump_tree(c,indent + " |-- ") |
| |
| |
| ################################################################### |
| ################################################################### |
| # PARSERS that return trees made of SVNTreeNodes.... |
| |
| |
| ################################################################### |
| # Build an "expected" static tree from a list of lists |
| |
| |
| # Create a list of lists, of the form: |
| # |
| # [ [path, contents, props, atts], ... ] |
| # |
| # and run it through this parser. PATH is a string, a path to the |
| # object. CONTENTS is either a string or None, and PROPS and ATTS are |
| # populated dictionaries or {}. Each CONTENTS/PROPS/ATTS will be |
| # attached to the basename-node of the associated PATH. |
| |
| def build_generic_tree(nodelist): |
| "Given a list of lists of a specific format, return a tree." |
| |
| root = SVNTreeNode(root_node_name) |
| |
| for list in nodelist: |
| new_branch = create_from_path(list[0], list[1], list[2], list[3]) |
| root.add_child(new_branch) |
| |
| return root |
| |
| |
| #################################################################### |
| # Build trees from different kinds of subcommand output. |
| |
| |
| # Parse co/up output into a tree. |
| # |
| # Tree nodes will contain no contents, and only one 'status' att. |
| |
| def build_tree_from_checkout(lines): |
| "Return a tree derived by parsing the output LINES from 'co' or 'up'." |
| |
| root = SVNTreeNode(root_node_name) |
| rm = re.compile ('^([MAGCUD_ ][MAGCUD_ ]) (.+)') |
| |
| for line in lines: |
| match = rm.search(line) |
| if match and match.groups(): |
| new_branch = create_from_path(match.group(2), None, {}, |
| {'status' : match.group(1)}) |
| root.add_child(new_branch) |
| |
| return root |
| |
| |
| # Parse ci/im output into a tree. |
| # |
| # Tree nodes will contain no contents, and only one 'verb' att. |
| |
| def build_tree_from_commit(lines): |
| "Return a tree derived by parsing the output LINES from 'ci' or 'im'." |
| |
| # Lines typically have a verb followed by whitespace then a path. |
| root = SVNTreeNode(root_node_name) |
| rm1 = re.compile ('^(\w+)\s+(.+)') |
| rm2 = re.compile ('^Transmitting') |
| |
| for line in lines: |
| match = rm2.search(line) |
| if not match: |
| match = rm1.search(line) |
| if match and match.groups(): |
| new_branch = create_from_path(match.group(2), None, {}, |
| {'verb' : match.group(1)}) |
| root.add_child(new_branch) |
| |
| return root |
| |
| |
| # Parse status output into a tree. |
| # |
| # Tree nodes will contain no contents, and these atts: |
| # |
| # 'status', 'wc_rev', 'repos_rev' |
| # ... and possibly 'locked', 'copied', IFF columns non-empty. |
| # |
| |
| def build_tree_from_status(lines): |
| "Return a tree derived by parsing the output LINES from 'st'." |
| |
| root = SVNTreeNode(root_node_name) |
| rm = re.compile ('^.+\:.+(\d+)') |
| lastline = string.strip(lines.pop()) |
| match = rm.search(lastline) |
| if match and match.groups(): |
| repos_rev = match.group(1) |
| else: |
| repos_rev = '?' |
| |
| # Try http://www.wordsmith.org/anagram/anagram.cgi?anagram=ACDRMGU |
| rm = re.compile ('^([MACDRUG_ ][MACDRUG_ ])(.)(.) . [^0-9-]+(\d+|-)(.{23})(.+)') |
| for line in lines: |
| match = rm.search(line) |
| if match and match.groups(): |
| if match.group(5) != '-': # ignore items that only exist on repos |
| atthash = {'status' : match.group(1), |
| 'wc_rev' : match.group(4), |
| 'repos_rev' : repos_rev} |
| if match.group(2) != ' ': |
| atthash['locked'] = match.group(2) |
| if match.group(3) != ' ': |
| atthash['copied'] = match.group(3) |
| new_branch = create_from_path(match.group(6), None, {}, atthash) |
| |
| root.add_child(new_branch) |
| |
| return root |
| |
| |
| #################################################################### |
| # Build trees by looking at the working copy |
| |
| |
| # The reason the 'load_props' flag is off by default is because it |
| # creates a drastic slowdown -- we spawn a new 'svn proplist' |
| # process for every file and dir in the working copy! |
| |
| |
| def build_tree_from_wc(wc_path, load_props=0, ignore_svn=1): |
| """Takes WC_PATH as the path to a working copy. Walks the tree below |
| that path, and creates the tree based on the actual found |
| files. If IGNORE_SVN is true, then exclude SVN dirs from the tree. |
| If LOAD_PROPS is true, the props will be added to the tree.""" |
| |
| root = SVNTreeNode(root_node_name, None) |
| |
| # if necessary, store the root dir's props in the root node. |
| if load_props: |
| root.props = get_props(wc_path) |
| |
| # Walk the tree recursively |
| handle_dir(os.path.normpath(wc_path), root, load_props, ignore_svn) |
| |
| return root |
| |
| ### End of file. |
| # local variables: |
| # eval: (load-file "../../../../../tools/dev/svn-dev.el") |
| # end: |