blob: 74bceb7f57bec715d952a1fec34871ab7a0874bb [file] [log] [blame]
#!/usr/bin/env python
#
# tree.py: tools for comparing directory trees
#
# Subversion is a tool for revision control.
# See http://subversion.tigris.org for more information.
#
# ====================================================================
# Copyright (c) 2001 Sam Tobin-Hochstadt. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
######################################################################
# This file was modified by Vladimir Prus to store modification times in tree
# nodes.
import re
import string
import os.path
import os
import stat
#========================================================================
# ===> Overview of our Datastructures <===
# The general idea here is that many, many things can be represented by
# a tree structure:
# - a working copy's structure and contents
# - the output of 'svn status'
# - the output of 'svn checkout/update'
# - the output of 'svn commit'
# The idea is that a test function creates a "expected" tree of some
# kind, and is then able to compare it to an "actual" tree that comes
# from running the Subversion client. This is what makes a test
# automated; if an actual and expected tree match exactly, then the test
# has passed. (See compare_trees() below.)
# The SVNTreeNode class is the fundamental data type used to build tree
# structures. The class contains a method for "dropping" a new node
# into an ever-growing tree structure. (See also create_from_path()).
# We have four parsers in this file for the four use cases listed above:
# each parser examines some kind of input and returns a tree of
# SVNTreeNode objects. (See build_tree_from_checkout(),
# build_tree_from_commit(), build_tree_from_status(), and
# build_tree_from_wc()). These trees are the "actual" trees that result
# from running the Subversion client.
# Also necessary, of course, is a convenient way for a test to create an
# "expected" tree. The test *could* manually construct and link a bunch
# of SVNTreeNodes, certainly. But instead, all the tests are using the
# build_generic_tree() routine instead.
# build_generic_tree() takes a specially-formatted list of lists as
# input, and returns a tree of SVNTreeNodes. The list of lists has this
# structure:
# [ ['/full/path/to/item', 'text contents', {prop-hash}, {att-hash}],
# [...],
# [...],
# ... ]
# You can see that each item in the list essentially defines an
# SVNTreeNode. build_generic_tree() instantiates a SVNTreeNode for each
# item, and then drops it into a tree by parsing each item's full path.
# So a typical test routine spends most of its time preparing lists of
# this format and sending them to build_generic_tree(), rather than
# building the "expected" trees directly.
# ### Note: in the future, we'd like to remove this extra layer of
# ### abstraction. We'd like the SVNTreeNode class to be more
# ### directly programmer-friendly, providing a number of accessor
# ### routines, so that tests can construct trees directly.
# The first three fields of each list-item are self-explanatory. It's
# the fourth field, the "attribute" hash, that needs some explanation.
# The att-hash is used to place extra information about the node itself,
# depending on the parsing context:
# - in the 'svn co/up' use-case, each line of output starts with two
# characters from the set of (A, D, G, U, C, _). This status code
# is stored in a attribute named 'status'.
# - in the 'svn ci/im' use-case, each line of output starts with one
# of the words (Adding, Deleting, Sending). This verb is stored in
# an attribute named 'verb'.
# - in the 'svn status' use-case (which is always run with the -v
# (--verbose) flag), each line of output contains a working revision
# number and a two-letter status code similar to the 'svn co/up'
# case. The repository revision is also printed. All of this
# information is stored in attributes named 'wc_rev', 'status', and
# 'repos_rev', respectively.
# - in the working-copy use-case, the att-hash is ignored.
# Finally, one last explanation: the file 'actions.py' contain a number
# of helper routines named 'run_and_verify_FOO'. These routines take
# one or more "expected" trees as input, then run some svn subcommand,
# then push the output through an appropriate parser to derive an
# "actual" tree. Then it runs compare_trees() and returns the result.
# This is why most tests typically end with a call to
# run_and_verify_FOO().
# A node in a tree.
#
# If CHILDREN is None, then the node is a file. Otherwise, CHILDREN
# is a list of the nodes making up that directory's children.
#
# NAME is simply the name of the file or directory. CONTENTS is a
# string that contains the file's contents (if a file), PROPS are
# properties attached to files or dirs, and ATTS is a dictionary of
# other metadata attached to the node.
class SVNTreeNode:
def __init__(self, name, children=None, contents=None, props={}, atts={}):
self.name = name
self.mtime = 0
self.children = children
self.contents = contents
self.props = props
self.atts = atts
self.path = name
# TODO: Check to make sure contents and children are mutually exclusive
def add_child(self, newchild):
if self.children is None: # if you're a file,
self.children = [] # become an empty dir.
child_already_exists = 0
for a in self.children:
if a.name == newchild.name:
child_already_exists = 1
break
if child_already_exists == 0:
self.children.append(newchild)
newchild.path = os.path.join (self.path, newchild.name)
# If you already have the node,
else:
if newchild.children is None:
# this is the 'end' of the chain, so copy any content here.
a.contents = newchild.contents
a.props = newchild.props
a.atts = newchild.atts
a.path = os.path.join (self.path, newchild.name)
else:
# try to add dangling children to your matching node
for i in newchild.children:
a.add_child(i)
def pprint(self):
print " * Node name: ", self.name
print " Path: ", self.path
print " Contents: ", self.contents
print " Properties:", self.props
print " Attributes:", self.atts
### FIXME: I'd like to be able to tell the difference between
### self.children is None (file) and self.children == [] (empty
### diretory), but it seems that most places that construct
### SVNTreeNode objects don't even try to do that. --xbc
if self.children is not None:
print " Children: ", len(self.children)
else:
print " Children: is a file."
# reserved name of the root of the tree
root_node_name = "__SVN_ROOT_NODE"
# Exception raised if you screw up in this module.
class SVNTreeError(Exception): pass
# Exception raised if two trees are unequal
class SVNTreeUnequal(Exception): pass
# Exception raised if one node is file and other is dir
class SVNTypeMismatch(Exception): pass
# Exception raised if get_child is passed a file.
class SVNTreeIsNotDirectory(Exception): pass
# Some attributes 'stack' on each other if the same node is added
# twice to a tree. Place all such special cases in here.
def attribute_merge(orighash, newhash):
"Merge the attributes in NEWHASH into ORIGHASH."
if orighash.has_key('verb') and newhash.has_key('verb'):
# Special case: if a commit reports a node as "deleted", then
# "added", it's a replacment.
if orighash['verb'] == "Deleting":
if newhash['verb'] == "Adding":
orighash['verb'] = "Replacing"
# Add future stackable attributes here...
return orighash
# helper func
def add_elements_as_path(top_node, element_list):
"""Add the elements in ELEMENT_LIST as if they were a single path
below TOP_NODE."""
# The idea of this function is to take a list like so:
# ['A', 'B', 'C'] and a top node, say 'Z', and generate a tree
# like this:
#
# Z -> A -> B -> C
#
# where 1 -> 2 means 2 is a child of 1.
#
prev_node = top_node
for i in element_list:
new_node = SVNTreeNode(i, None)
prev_node.add_child(new_node)
prev_node = new_node
# Sorting function -- sort 2 nodes by their names.
def node_is_greater(a, b):
"Sort the names of two nodes."
# Interal use only
if a.name == b.name:
return 0
if a.name > b.name:
return 1
else:
return -1
# Helper for compare_trees
def compare_file_nodes(a, b):
"""Compare two nodes' names, contents, and properties, ignoring
children. Return 0 if the same, 1 otherwise."""
if a.name != b.name:
return 1
if a.contents != b.contents:
return 1
if a.props != b.props:
return 1
if a.atts != b.atts:
return 1
# Internal utility used by most build_tree_from_foo() routines.
#
# (Take the output and .add_child() it to a root node.)
def create_from_path(path, contents=None, props={}, atts={}):
"""Create and return a linked list of treenodes, given a PATH
representing a single entry into that tree. CONTENTS and PROPS are
optional arguments that will be deposited in the tail node."""
# get a list of all the names in the path
# each of these will be a child of the former
elements = path.split("/")
if len(elements) == 0:
raise SVNTreeError
root_node = SVNTreeNode(elements[0], None)
add_elements_as_path(root_node, elements[1:])
# deposit contents in the very last node.
node = root_node
while 1:
if node.children is None:
node.contents = contents
node.props = props
node.atts = atts
break
node = node.children[0]
return root_node
# helper for handle_dir(), which is a helper for build_tree_from_wc()
def get_props(path):
"Return a hash of props for PATH, using the svn client."
# It's not kosher to look inside SVN/ and try to read the internal
# property storage format. Instead, we use 'svn proplist'. After
# all, this is the only way the user can retrieve them, so we're
# respecting the black-box paradigm.
props = {}
output, errput = main.run_svn(1, "proplist", path, "--verbose")
for line in output:
name, value = line.split(' : ')
name = string.strip(name)
value = string.strip(value)
props[name] = value
return props
# helper for handle_dir(), which helps build_tree_from_wc()
def get_text(path):
"Return a string with the textual contents of a file at PATH."
# sanity check
if not os.path.isfile(path):
return None
fp = open(path, 'r')
contents = fp.read()
fp.close()
return contents
# main recursive helper for build_tree_from_wc()
def handle_dir(path, current_parent, load_props, ignore_svn):
# get a list of all the files
all_files = os.listdir(path)
files = []
dirs = []
# put dirs and files in their own lists, and remove SVN dirs
for f in all_files:
f = os.path.join(path, f)
if (os.path.isdir(f) and os.path.basename(f) != 'SVN'):
dirs.append(f)
elif os.path.isfile(f):
files.append(f)
# add each file as a child of CURRENT_PARENT
for f in files:
fcontents = get_text(f)
if load_props:
fprops = get_props(f)
else:
fprops = {}
c = SVNTreeNode(os.path.basename(f), None,
fcontents, fprops)
c.mtime = os.stat(f)[stat.ST_MTIME]
current_parent.add_child(c)
# for each subdir, create a node, walk its tree, add it as a child
for d in dirs:
if load_props:
dprops = get_props(d)
else:
dprops = {}
new_dir_node = SVNTreeNode(os.path.basename(d), [], None, dprops)
handle_dir(d, new_dir_node, load_props, ignore_svn)
new_dir_node.mtime = os.stat(f)[stat.ST_MTIME]
current_parent.add_child(new_dir_node)
def get_child(node, name):
"""If SVNTreeNode NODE contains a child named NAME, return child;
else, return None. If SVNTreeNode is not a directory, raise a
SVNTreeIsNotDirectory exception"""
if node.children == None:
raise SVNTreeIsNotDirectory
for n in node.children:
if (name == n.name):
return n
return None
# Helper for compare_trees
def default_singleton_handler(a, baton):
"Printing SVNTreeNode A's name, then raise SVNTreeUnequal."
print "Got singleton", a.name
a.pprint()
raise SVNTreeUnequal
###########################################################################
###########################################################################
# EXPORTED ROUTINES ARE BELOW
# Main tree comparison routine!
def compare_trees(a, b,
singleton_handler_a = None,
a_baton = None,
singleton_handler_b = None,
b_baton = None):
"""Compare SVNTreeNodes A and B, expressing differences using FUNC_A
and FUNC_B. FUNC_A and FUNC_B are functions of two arguments (a
SVNTreeNode and a context baton), and may raise exception
SVNTreeUnequal. Their return value is ignored.
If A and B are both files, then return 0 if their contents,
properties, and names are all the same; else raise a SVNTreeUnequal.
If A is a file and B is a directory, raise a SVNTypeMismatch; same
vice-versa. If both are directories, then for each entry that
exists in both, call compare_trees on the two entries; otherwise, if
the entry exists only in A, invoke FUNC_A on it, and likewise for
B with FUNC_B."""
def display_nodes(a, b):
'Display two nodes, expected and actual.'
print "============================================================="
print "Expected", b.name, "and actual", a.name, "are different!"
print "============================================================="
print "EXPECTED NODE TO BE:"
print "============================================================="
b.pprint()
print "============================================================="
print "ACTUAL NODE FOUND:"
print "============================================================="
a.pprint()
# Setup singleton handlers
if (singleton_handler_a is None):
singleton_handler_a = default_singleton_handler
if (singleton_handler_b is None):
singleton_handler_b = default_singleton_handler
try:
# A and B are both files.
if ((a.children is None) and (b.children is None)):
if compare_file_nodes(a, b):
display_nodes(a, b)
raise main.SVNTreeUnequal
# One is a file, one is a directory.
elif (((a.children is None) and (b.children is not None))
or ((a.children is not None) and (b.children is None))):
display_nodes(a, b)
raise main.SVNTypeMismatch
# They're both directories.
else:
# First, compare the directories' two hashes.
if (a.props != b.props) or (a.atts != b.atts):
display_nodes(a, b)
raise main.SVNTreeUnequal
accounted_for = []
# For each child of A, check and see if it's in B. If so, run
# compare_trees on the two children and add b's child to
# accounted_for. If not, run FUNC_A on the child. Next, for each
# child of B, check and see if it's in accounted_for. If it is,
# do nothing. If not, run FUNC_B on it.
for a_child in a.children:
b_child = get_child(b, a_child.name)
if b_child:
accounted_for.append(b_child)
compare_trees(a_child, b_child,
singleton_handler_a, a_baton,
singleton_handler_b, b_baton)
else:
singleton_handler_a(a_child, a_baton)
for b_child in b.children:
if (b_child not in accounted_for):
singleton_handler_b(b_child, b_baton)
return 0
except SVNTypeMismatch:
print 'Unequal Types: one Node is a file, the other is a directory'
raise SVNTreeUnequal
except SVNTreeIsNotDirectory:
print "Error: Foolish call to get_child."
sys.exit(1)
except IndexError:
print "Error: unequal number of children"
raise SVNTreeUnequal
except SVNTreeUnequal:
if a.name == root_node_name:
return 1
else:
print "Unequal at node %s" % a.name
raise SVNTreeUnequal
return 0
# Visually show a tree's structure
def dump_tree(n,indent=""):
"Print out a nice representation of the tree's structure."
# Code partially stolen from Dave Beazley.
if n.children is None:
tmp_children = []
else:
tmp_children = n.children
if n.name == root_node_name:
print "%s%s" % (indent, "ROOT")
else:
print "%s%s" % (indent, n.name)
indent = indent.replace("-", " ")
indent = indent.replace("+", " ")
for i in range(len(tmp_children)):
c = tmp_children[i]
if i == len(tmp_children) - 1:
dump_tree(c,indent + " +-- ")
else:
dump_tree(c,indent + " |-- ")
###################################################################
###################################################################
# PARSERS that return trees made of SVNTreeNodes....
###################################################################
# Build an "expected" static tree from a list of lists
# Create a list of lists, of the form:
#
# [ [path, contents, props, atts], ... ]
#
# and run it through this parser. PATH is a string, a path to the
# object. CONTENTS is either a string or None, and PROPS and ATTS are
# populated dictionaries or {}. Each CONTENTS/PROPS/ATTS will be
# attached to the basename-node of the associated PATH.
def build_generic_tree(nodelist):
"Given a list of lists of a specific format, return a tree."
root = SVNTreeNode(root_node_name)
for list in nodelist:
new_branch = create_from_path(list[0], list[1], list[2], list[3])
root.add_child(new_branch)
return root
####################################################################
# Build trees from different kinds of subcommand output.
# Parse co/up output into a tree.
#
# Tree nodes will contain no contents, and only one 'status' att.
def build_tree_from_checkout(lines):
"Return a tree derived by parsing the output LINES from 'co' or 'up'."
root = SVNTreeNode(root_node_name)
rm = re.compile ('^([MAGCUD_ ][MAGCUD_ ]) (.+)')
for line in lines:
match = rm.search(line)
if match and match.groups():
new_branch = create_from_path(match.group(2), None, {},
{'status' : match.group(1)})
root.add_child(new_branch)
return root
# Parse ci/im output into a tree.
#
# Tree nodes will contain no contents, and only one 'verb' att.
def build_tree_from_commit(lines):
"Return a tree derived by parsing the output LINES from 'ci' or 'im'."
# Lines typically have a verb followed by whitespace then a path.
root = SVNTreeNode(root_node_name)
rm1 = re.compile ('^(\w+)\s+(.+)')
rm2 = re.compile ('^Transmitting')
for line in lines:
match = rm2.search(line)
if not match:
match = rm1.search(line)
if match and match.groups():
new_branch = create_from_path(match.group(2), None, {},
{'verb' : match.group(1)})
root.add_child(new_branch)
return root
# Parse status output into a tree.
#
# Tree nodes will contain no contents, and these atts:
#
# 'status', 'wc_rev', 'repos_rev'
# ... and possibly 'locked', 'copied', IFF columns non-empty.
#
def build_tree_from_status(lines):
"Return a tree derived by parsing the output LINES from 'st'."
root = SVNTreeNode(root_node_name)
rm = re.compile ('^.+\:.+(\d+)')
lastline = string.strip(lines.pop())
match = rm.search(lastline)
if match and match.groups():
repos_rev = match.group(1)
else:
repos_rev = '?'
# Try http://www.wordsmith.org/anagram/anagram.cgi?anagram=ACDRMGU
rm = re.compile ('^([MACDRUG_ ][MACDRUG_ ])(.)(.) . [^0-9-]+(\d+|-)(.{23})(.+)')
for line in lines:
match = rm.search(line)
if match and match.groups():
if match.group(5) != '-': # ignore items that only exist on repos
atthash = {'status' : match.group(1),
'wc_rev' : match.group(4),
'repos_rev' : repos_rev}
if match.group(2) != ' ':
atthash['locked'] = match.group(2)
if match.group(3) != ' ':
atthash['copied'] = match.group(3)
new_branch = create_from_path(match.group(6), None, {}, atthash)
root.add_child(new_branch)
return root
####################################################################
# Build trees by looking at the working copy
# The reason the 'load_props' flag is off by default is because it
# creates a drastic slowdown -- we spawn a new 'svn proplist'
# process for every file and dir in the working copy!
def build_tree_from_wc(wc_path, load_props=0, ignore_svn=1):
"""Takes WC_PATH as the path to a working copy. Walks the tree below
that path, and creates the tree based on the actual found
files. If IGNORE_SVN is true, then exclude SVN dirs from the tree.
If LOAD_PROPS is true, the props will be added to the tree."""
root = SVNTreeNode(root_node_name, None)
# if necessary, store the root dir's props in the root node.
if load_props:
root.props = get_props(wc_path)
# Walk the tree recursively
handle_dir(os.path.normpath(wc_path), root, load_props, ignore_svn)
return root
### End of file.
# local variables:
# eval: (load-file "../../../../../tools/dev/svn-dev.el")
# end: