blob: 70c66a3dbb4130072388b03f76821793b085a5e0 [file] [log] [blame]
# Copyright 2003 Dave Abrahams
# Copyright 2001, 2002 Vladimir Prus
# Copyright 2012 Jurko Gospodnetic
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
###############################################################################
#
# Based in part on an old Subversion tree.py source file (tools for comparing
# directory trees). See http://subversion.tigris.org for more information.
#
# Copyright (c) 2001 Sam Tobin-Hochstadt. All rights reserved.
#
# This software is licensed as described in the file COPYING, which you should
# have received as part of this distribution. The terms are also available at
# http://subversion.tigris.org/license-1.html. If newer versions of this
# license are posted there, you may use a newer version instead, at your
# option.
#
###############################################################################
import os
import os.path
import stat
import sys
class TreeNode:
"""
Fundamental data type used to build file system tree structures.
If CHILDREN is None, then the node represents a file. Otherwise, CHILDREN
is a list of the nodes representing that directory's children.
NAME is simply the name of the file or directory. CONTENTS is a string
holding the file's contents (if a file).
"""
def __init__(self, name, children=None, contents=None):
assert children is None or contents is None
self.name = name
self.mtime = 0
self.children = children
self.contents = contents
self.path = name
def add_child(self, newchild):
assert not self.is_file()
for a in self.children:
if a.name == newchild.name:
if newchild.is_file():
a.contents = newchild.contents
a.path = os.path.join(self.path, newchild.name)
else:
for i in newchild.children:
a.add_child(i)
break
else:
self.children.append(newchild)
newchild.path = os.path.join(self.path, newchild.name)
def get_child(self, name):
"""
If the given TreeNode directory NODE contains a child named NAME,
return the child; else, return None.
"""
for n in self.children:
if n.name == name:
return n
def is_file(self):
return self.children is None
def pprint(self):
print(" * Node name: %s" % self.name)
print(" Path: %s" % self.path)
print(" Contents: %s" % self.contents)
if self.is_file():
print(" Children: is a file.")
else:
print(" Children: %d" % len(self.children))
class TreeDifference:
def __init__(self):
self.added_files = []
self.removed_files = []
self.modified_files = []
self.touched_files = []
def append(self, other):
self.added_files.extend(other.added_files)
self.removed_files.extend(other.removed_files)
self.modified_files.extend(other.modified_files)
self.touched_files.extend(other.touched_files)
def ignore_directories(self):
"""Removes directories from our lists of found differences."""
not_dir = lambda x : x[-1] != "/"
self.added_files = filter(not_dir, self.added_files)
self.removed_files = filter(not_dir, self.removed_files)
self.modified_files = filter(not_dir, self.modified_files)
self.touched_files = filter(not_dir, self.touched_files)
def pprint(self, file=sys.stdout):
file.write("Added files : %s\n" % self.added_files)
file.write("Removed files : %s\n" % self.removed_files)
file.write("Modified files: %s\n" % self.modified_files)
file.write("Touched files : %s\n" % self.touched_files)
def empty(self):
return not (self.added_files or self.removed_files or
self.modified_files or self.touched_files)
def build_tree(path):
"""
Takes PATH as the folder path, walks the file system below that path, and
creates a tree structure based on any files and folders found there.
Returns the prepared tree structure plus the maximum file modification
timestamp under the given folder.
"""
return _handle_dir(os.path.normpath(path))
def tree_difference(a, b):
"""Compare TreeNodes A and B, and create a TreeDifference instance."""
return _do_tree_difference(a, b, "", True)
def _do_tree_difference(a, b, parent_path, root=False):
"""Internal recursive worker function for tree_difference()."""
# We do not want to list root node names.
if root:
assert not parent_path
assert not a.is_file()
assert not b.is_file()
full_path = ""
else:
assert a.name == b.name
full_path = parent_path + a.name
result = TreeDifference()
# A and B are both files.
if a.is_file() and b.is_file():
if a.contents != b.contents:
result.modified_files.append(full_path)
elif a.mtime != b.mtime:
result.touched_files.append(full_path)
return result
# Directory converted to file.
if not a.is_file() and b.is_file():
result.removed_files.extend(_traverse_tree(a, parent_path))
result.added_files.append(full_path)
# File converted to directory.
elif a.is_file() and not b.is_file():
result.removed_files.append(full_path)
result.added_files.extend(_traverse_tree(b, parent_path))
# A and B are both directories.
else:
if full_path:
full_path += "/"
accounted_for = [] # Children present in both trees.
for a_child in a.children:
b_child = b.get_child(a_child.name)
if b_child:
accounted_for.append(b_child)
result.append(_do_tree_difference(a_child, b_child, full_path))
else:
result.removed_files.append(full_path + a_child.name)
for b_child in b.children:
if b_child not in accounted_for:
result.added_files.extend(_traverse_tree(b_child, full_path))
return result
def _traverse_tree(t, parent_path):
"""Returns a list of all names in a tree."""
assert not parent_path or parent_path[-1] == "/"
full_node_name = parent_path + t.name
if t.is_file():
result = [full_node_name]
else:
name_prefix = full_node_name + "/"
result = [name_prefix]
for i in t.children:
result.extend(_traverse_tree(i, name_prefix))
return result
def _get_text(path):
"""Return a string with the textual contents of a file at PATH."""
fp = open(path, 'r')
try:
return fp.read()
finally:
fp.close()
def _handle_dir(path):
"""
Main recursive worker function for build_tree(). Returns a newly created
tree node representing the given normalized folder path as well as the
maximum file/folder modification time detected under the same path.
"""
files = []
dirs = []
node = TreeNode(os.path.basename(path), children=[])
max_mtime = node.mtime = os.stat(path).st_mtime
# List files & folders.
for f in os.listdir(path):
f = os.path.join(path, f)
if os.path.isdir(f):
dirs.append(f)
elif os.path.isfile(f):
files.append(f)
# Add a child node for each file.
for f in files:
fcontents = _get_text(f)
new_file_node = TreeNode(os.path.basename(f), contents=fcontents)
new_file_node.mtime = os.stat(f).st_mtime
max_mtime = max(max_mtime, new_file_node.mtime)
node.add_child(new_file_node)
# For each subdir, create a node, walk its tree, add it as a child.
for d in dirs:
new_dir_node, new_max_mtime = _handle_dir(d)
max_mtime = max(max_mtime, new_max_mtime)
node.add_child(new_dir_node)
return node, max_mtime