"""Collection of utility functions to work with git."""
from __future__ import absolute_import
import collections
import copy
import os
import re
import six
CHERRY_PICK_MSG = r'\(cherry picked from commit [0-9a-f]{5,40}\)'
SHA_PATTERN = r'[a-fA-F0-9]{40}'
# `git diff` uses /dev/null as a placeholder name for new/deleted files.
FILENAME_PLACEHOLDER = '/dev/null'
def check_history_depth(executor, directory):
  """Returns the commit count of HEAD in |directory|, or 0 if unparsable.
  Args:
    executor: Executes the subprocess.
    directory: Directory to execute the subprocess in.
  """
  depth = executor.exec_subprocess(
      ['git', 'rev-list', '--count', 'HEAD'], cwd=directory, check_output=True)
  try:
    return int(depth)
  except ValueError:
    return 0
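# Illustrative sketch of how check_history_depth() is typically used: the
# count lets a caller verify that HEAD~<depth> exists before diffing against
# it (the checkout path below is hypothetical).
#
#   if depth < check_history_depth(executor, '/checkout/chromium/src'):
#     files = changed_files(executor, directory='/checkout/chromium/src',
#                           depth=depth)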
def changed_files(executor,
file_extensions=None,
directory=None,
file_exclusion_pattern=None,
depth=1):
"""Returns a list of names of changed files in newest |depth| commits.
Args:
executor: Executes the subprocess.
    file_extensions: Filter output to only files with matching extensions
      (e.g. ['.cc', '.h'] for C++ files).
    directory: Directory to execute the subprocess in.
    file_exclusion_pattern: A regex pattern to filter files with. If the
      pattern matches a file, that file will not be included in the changed
      file list.
depth: The number of commits to find files from.
Returns:
List of names of changed files.
"""
diff_output = executor.exec_subprocess([
'git', 'diff', 'HEAD~{}'.format(depth), '--name-only', '--diff-filter=AMR'
],
cwd=directory,
check_output=True)
if not diff_output:
return []
changed_filenames = diff_output.strip().split('\n')
return filtered_files(
changed_filenames,
file_extensions=file_extensions,
file_exclusion_pattern=file_exclusion_pattern)
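# Example (illustrative; |executor| is any object exposing exec_subprocess(),
# and the directory path is hypothetical):
#
#   changed_files(executor,
#                 file_extensions=['.cc', '.h'],
#                 directory='/checkout/chromium/src',
#                 file_exclusion_pattern=r'^third_party/',
#                 depth=2)
#
# would list C++ files added, modified, or renamed relative to HEAD~2,
# excluding anything under third_party/.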
def get_changed_files_in_all_patches(executor,
patches,
path_lookup,
file_extensions=None,
file_exclusion_pattern=None):
"""Returns every file that is changed as a result of applying |patches|.
  This method is useful for determining every file that has changed after a
  series of patches has been applied via the Depends-On tag. It supports
  patches in multiple projects, as well as multiple patches in a single
  project.
Args:
executor: Executes the subprocess.
patches: A list of all of the patches applied to the checkout. For more
detail on the entries in this list, see scripts/slave/base_step.py.
    path_lookup: A dictionary-like object mapping (project, branch) pairs to
      project paths relative to the checkout root.
    file_extensions: Filter output to only files with matching extensions
      (e.g. ['.cc', '.h'] for C++ files).
    file_exclusion_pattern: A regex pattern to filter files with. If the
      pattern matches a file, that file will not be included in the changed
      file list.
Returns:
A list of file paths relative to the checkout root directory.
"""
  # |project_depths| is a Counter mapping each (project, branch) pair to the
  # number of patches that have been applied on top of that project's HEAD.
  # Ignoring the branch component, it might look something like this:
  # {
  #   "chromecast/internal": 1,
  #   "chromium/src": 2,
  #   "some_other_project": 1
  # }
project_depths = collections.Counter(
(patch['project'], patch['branch']) for patch in patches
if (patch['project'], patch['branch']) in path_lookup)
# |path_lookup| dictionary uses real paths relative to the root checkout
# directory. However, some tools like gn expect paths to be symlinks, e.g.
# chromium/src/chromecast/internal/receiver/app/dummy.cc instead of
# cast/internal/receiver/app/dummy.cc. Create a dictionary here so that
# symlinks can be returned for some projects.
file_prefix_dict = copy.deepcopy(path_lookup)
  replacing_symlinks = {
      'chromecast/internal': 'chromium/src/chromecast/internal'
  }
for project, symlink in replacing_symlinks.items():
if file_prefix_dict._lookup_dict.get(project):
update_dict = {(project, revision): symlink
for revision in file_prefix_dict._lookup_dict[project]}
file_prefix_dict.update(update_dict)
# For each project, calculate the list of changed files, concatenate each
# file path with the project's path from checkout root, and append them to
# a list.
files = []
  for project, depth in project_depths.items():
    if depth < check_history_depth(executor, path_lookup[project]):
      files.extend([
          os.path.join(file_prefix_dict[project], f)
          for f in changed_files(
              executor, file_extensions, path_lookup[project],
              file_exclusion_pattern, depth)
      ])
return files
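# Sketch of the |patches| input and the Counter derived from it (only the
# 'project' and 'branch' fields used above are shown; the branch names are
# hypothetical; see scripts/slave/base_step.py for the full patch format):
#
#   patches = [
#       {'project': 'chromium/src', 'branch': 'main'},
#       {'project': 'chromium/src', 'branch': 'main'},
#       {'project': 'chromecast/internal', 'branch': 'main'},
#   ]
#   # -> project_depths == Counter({('chromium/src', 'main'): 2,
#   #                               ('chromecast/internal', 'main'): 1})
#   # (assuming both (project, branch) pairs are present in |path_lookup|).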
def filtered_files(filenames,
file_extensions=None,
file_exclusion_pattern=None):
"""Filters the list of files.
Args:
filenames: A list of filenames to filter.
    file_extensions: Filter output to only files with matching extensions
      (e.g. ['.cc', '.h'] for C++ files).
    file_exclusion_pattern: A regex pattern to filter files with. If the
      pattern matches a file, that file will not be included in the changed
      file list.
Returns:
List of files after applying the filters.
"""
if file_extensions:
filenames = [f for f in filenames if f.endswith(tuple(file_extensions))]
if file_exclusion_pattern:
filenames = [
f for f in filenames if not re.search(file_exclusion_pattern, f)
]
return filenames
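# Example (pure function, so this can be checked without a git checkout):
#
#   filtered_files(['a.cc', 'a.h', 'a.py', 'third_party/b.cc'],
#                  file_extensions=['.cc', '.h'],
#                  file_exclusion_pattern=r'^third_party/')
#   # -> ['a.cc', 'a.h']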
def commit_message(executor, directory=None, skip_counts: int = 0) -> str:
"""Returns the most recent git commit message.
Note that this does not include the Author: or Date: fields,
only the message body.
Args:
executor: Executes the subprocess.
directory: Directory to execute the subprocess in (defaults to cwd)
skip_counts: Number of commits to skip before returning the message.
Returns:
The most recent git commit message.
"""
  return executor.exec_subprocess(
      ['git', 'log', '--format=%B', '--skip', str(skip_counts), '-n', '1'],
      cwd=directory,
      check_output=True)
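# Example (illustrative): commit_message(executor) returns the HEAD commit's
# message, while commit_message(executor, skip_counts=1) returns the message
# of HEAD's immediate predecessor (git log --skip 1 -n 1).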
def is_cherry_pick(executor, directory=None):
"""Returns if the most recent commit is a cherry pick.
Args:
executor: Executes the subprocess.
directory: Directory to execute the subproccess in (defaults to cwd)
Returns:
True iff the most recent commit is a cherry pick.
"""
return bool(
re.search(CHERRY_PICK_MSG, commit_message(executor, directory),
re.MULTILINE))
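# Example: a commit whose message ends with a line such as
#   (cherry picked from commit 8e5f6a7b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f)
# matches CHERRY_PICK_MSG, so is_cherry_pick() returns True for it.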
def commit_author_email(executor, directory=None):
"""Returns the author email of the most recent git commit.
Args:
executor: Executes the subprocess.
directory: Directory to execute the subprocess in (defaults to cwd)
Returns:
The email of the author of the most recent git commit.
"""
author = executor.exec_subprocess(['git', 'log', '--format=%ae', '-n', '1'],
cwd=directory,
check_output=True).strip()
  # Decode the author bytes as UTF-8, then re-encode as ASCII, dropping any
  # non-ASCII characters.
  return six.text_type(author.decode('utf-8')).encode('ascii', errors='ignore')
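# Example of the ASCII normalization above (hypothetical author):
#   b'j\xc3\xb6rg@example.com'  ->  b'jrg@example.com'
# i.e. the UTF-8 bytes are decoded and any non-ASCII characters are dropped.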
def is_sha(revision):
  """Returns True if |revision| looks like a full 40-character git SHA."""
  return bool(re.match(SHA_PATTERN, revision))
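# Examples (illustrative):
#   is_sha('a94a8fe5ccb19ba61c4c0873d391e987982fbbd3')  # -> True
#   is_sha('refs/heads/main')                           # -> False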
def get_head_sha(executor, directory):
"""Returns the SHA of the HEAD revision of the given directory, else None."""
cmd_get_sha = ['git', 'rev-parse', 'HEAD']
returncode, stdout, _ = executor.exec_subprocess(cmd_get_sha, cwd=directory)
return stdout.strip() if returncode == 0 else None
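# Example (illustrative; the checkout path is hypothetical):
#   get_head_sha(executor, '/checkout/chromium/src')
# returns a 40-character hex string such as
# 'a94a8fe5ccb19ba61c4c0873d391e987982fbbd3' when `git rev-parse HEAD`
# succeeds, and None when it fails (e.g. the directory is not a git repo).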