| """Collection of utility functions to work with git.""" |
| |
| from __future__ import absolute_import |
| import collections |
| import copy |
| import os |
| import re |
| import six |
| |
# Regex for the "(cherry picked from commit <sha>)" marker that is searched for
# in commit messages. Accepts abbreviated (5+ characters) through full
# 40-character lowercase hexadecimal SHAs.
CHERRY_PICK_MSG = r'\(cherry picked from commit [0-9a-f]{5,40}\)'
# Regex for 40 hexadecimal characters (either letter case). The pattern carries
# no anchors of its own.
SHA_PATTERN = r'[a-fA-F0-9]{40}'

# `git diff` uses /dev/null as a placeholder name for new/deleted files.
FILENAME_PLACEHOLDER = '/dev/null'
| |
| |
def check_history_depth(executor, directory):
  """Returns the number of commits reported by `git rev-list --count HEAD`.

  Args:
    executor: Executes the subprocess.
    directory: Directory to execute the subprocess in.

  Returns:
    The commit count as an int, or 0 when the command output cannot be
    interpreted as an integer (including when no output was captured at all).
  """
  depth = executor.exec_subprocess(
      ['git', 'rev-list', '--count', 'HEAD'], cwd=directory, check_output=True)
  try:
    return int(depth)
  except (TypeError, ValueError):
    # ValueError: the output is not a number (e.g. an error message).
    # TypeError: the output is None or some other non-numeric object.
    return 0
| |
| |
def changed_files(executor,
                  file_extensions=None,
                  directory=None,
                  file_exclusion_pattern=None,
                  depth=1):
  """Returns a list of names of changed files in newest |depth| commits.

  The names come from `git diff HEAD~<depth> --name-only --diff-filter=AMR`,
  so only added, modified and renamed files are reported.

  Args:
    executor: Executes the subprocess.
    file_extensions: Filter output to only files with matching extensions. (e.g.
      ['.cc', '.h'] for C++ files)
    directory: Directory to execute the subprocess in.
    file_exclusion_pattern: A regex pattern to filter files with. If the pattern
      matches a file that file will not be included in the changed file list.
    depth: The number of commits to find files from.

  Returns:
    List of names of changed files. Empty when git reports nothing (no output,
    or output consisting only of whitespace).
  """
  diff_output = executor.exec_subprocess(
      ['git', 'diff', 'HEAD~{}'.format(depth), '--name-only',
       '--diff-filter=AMR'],
      cwd=directory,
      check_output=True)

  if not diff_output:
    return []

  # Drop empty entries: whitespace-only output or stray blank lines would
  # otherwise surface as a bogus '' filename.
  changed_filenames = [
      name for name in diff_output.strip().split('\n') if name
  ]
  if not changed_filenames:
    return []

  return filtered_files(
      changed_filenames,
      file_extensions=file_extensions,
      file_exclusion_pattern=file_exclusion_pattern)
| |
| |
def get_changed_files_in_all_patches(executor,
                                     patches,
                                     path_lookup,
                                     file_extensions=None,
                                     file_exclusion_pattern=None):
  """Collects every file changed by applying |patches| to the checkout.

  Useful for determining all files that have changed when a series of patches
  has been applied via the Depends-On tag. Patches may span several projects,
  and a single project may carry several patches.

  Args:
    executor: Executes the subprocess.
    patches: A list of all of the patches applied to the checkout. Each entry
      is indexed by 'project' and 'branch' here; for more detail on the
      entries, see scripts/slave/base_step.py.
    path_lookup: Lookup from a (project, branch) tuple to that project's path.
      A deep copy of it is also used through its `_lookup_dict` attribute
      (keyed by project name) and its `update()` method.
    file_extensions: Filter output to only files with matching extensions. (e.g.
      ['.cc', '.h'] for C++ files)
    file_exclusion_pattern: A regex pattern to filter files with. If the pattern
      matches a file that file will not be included in the changed file list.

  Returns:
    A list of file paths relative to the checkout root directory.
  """
  # |project_depths| maps each (project, branch) pair known to |path_lookup| to
  # the number of patches applied on top of it, e.g.:
  # {
  #   ("chromecast/internal", "main"): 1,
  #   ("chromium/src", "main"): 2,
  # }
  patch_keys = ((patch['project'], patch['branch']) for patch in patches)
  project_depths = collections.Counter(
      key for key in patch_keys if key in path_lookup)

  # |path_lookup| holds real paths relative to the root checkout directory.
  # However, some tools like gn expect paths to be symlinks, e.g.
  # chromium/src/chromecast/internal/receiver/app/dummy.cc instead of
  # cast/internal/receiver/app/dummy.cc. Work on a copy whose entries for such
  # projects are replaced by the symlinked location.
  file_prefix_dict = copy.deepcopy(path_lookup)
  replacing_symlinks = {
      'chromecast/internal': 'chromium/src/chromecast/internal'
  }
  for project, symlink in replacing_symlinks.items():
    revisions = file_prefix_dict._lookup_dict.get(project)
    if revisions:
      file_prefix_dict.update(
          {(project, revision): symlink for revision in revisions})

  # For every project, list its changed files and prefix each of them with the
  # project's path from the checkout root.
  files = []
  for key, depth in project_depths.items():
    checkout_path = path_lookup[key]
    # Only diff when the checkout has more history than patches applied.
    if depth >= check_history_depth(executor, checkout_path):
      continue
    for name in changed_files(
        executor,
        file_extensions=file_extensions,
        directory=checkout_path,
        file_exclusion_pattern=file_exclusion_pattern,
        depth=depth):
      files.append(os.path.join(file_prefix_dict[key], name))
  return files
| |
| |
def filtered_files(filenames,
                   file_extensions=None,
                   file_exclusion_pattern=None):
  """Filters the list of files.

  Args:
    filenames: A list of filenames to filter.
    file_extensions: Filter output to only files with matching extensions. (e.g.
      ['.cc', '.h'] for C++ files). A single extension given as a plain string
      (e.g. '.cc') is accepted as well.
    file_exclusion_pattern: A regex pattern to filter files with. If the pattern
      matches a file that file will not be included in the changed file list.

  Returns:
    List of files after applying the filters. When neither filter is given,
    |filenames| is returned unchanged.
  """
  if file_extensions:
    # tuple('.cc') would explode into ('.', 'c', 'c') and match nearly every
    # file, so a bare string is wrapped instead of iterated.
    if isinstance(file_extensions, str):
      suffixes = (file_extensions,)
    else:
      suffixes = tuple(file_extensions)
    filenames = [f for f in filenames if f.endswith(suffixes)]
  if file_exclusion_pattern:
    # Compile once instead of handing the raw pattern to re.search per file.
    is_excluded = re.compile(file_exclusion_pattern).search
    filenames = [f for f in filenames if not is_excluded(f)]
  return filenames
| |
| |
def commit_message(executor, directory=None, skip_counts: int = 0) -> str:
  """Returns the message of one git commit, counted back from the newest.

  Only the message itself is returned (`git log --format=%B`); the Author: and
  Date: fields are not part of the output.

  Args:
    executor: Executes the subprocess.
    directory: Directory to execute the subprocess in (defaults to cwd)
    skip_counts: Number of commits to skip before returning the message. With
      the default of 0 the most recent commit is used.

  Returns:
    The commit message, exactly as produced by the executor.
  """
  log_command = ['git', 'log', '--format=%B']
  log_command += ['--skip', str(skip_counts)]
  log_command += ['-n', '1']
  return executor.exec_subprocess(
      log_command, cwd=directory, check_output=True)
| |
| |
def is_cherry_pick(executor, directory=None):
  """Tells whether the most recent commit carries a cherry-pick marker.

  The commit message is searched for CHERRY_PICK_MSG, i.e. a
  "(cherry picked from commit <sha>)" note anywhere in the message.

  Args:
    executor: Executes the subprocess.
    directory: Directory to execute the subprocess in (defaults to cwd)

  Returns:
    True iff the most recent commit is a cherry pick.
  """
  message = commit_message(executor, directory)
  marker = re.search(CHERRY_PICK_MSG, message, re.MULTILINE)
  return marker is not None
| |
| |
def commit_author_email(executor, directory=None):
  """Returns the author email of the most recent git commit.

  Non-ASCII characters are removed from the address. The executor output may
  be either bytes or text; the result has the same type as that output.

  Args:
    executor: Executes the subprocess.
    directory: Directory to execute the subprocess in (defaults to cwd)

  Returns:
    The email of the author of the most recent git commit, stripped of
    surrounding whitespace and of any non-ASCII characters.
  """
  author = executor.exec_subprocess(['git', 'log', '--format=%ae', '-n', '1'],
                                    cwd=directory,
                                    check_output=True).strip()
  if isinstance(author, bytes):
    # Decode to text first so that multi-byte UTF-8 sequences are dropped as
    # whole characters, then re-encode keeping only the ASCII ones.
    return author.decode('utf-8').encode('ascii', errors='ignore')
  # Text output has no decode(); calling it unconditionally raised
  # AttributeError on Python 3. Strip non-ASCII characters and stay text.
  return author.encode('ascii', errors='ignore').decode('ascii')
| |
| |
def is_sha(revision):
  """Returns True if revision looks like a SHA.

  Args:
    revision: The revision string to check.

  Returns:
    True iff |revision| consists of exactly 40 hexadecimal characters (the
    whole string must match SHA_PATTERN), False otherwise.
  """
  # fullmatch() rejects strings that merely start with 40 hex characters, and
  # bool() yields the documented True/False instead of a match object or None.
  return bool(re.fullmatch(SHA_PATTERN, revision))
| |
| |
def get_head_sha(executor, directory):
  """Looks up the SHA that HEAD points to in |directory|.

  Args:
    executor: Executes the subprocess.
    directory: Directory to execute the subprocess in.

  Returns:
    The whitespace-stripped output of `git rev-parse HEAD`, or None when the
    command exits with a non-zero return code.
  """
  returncode, stdout, _ = executor.exec_subprocess(
      ['git', 'rev-parse', 'HEAD'], cwd=directory)
  if returncode != 0:
    return None
  return stdout.strip()