| # Copyright (C) 2013 Google Inc. All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are |
| # met: |
| # |
| # * Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # * Redistributions in binary form must reproduce the above |
| # copyright notice, this list of conditions and the following disclaimer |
| # in the documentation and/or other materials provided with the |
| # distribution. |
| # * Neither the Google name nor the names of its |
| # contributors may be used to endorse or promote products derived from |
| # this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| """Generates a fake TestExpectations file consisting of flaky tests from the bot |
| corresponding to the give port. |
| """ |
| |
| import json |
| import logging |
| import os.path |

from six.moves import urllib
| |
| from blinkpy.web_tests.models.typ_types import Expectation, ResultType |
| |
| _log = logging.getLogger(__name__) |
| |
| class ResultsFilter(object): |
| """Results filter for cq results. |
| Filtering any build which has passing tests in retry without patch. |
| If any test passed during this retry indicates the step has failed, |
| and the results are most likely unreliable. |
| |
| For results.json v4 format check ResultsJSON""" |
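
    # Example (hypothetical data; assumes ResultType.Pass == 'PASS'): the
    # second build below had a passing test during the retry step, so its
    # build number is ignored:
    #
    #   summary = {'Fake Builder': {
    #       'buildNumbers': [100, 101],
    #       'num_failures_by_type': {'PASS': [0, 3]}}}
    #   ResultsFilter('Fake Builder', summary).get_build_numbers_to_ignore()
    #   # => {101}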
| |
| RESULTS_COUNT_BY_TYPE = 'num_failures_by_type' |
| BUILD_NUMBERS = 'buildNumbers' |
| |
| # results.json was originally designed to support |
| # multiple builders in one json file, so the builder_name |
| # is needed to figure out which builder this json file |
| # refers to (and thus where the results are stored) |
| def __init__(self, builder_name, json_dict): |
        self._builds_to_filter = self._get_builds_to_ignore(
            json_dict[builder_name])
| |
    def _get_builds_to_ignore(self, json_builder_summary):
        build_count = len(json_builder_summary[self.BUILD_NUMBERS])
        pass_counts = json_builder_summary[
            self.RESULTS_COUNT_BY_TYPE][ResultType.Pass][:build_count]
        return {
            json_builder_summary[self.BUILD_NUMBERS][i]
            for i, count in enumerate(pass_counts) if count > 0
        }
| |
| def get_build_numbers_to_ignore(self): |
| return self._builds_to_filter |
| |
| |
| class ResultsJSON(object): |
| """Contains the contents of a results.json file. |
| |
| results.json v4 format: |
| { |
| 'version': 4, |
| 'builder name' : { |
| 'blinkRevision': [], |
| 'tests': { |
| 'directory' { # Each path component is a dictionary. |
| 'testname.html': { |
| 'expected' : 'FAIL', # Expectation name. |
| 'results': [], # Run-length encoded result. |
| 'times': [], |
| 'bugs': [], # Bug URLs. |
| } |
| } |
| } |
| 'buildNumbers': [], |
| 'secondsSinceEpoch': [], |
| 'chromeRevision': [], |
| 'failure_map': {}, # Map from letter code to expectation name. |
| 'num_failures_by_type: {} # Map result type to list of result count' |
| } |
| } |
| """ |
| TESTS_KEY = 'tests' |
| BUILD_NUMBERS = 'buildNumbers' |
| FAILURE_MAP_KEY = 'failure_map' |
| RESULTS_KEY = 'results' |
| EXPECTATIONS_KEY = 'expected' |
| BUGS_KEY = 'bugs' |
| RLE_LENGTH = 0 |
| RLE_VALUE = 1 |
| |
| # results.json was originally designed to support |
| # multiple builders in one json file, so the builder_name |
| # is needed to figure out which builder this json file |
| # refers to (and thus where the results are stored) |
| def __init__(self, builder_name, json_dict): |
| self.builder_name = builder_name |
| self._json = json_dict |
| |
| def _walk_trie(self, trie, parent_path): |
| for name, value in trie.items(): |
| full_path = os.path.join(parent_path, name) |
| |
| # FIXME: If we ever have a test directory self.RESULTS_KEY |
| # ("results"), this logic will break! |
| if self.RESULTS_KEY not in value: |
| for path, results in self._walk_trie(value, full_path): |
| yield path, results |
| else: |
| yield full_path, value |
| |
    def walk_results(self, full_path=''):
        tests_trie = self._json[self.builder_name][self.TESTS_KEY]
        return self._walk_trie(tests_trie, parent_path=full_path)
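
    # Example (hypothetical data): directory components are flattened into
    # test paths as the trie is walked:
    #
    #   json_dict = {'Fake Builder': {'tests': {'fast': {
    #       'a.html': {'results': [[2, 'P'], [1, 'F']],
    #                  'expected': 'PASS'}}}}}
    #   rj = ResultsJSON('Fake Builder', json_dict)
    #   list(rj.walk_results())
    #   # => [('fast/a.html', {'results': [[2, 'P'], [1, 'F']],
    #   #                      'expected': 'PASS'})]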
| |
| def expectation_for_type(self, type_char): |
| return self._json[self.builder_name][self.FAILURE_MAP_KEY][type_char] |
| |
| def build_numbers(self): |
| return self._json[self.builder_name][self.BUILD_NUMBERS] |
| |
| # Knowing how to parse the run-length-encoded values in results.json |
| # is a detail of this class. |
| def occurances_and_type_from_result_item(self, item): |
| return item[self.RLE_LENGTH], item[self.RLE_VALUE] |
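
    # Example: an item [3, 'FP'] decodes to (3, 'FP'), i.e. three
    # consecutive runs whose per-try results were 'F' then 'P'; the letter
    # codes map to result names via failure_map.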
| |
| |
| class BotTestExpectationsFactory(object): |
    # STEP_NAME is used to fetch results for CI builders and for the
    # retry-without-patch step on CQ builders.
    # STEP_NAME_TRY is used to fetch with-patch CQ results.
| STEP_NAME = 'blink_web_tests' |
| STEP_NAME_TRY = 'blink_web_tests (with patch)' |
| RESULTS_URL_FORMAT = ( |
| 'https://test-results.appspot.com/testfile?testtype=%s' |
| '&name=results-small.json&master=%s&builder=%s' |
| ) |
| |
| def __init__(self, builders): |
| self.builders = builders |
| |
| def _results_json_for_port(self, port_name, builder_category): |
| builder = self.builders.builder_name_for_port_name(port_name) |
| if not builder: |
| return None |
| return self._results_json_for_builder(builder) |
| |
| def _results_url_for_builder(self, builder, use_try_step=False): |
| test_type = (self.STEP_NAME_TRY if use_try_step else self.STEP_NAME) |
| return self.RESULTS_URL_FORMAT % ( |
| urllib.parse.quote(test_type), |
| urllib.parse.quote(self.builders.master_for_builder(builder)), |
| urllib.parse.quote(builder)) |
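
    # Example (hypothetical names): for a CI builder 'Fake Linux' on master
    # 'fake.master', _results_url_for_builder yields
    #   https://test-results.appspot.com/testfile?testtype=blink_web_tests
    #   &name=results-small.json&master=fake.master&builder=Fake%20Linux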
| |
| def _results_json_for_builder(self, builder): |
| results_url = self._results_url_for_builder( |
| builder, self.builders.is_try_server_builder(builder)) |
| try: |
| _log.debug('Fetching flakiness data from appengine: %s', |
| results_url) |
            return ResultsJSON(
                builder, json.load(urllib.request.urlopen(results_url)))
| except urllib.error.URLError as error: |
| _log.warning( |
| 'Could not retrieve flakiness data from the bot. url: %s', |
| results_url) |
| _log.warning(error) |
| |
| def _results_filter_for_builder(self, builder): |
| results_url = self._results_url_for_builder(builder, False) |
| try: |
| _log.debug('Fetching flakiness data from appengine: %s', |
| results_url) |
            return ResultsFilter(
                builder, json.load(urllib.request.urlopen(results_url)))
        except urllib.error.URLError as error:
| _log.warning( |
| 'Could not retrieve flakiness data from the bot. url: %s', |
| results_url) |
| _log.warning(error) |
| |
| def expectations_for_port(self, port_name, builder_category='layout'): |
        # FIXME: This only grabs the release builder's flakiness data. If
        # we're running debug, we should grab the debug builder's data.
        # FIXME: What should this do if there is no debug builder for a
        # port, e.g. we have no debug XP builder? Should it use the release
        # bot or another Windows debug bot? At the very least, it should log
        # an error.
| results_json = self._results_json_for_port(port_name, builder_category) |
| if not results_json: |
| return None |
| return BotTestExpectations(results_json, self.builders) |
| |
| def expectations_for_builder(self, builder): |
| results_json = self._results_json_for_builder(builder) |
| if not results_json: |
| return None |
| results_filter = None |
| if self.builders.is_try_server_builder(builder): |
| results_filter = self._results_filter_for_builder(builder) |
| return BotTestExpectations(results_json, self.builders, |
| results_filter=results_filter) |
| |
| |
| class BotTestExpectations(object): |
| # FIXME: Get this from the json instead of hard-coding it. |
| RESULT_TYPES_TO_IGNORE = ['N', 'X', 'Y'] # NO_DATA, SKIP, NOTRUN |
| |
| # TODO(ojan): Remove this once crbug.com/514378 is fixed. |
| # The JSON can contain results for expectations, not just actual result types. |
| NON_RESULT_TYPES = ['S', 'X'] # SLOW, SKIP |
| |
    # The specifiers arg is used in unit tests to avoid the static
    # dependency on builders.
| def __init__(self, results_json, builders, specifiers=None, results_filter=None): |
| self.results_json = results_json |
| self.specifiers = specifiers or set( |
| builders.specifiers_for_builder(results_json.builder_name)) |
| self.filter_results_bitmap = self._get_results_filter(results_filter) |
| |
| def flakes_by_path(self, only_ignore_very_flaky): |
| """Sets test expectations to bot results if there are at least two distinct results.""" |
| flakes_by_path = {} |
| for test_path, entry in self.results_json.walk_results(): |
| flaky_types = self._flaky_types_in_results(entry, |
| only_ignore_very_flaky) |
| if len(flaky_types) <= 1: |
| continue |
| flakes_by_path[test_path] = flaky_types |
| return flakes_by_path |
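
    # Example (hypothetical output): flakes_by_path(True) might return
    #   {'fast/a.html': {'FAIL', 'PASS'}}
    # for a test that repeatedly failed and then passed only on a later
    # retry.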
| |
    def unexpected_results_by_path(self):
        """Returns, per test path with unexpected results, the set of its
        expected results plus the unexpected results seen on the bot."""
| unexpected_results_by_path = {} |
| for test_path, entry in self.results_json.walk_results(): |
| # Expectations for this test. No expectation defaults to PASS. |
| exp_string = entry.get(self.results_json.EXPECTATIONS_KEY, |
| ResultType.Pass) |
| |
| # All run-length-encoded results for this test. |
| results_dict = entry.get(self.results_json.RESULTS_KEY, {}) |
| |
| # Set of distinct results for this test. |
| result_types = self._all_types_in_results(results_dict) |
| |
| # Distinct results as non-encoded strings. |
| results = map(self.results_json.expectation_for_type, result_types) |
| |
| # Get test expectations |
| expectations = exp_string.split(' ') |
| |
| # Unexpected results will become additional expectations |
| additional_expectations = [ |
| res for res in results if res not in expectations |
| ] |
| |
| if not additional_expectations: |
| continue |
| |
| # Get typ expectation result tags |
| unexpected_results_by_path[test_path] = set( |
| expectations + additional_expectations) |
| return unexpected_results_by_path |
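
    # Example (hypothetical data): a test expected to 'FAIL' that also timed
    # out on the bot maps to {'FAIL', 'TIMEOUT'} in the returned dict.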
| |
| def all_results_by_path(self): |
| """Returns all seen result types for each test. |
| |
        Returns a dictionary mapping each test path that has at least one
        result to a sorted list of its distinct result strings. For example,
        if the test results are as follows:
| |
| a.html IMAGE IMAGE PASS PASS PASS TIMEOUT PASS TEXT |
| b.html PASS PASS PASS PASS PASS PASS PASS PASS |
| c.html |
| |
| This method will return: |
| { |
            'a.html': ['IMAGE', 'PASS', 'TEXT', 'TIMEOUT'],
| 'b.html': ['PASS'], |
| } |
| """ |
| results_by_path = {} |
| for test_path, entry in self.results_json.walk_results(): |
| results_dict = entry.get(self.results_json.RESULTS_KEY, {}) |
| |
| result_types = self._all_types_in_results(results_dict) |
| |
| if not result_types: |
| continue |
| |
| # Distinct results as non-encoded strings. |
| result_strings = map(self.results_json.expectation_for_type, |
| result_types) |
| |
| results_by_path[test_path] = sorted(result_strings) |
| return results_by_path |
| |
| def expectation_lines(self, only_ignore_very_flaky): |
| lines = [] |
| for test_path, entry in self.results_json.walk_results(): |
| flaky_types = self._flaky_types_in_results(entry, |
| only_ignore_very_flaky) |
| if len(flaky_types) > 1: |
| line = self._line_from_test_and_flaky_types( |
| test_path, flaky_types) |
| lines.append(line) |
| return lines |
| |
| def _get_results_filter(self, results_filter): |
| if results_filter: |
| filter_builds = results_filter.get_build_numbers_to_ignore() |
            return [
                build not in filter_builds
                for build in self.results_json.build_numbers()
            ]
| else: |
| return None |
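
    # Example (hypothetical data): for reported builds [100, 101, 102] with
    # build 101 filtered out, the bitmap is [True, False, True]; its
    # positions line up with the run-length-encoded results.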
| |
| def _line_from_test_and_flaky_types(self, test_name, flaky_types): |
| return Expectation( |
| tags=self.specifiers, test=test_name, results=flaky_types) |
| |
| def _all_types_in_results(self, run_length_encoded_results): |
| results = set() |
| |
| result_index = 0 |
| for result_item in run_length_encoded_results: |
| count, result_types = self.results_json.occurances_and_type_from_result_item( |
| result_item) |
| if (not self.filter_results_bitmap or |
| any(self.filter_results_bitmap[result_index : result_index + count])): |
| for result_type in result_types: |
| if result_type not in self.RESULT_TYPES_TO_IGNORE: |
| results.add(result_type) |
| result_index += count |
| return results |
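
    # Example (hypothetical data): [[2, 'P'], [1, 'F']] yields {'P', 'F'}
    # with no filter bitmap; with bitmap [True, True, False], the 'F' run
    # falls on a filtered build, leaving only {'P'}.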
| |
| def _flaky_types_in_results(self, results_entry, only_ignore_very_flaky): |
| flaky_results = set() |
| |
| # Always include pass as an expected result. Passes will never turn the bot red. |
| # This fixes cases where the expectations have an implicit Pass, e.g. [ Slow ]. |
| latest_expectations = [ResultType.Pass] |
| if self.results_json.EXPECTATIONS_KEY in results_entry: |
            expectations_list = results_entry[
                self.results_json.EXPECTATIONS_KEY].split(' ')
| latest_expectations.extend(expectations_list) |
| |
| for result_item in results_entry[self.results_json.RESULTS_KEY]: |
| _, result_types_str = self.results_json.occurances_and_type_from_result_item( |
| result_item) |
| |
| result_types = [] |
| for result_type in result_types_str: |
| # TODO(ojan): Remove this if-statement once crbug.com/514378 is fixed. |
| if result_type not in self.NON_RESULT_TYPES: |
| result_types.append( |
| self.results_json.expectation_for_type(result_type)) |
| |
| # It didn't flake if it didn't retry. |
| if len(result_types) <= 1: |
| continue |
| |
| # If the test ran as expected after only one retry, it's not very flaky. |
| # It's only very flaky if it failed the first run and the first retry |
| # and then ran as expected in one of the subsequent retries. |
| # If there are only two entries, then that means it failed on the first |
| # try and ran as expected on the second because otherwise we'd have |
| # a third entry from the next try. |
| if only_ignore_very_flaky and len(result_types) == 2: |
| continue |
| |
| has_unexpected_results = False |
| for result_type in result_types: |
| # TODO(ojan): We really should be grabbing the expected results from the time |
| # of the run instead of looking at the latest expected results. That's a lot |
| # more complicated though. So far we've been looking at the aggregated |
| # results_small.json off test_results.appspot, which has all the information |
| # for the last 100 runs. In order to do this, we'd need to look at the |
| # individual runs' full_results.json, which would be slow and more complicated. |
| # The only thing we lose by not fixing this is that a test that was flaky |
| # and got fixed will still get printed out until 100 runs have passed. |
| if result_type not in latest_expectations: |
| has_unexpected_results = True |
| break |
| |
| if has_unexpected_results: |
| flaky_results = flaky_results.union(set(result_types)) |
| |
| return flaky_results |
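
    # Example (hypothetical data; assumes failure_map maps 'F' to 'FAIL' and
    # 'P' to 'PASS'): with latest expectations ['PASS'], a results entry of
    # [[1, 'FP']] yields {'FAIL', 'PASS'} when only_ignore_very_flaky is
    # False, but an empty set when it is True: a test that ran as expected
    # after a single retry is not considered very flaky.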