# Copyright (C) 2013 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the Google name nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Generates a fake TestExpectations file consisting of flaky tests from the bot
corresponding to the give port.
"""
import json
import logging
import os.path
from six.moves import urllib
from blinkpy.web_tests.models.typ_types import Expectation, ResultType
_log = logging.getLogger(__name__)
class ResultsFilter(object):
"""Results filter for cq results.
Filtering any build which has passing tests in retry without patch.
If any test passed during this retry indicates the step has failed,
and the results are most likely unreliable.
For results.json v4 format check ResultsJSON"""
RESULTS_COUNT_BY_TYPE = 'num_failures_by_type'
BUILD_NUMBERS = 'buildNumbers'
# results.json was originally designed to support
# multiple builders in one json file, so the builder_name
# is needed to figure out which builder this json file
# refers to (and thus where the results are stored)
def __init__(self, builder_name, json_dict):
self._builds_to_filter = self._get_builds_to_ignore(json_dict[builder_name])
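    # Illustrative example (hypothetical data): with buildNumbers [100, 101, 102]
    # and num_failures_by_type['PASS'] == [0, 2, 0], build 101 had tests passing
    # in the retry-without-patch step, so _get_builds_to_ignore() returns {101}.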
def _get_builds_to_ignore(self, json_builder_summary):
build_count = len(json_builder_summary[self.BUILD_NUMBERS])
passing_retries_indices = [
i for i, p in enumerate(json_builder_summary[
self.RESULTS_COUNT_BY_TYPE][ResultType.Pass][:build_count]) if p > 0]
return set([json_builder_summary[self.BUILD_NUMBERS][i] for i in passing_retries_indices])
def get_build_numbers_to_ignore(self):
return self._builds_to_filter
class ResultsJSON(object):
"""Contains the contents of a results.json file.
results.json v4 format:
{
'version': 4,
'builder name' : {
'blinkRevision': [],
'tests': {
'directory' { # Each path component is a dictionary.
'testname.html': {
'expected' : 'FAIL', # Expectation name.
'results': [], # Run-length encoded result.
'times': [],
'bugs': [], # Bug URLs.
}
}
}
'buildNumbers': [],
'secondsSinceEpoch': [],
'chromeRevision': [],
'failure_map': {}, # Map from letter code to expectation name.
'num_failures_by_type: {} # Map result type to list of result count'
}
}
"""
TESTS_KEY = 'tests'
BUILD_NUMBERS = 'buildNumbers'
FAILURE_MAP_KEY = 'failure_map'
RESULTS_KEY = 'results'
EXPECTATIONS_KEY = 'expected'
BUGS_KEY = 'bugs'
RLE_LENGTH = 0
RLE_VALUE = 1
# results.json was originally designed to support
# multiple builders in one json file, so the builder_name
# is needed to figure out which builder this json file
# refers to (and thus where the results are stored)
def __init__(self, builder_name, json_dict):
self.builder_name = builder_name
self._json = json_dict
def _walk_trie(self, trie, parent_path):
for name, value in trie.items():
full_path = os.path.join(parent_path, name)
# FIXME: If we ever have a test directory self.RESULTS_KEY
# ("results"), this logic will break!
if self.RESULTS_KEY not in value:
for path, results in self._walk_trie(value, full_path):
yield path, results
else:
yield full_path, value
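    # Illustrative example (hypothetical path and letter codes): walk_results()
    # yields pairs such as
    #     ('css/flexbox/flaky-test.html',
    #      {'expected': 'PASS', 'results': [[3, 'P'], [1, 'FP']], ...})
    # for every leaf of the 'tests' trie; the real letter codes come from
    # 'failure_map' via expectation_for_type().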
def walk_results(self, full_path=''):
tests_trie = self._json[self.builder_name][self.TESTS_KEY]
return self._walk_trie(tests_trie, parent_path='')
def expectation_for_type(self, type_char):
return self._json[self.builder_name][self.FAILURE_MAP_KEY][type_char]
def build_numbers(self):
return self._json[self.builder_name][self.BUILD_NUMBERS]
# Knowing how to parse the run-length-encoded values in results.json
# is a detail of this class.
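    # For example (letter codes are illustrative), the RLE item [3, 'P'] decodes
    # to (3, 'P'): three consecutive builds whose result string was 'P'. A
    # multi-letter entry such as [1, 'FP'] means the test was retried within a
    # single build, here failing once and then passing.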
def occurances_and_type_from_result_item(self, item):
return item[self.RLE_LENGTH], item[self.RLE_VALUE]
class BotTestExpectationsFactory(object):
    # STEP_NAME is used to fetch results for CI builders and the
    # retry-without-patch step for CQ builders.
    # STEP_NAME_TRY is used to fetch the with-patch CQ results.
STEP_NAME = 'blink_web_tests'
STEP_NAME_TRY = 'blink_web_tests (with patch)'
RESULTS_URL_FORMAT = (
'https://test-results.appspot.com/testfile?testtype=%s'
'&name=results-small.json&master=%s&builder=%s'
)
def __init__(self, builders):
self.builders = builders
def _results_json_for_port(self, port_name, builder_category):
builder = self.builders.builder_name_for_port_name(port_name)
if not builder:
return None
return self._results_json_for_builder(builder)
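    # Illustrative example (hypothetical builder and master names):
    # _results_url_for_builder('linux-rel', use_try_step=True) would yield
    # https://test-results.appspot.com/testfile?testtype=blink_web_tests%20%28with%20patch%29&name=results-small.json&master=tryserver.chromium.linux&builder=linux-rel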
def _results_url_for_builder(self, builder, use_try_step=False):
test_type = (self.STEP_NAME_TRY if use_try_step else self.STEP_NAME)
return self.RESULTS_URL_FORMAT % (
urllib.parse.quote(test_type),
urllib.parse.quote(self.builders.master_for_builder(builder)),
urllib.parse.quote(builder))
def _results_json_for_builder(self, builder):
results_url = self._results_url_for_builder(
builder, self.builders.is_try_server_builder(builder))
try:
_log.debug('Fetching flakiness data from appengine: %s',
results_url)
            return ResultsJSON(
                builder, json.load(urllib.request.urlopen(results_url)))
except urllib.error.URLError as error:
_log.warning(
'Could not retrieve flakiness data from the bot. url: %s',
results_url)
_log.warning(error)
def _results_filter_for_builder(self, builder):
results_url = self._results_url_for_builder(builder, False)
try:
_log.debug('Fetching flakiness data from appengine: %s',
results_url)
            return ResultsFilter(
                builder, json.load(urllib.request.urlopen(results_url)))
        except urllib.error.URLError as error:
_log.warning(
'Could not retrieve flakiness data from the bot. url: %s',
results_url)
_log.warning(error)
def expectations_for_port(self, port_name, builder_category='layout'):
        # FIXME: This only grabs the release builder's flakiness data. If we're
        # running debug, we should grab the debug builder's data instead.
# FIXME: What should this do if there is no debug builder for a port, e.g. we have
# no debug XP builder? Should it use the release bot or another Windows debug bot?
# At the very least, it should log an error.
results_json = self._results_json_for_port(port_name, builder_category)
if not results_json:
return None
return BotTestExpectations(results_json, self.builders)
def expectations_for_builder(self, builder):
results_json = self._results_json_for_builder(builder)
if not results_json:
return None
results_filter = None
if self.builders.is_try_server_builder(builder):
results_filter = self._results_filter_for_builder(builder)
return BotTestExpectations(results_json, self.builders,
results_filter=results_filter)
class BotTestExpectations(object):
# FIXME: Get this from the json instead of hard-coding it.
RESULT_TYPES_TO_IGNORE = ['N', 'X', 'Y'] # NO_DATA, SKIP, NOTRUN
# TODO(ojan): Remove this once crbug.com/514378 is fixed.
# The JSON can contain results for expectations, not just actual result types.
NON_RESULT_TYPES = ['S', 'X'] # SLOW, SKIP
# specifiers arg is used in unittests to avoid the static dependency on builders.
def __init__(self, results_json, builders, specifiers=None, results_filter=None):
self.results_json = results_json
self.specifiers = specifiers or set(
builders.specifiers_for_builder(results_json.builder_name))
self.filter_results_bitmap = self._get_results_filter(results_filter)
def flakes_by_path(self, only_ignore_very_flaky):
"""Sets test expectations to bot results if there are at least two distinct results."""
flakes_by_path = {}
for test_path, entry in self.results_json.walk_results():
flaky_types = self._flaky_types_in_results(entry,
only_ignore_very_flaky)
if len(flaky_types) <= 1:
continue
flakes_by_path[test_path] = flaky_types
return flakes_by_path
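    # Illustrative example (hypothetical data): if a test is expected to PASS
    # but the bot also saw TIMEOUT results, unexpected_results_by_path() maps
    # that test to {'PASS', 'TIMEOUT'}.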
def unexpected_results_by_path(self):
unexpected_results_by_path = {}
for test_path, entry in self.results_json.walk_results():
            # Expectations for this test; a missing expectation defaults to PASS.
exp_string = entry.get(self.results_json.EXPECTATIONS_KEY,
ResultType.Pass)
# All run-length-encoded results for this test.
results_dict = entry.get(self.results_json.RESULTS_KEY, {})
# Set of distinct results for this test.
result_types = self._all_types_in_results(results_dict)
# Distinct results as non-encoded strings.
results = map(self.results_json.expectation_for_type, result_types)
# Get test expectations
expectations = exp_string.split(' ')
# Unexpected results will become additional expectations
additional_expectations = [
res for res in results if res not in expectations
]
if not additional_expectations:
continue
# Get typ expectation result tags
unexpected_results_by_path[test_path] = set(
expectations + additional_expectations)
return unexpected_results_by_path
def all_results_by_path(self):
"""Returns all seen result types for each test.
Returns a dictionary from each test path that has a result to a list of distinct, sorted result
strings. For example, if the test results are as follows:
a.html IMAGE IMAGE PASS PASS PASS TIMEOUT PASS TEXT
b.html PASS PASS PASS PASS PASS PASS PASS PASS
c.html
This method will return:
{
            'a.html': ['IMAGE', 'PASS', 'TEXT', 'TIMEOUT'],
'b.html': ['PASS'],
}
"""
results_by_path = {}
for test_path, entry in self.results_json.walk_results():
results_dict = entry.get(self.results_json.RESULTS_KEY, {})
result_types = self._all_types_in_results(results_dict)
if not result_types:
continue
# Distinct results as non-encoded strings.
result_strings = map(self.results_json.expectation_for_type,
result_types)
results_by_path[test_path] = sorted(result_strings)
return results_by_path
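    # Illustrative example (hypothetical data): a test that flaked between PASS
    # and FAIL makes expectation_lines() emit roughly
    #     Expectation(tags=self.specifiers, test='css/flaky-test.html',
    #                 results={'PASS', 'FAIL'})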
def expectation_lines(self, only_ignore_very_flaky):
lines = []
for test_path, entry in self.results_json.walk_results():
flaky_types = self._flaky_types_in_results(entry,
only_ignore_very_flaky)
if len(flaky_types) > 1:
line = self._line_from_test_and_flaky_types(
test_path, flaky_types)
lines.append(line)
return lines
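    # Illustrative example (hypothetical build numbers): with build_numbers()
    # returning [10, 11, 12] and build 11 flagged by the results filter, the
    # bitmap is [True, False, True]; False positions are skipped when collecting
    # result types in _all_types_in_results().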
def _get_results_filter(self, results_filter):
if results_filter:
filter_builds = results_filter.get_build_numbers_to_ignore()
return [build not in filter_builds for build in self.results_json.build_numbers()]
else:
return None
def _line_from_test_and_flaky_types(self, test_name, flaky_types):
return Expectation(
tags=self.specifiers, test=test_name, results=flaky_types)
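    # Illustrative example (letter codes are made up): with no filter bitmap,
    # the RLE list [[3, 'P'], [1, 'FP']] produces {'P', 'F'}; letter codes in
    # RESULT_TYPES_TO_IGNORE are skipped.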
def _all_types_in_results(self, run_length_encoded_results):
results = set()
result_index = 0
for result_item in run_length_encoded_results:
count, result_types = self.results_json.occurances_and_type_from_result_item(
result_item)
if (not self.filter_results_bitmap or
any(self.filter_results_bitmap[result_index : result_index + count])):
for result_type in result_types:
if result_type not in self.RESULT_TYPES_TO_IGNORE:
results.add(result_type)
result_index += count
return results
def _flaky_types_in_results(self, results_entry, only_ignore_very_flaky):
flaky_results = set()
# Always include pass as an expected result. Passes will never turn the bot red.
# This fixes cases where the expectations have an implicit Pass, e.g. [ Slow ].
latest_expectations = [ResultType.Pass]
if self.results_json.EXPECTATIONS_KEY in results_entry:
expectations_list = results_entry[self.results_json.
EXPECTATIONS_KEY].split(' ')
latest_expectations.extend(expectations_list)
for result_item in results_entry[self.results_json.RESULTS_KEY]:
_, result_types_str = self.results_json.occurances_and_type_from_result_item(
result_item)
result_types = []
for result_type in result_types_str:
# TODO(ojan): Remove this if-statement once crbug.com/514378 is fixed.
if result_type not in self.NON_RESULT_TYPES:
result_types.append(
self.results_json.expectation_for_type(result_type))
# It didn't flake if it didn't retry.
if len(result_types) <= 1:
continue
# If the test ran as expected after only one retry, it's not very flaky.
# It's only very flaky if it failed the first run and the first retry
# and then ran as expected in one of the subsequent retries.
# If there are only two entries, then that means it failed on the first
# try and ran as expected on the second because otherwise we'd have
# a third entry from the next try.
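            # Illustrative example (decoded names are hypothetical): a per-build
            # result list of ['FAIL', 'PASS'] is skipped here as not very flaky,
            # while ['FAIL', 'FAIL', 'PASS'] falls through to the check below.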
if only_ignore_very_flaky and len(result_types) == 2:
continue
has_unexpected_results = False
for result_type in result_types:
# TODO(ojan): We really should be grabbing the expected results from the time
# of the run instead of looking at the latest expected results. That's a lot
# more complicated though. So far we've been looking at the aggregated
# results_small.json off test_results.appspot, which has all the information
# for the last 100 runs. In order to do this, we'd need to look at the
# individual runs' full_results.json, which would be slow and more complicated.
# The only thing we lose by not fixing this is that a test that was flaky
# and got fixed will still get printed out until 100 runs have passed.
if result_type not in latest_expectations:
has_unexpected_results = True
break
if has_unexpected_results:
flaky_results = flaky_results.union(set(result_types))
return flaky_results