cq/scripts/helpers/sanitizer_utils.py - manifest_repos/continuous-tests - Git at Google

 """Utils for working with the Clang sanitizers (asan, msan, tsan, ubsan)."""

 _SANITIZER_STRING = {
     'asan': 'AddressSanitizer',
     'lsan': 'LeakSanitizer',
     'msan': 'MemorySanitizer',
     'tsan': 'ThreadSanitizer',
     'ubsan': 'runtime error:'
 }

 _MAX_ERRORS = 1
 _TOO_MANY_ERRORS_MSG = '<Too many errors to print. Check logs for more.>'
 _MAX_LINES_PER_ERROR = 30
 _ERROR_SNIPPED_MSG = '<...snip (error too long; removed {} lines)...>\n'


 def get_sanitizer_to_comments_for(sanitizer):
   """Looks up the output-to-review-comments converter for |sanitizer|.

   Args:
     sanitizer: 'asan', 'msan', 'tsan', or 'ubsan'

   Returns:
     A function taking (returncode, stdout, stderr) that builds the review
     dict from stdout only; returncode and stderr are ignored.

   Raises:
     ValueError: for unknown |sanitizer|.
   """
   # The lambdas adapt the three-argument call shape to the single-argument
   # converters, which only ever look at stdout.
   converters = (
       ('asan', lambda returncode, stdout, stderr: _asan_to_comments(stdout)),
       ('msan', lambda returncode, stdout, stderr: _msan_to_comments(stdout)),
       ('tsan', lambda returncode, stdout, stderr: _tsan_to_comments(stdout)),
       ('ubsan', lambda returncode, stdout, stderr: _ubsan_to_comments(stdout)),
   )
   for name, converter in converters:
     if sanitizer == name:
       return converter
   raise ValueError('Unknown sanitizer: [{}]'.format(sanitizer))


 def get_validator_for(sanitizer):
   """Looks up the output validator for |sanitizer|.

   A validator is called with (returncode, stdout, stderr). It returns True
   when the run should be considered a success, or False when the output
   indicates that an error of some kind occurred.

   Args:
     sanitizer: 'asan', 'msan', 'tsan', or 'ubsan'

   Returns:
     Validator function.

   Raises:
     ValueError: for unknown |sanitizer|.
   """
   if sanitizer not in ('asan', 'msan', 'tsan', 'ubsan'):
     raise ValueError('Unknown sanitizer: [{}]'.format(sanitizer))

   validators = {
       'asan': _asan_lsan_validator,
       'msan': _msan_validator,
       'tsan': _tsan_validator,
       'ubsan': _ubsan_validator,
   }
   return validators[sanitizer]


 def _asan_lsan_validator(returncode, stdout, stderr):
   """Validates the output of a run with ASAN (and LSAN) enabled.

   Args:
     returncode: Exit code of the run.
     stdout: Captured standard output, searched for sanitizer markers.
     stderr: Ignored.

   Returns:
     True if returncode is 0, or if stdout does not contain both an ASAN or
     LSAN 'ERROR:' marker and an ASAN 'SUMMARY:' marker. False otherwise.
   """
   del stderr  # unused
   asan_name = _SANITIZER_STRING['asan']
   lsan_name = _SANITIZER_STRING['lsan']
   error_markers = ('ERROR: {}'.format(asan_name), 'ERROR: {}'.format(lsan_name))
   summary_marker = 'SUMMARY: {}'.format(asan_name)

   saw_error = any(marker in stdout for marker in error_markers)
   failed = saw_error and summary_marker in stdout
   return returncode == 0 or not failed


 def _msan_validator(returncode, stdout, stderr):
   """Validates the output of a run with MSAN enabled.

   Args:
     returncode: Exit code of the run.
     stdout: Captured standard output, searched for sanitizer markers.
     stderr: Ignored.

   Returns:
     True if returncode is 0, or if stdout lacks either the MSAN 'WARNING:'
     marker or the MSAN 'SUMMARY:' marker. False otherwise.
   """
   del stderr  # unused
   msan_name = _SANITIZER_STRING['msan']
   required_markers = (
       'WARNING: {}'.format(msan_name),
       'SUMMARY: {}'.format(msan_name),
   )
   failed = all(marker in stdout for marker in required_markers)
   return returncode == 0 or not failed


 def _tsan_validator(returncode, stdout, stderr):
   """Validates the output of a run with TSAN enabled.

   Args:
     returncode: Exit code of the run.
     stdout: Captured standard output, searched for sanitizer markers.
     stderr: Ignored.

   Returns:
     True if returncode is 0, or if stdout lacks either the TSAN 'WARNING:'
     marker or the TSAN 'SUMMARY:' marker. False otherwise.
   """
   del stderr  # unused
   tsan_name = _SANITIZER_STRING['tsan']
   reported = ('WARNING: {}'.format(tsan_name) in stdout and
               'SUMMARY: {}'.format(tsan_name) in stdout)
   return returncode == 0 or not reported


 def _ubsan_validator(returncode, stdout, stderr):
   del stderr  # unused
   return returncode == 0 or _SANITIZER_STRING['ubsan'] not in stdout


 def _asan_to_comments(sanitizer_output, lsan=True):
   """Builds Gerrit review comments from ASAN output.

   Args:
     sanitizer_output: Output from running a binary with ASAN enabled.
     lsan: When true, LSAN reports are collected as well as ASAN ones, for
         runs where both were enabled together. (Default is True)

   Returns:
     A review dict suitable for passing to base_step.add_review
   """
   sanitizer_names = ['asan', 'lsan'] if lsan else ['asan']
   markers = [_SANITIZER_STRING[name] for name in sanitizer_names]
   return _sanitizer_to_comments(markers, sanitizer_output)


 def _msan_to_comments(sanitizer_output):
   """Builds Gerrit review comments from MSAN output.

   Args:
     sanitizer_output: Output from running a binary with MSAN enabled.

   Returns:
     A review dict suitable for passing to base_step.add_review
   """
   markers = [_SANITIZER_STRING['msan']]
   return _sanitizer_to_comments(markers, sanitizer_output)


 def _tsan_to_comments(sanitizer_output):
   """Builds Gerrit review comments from TSAN output.

   Args:
     sanitizer_output: Output from running a binary with TSAN enabled.

   Returns:
     A review dict suitable for passing to base_step.add_review
   """
   markers = [_SANITIZER_STRING['tsan']]
   return _sanitizer_to_comments(markers, sanitizer_output)


 def _ubsan_to_comments(sanitizer_output):
   """Convert output from UBSAN into comments for Gerrit.

   UBSAN output is much different than other sanitizers, so it needs to
   be handled specially: every output line containing the UBSAN marker text
   is treated as one error.

   Args:
     sanitizer_output: Output from running a binary with ubsan enabled.

   Returns:
     A review dict suitable for passing to base_step.add_review, or an empty
     dict when no UBSAN error line was found.
   """
   ubsan_marker = _SANITIZER_STRING['ubsan']
   all_errors = []
   for line in sanitizer_output.splitlines(True):
     if ubsan_marker in line:
       all_errors.append(line)
       # Collect one error past the limit so the overflow is detectable below.
       if len(all_errors) > _MAX_ERRORS:
         break

   if len(all_errors) > _MAX_ERRORS:
     all_errors = all_errors[:_MAX_ERRORS]
     # Use the shared constant, as _sanitizer_to_comments does, so the two
     # overflow notices cannot drift apart.
     all_errors.append(_TOO_MANY_ERRORS_MSG)

   if all_errors:
     return {'review': {'message': '\n\n'.join(all_errors)}}
   return {}


 def _sanitizer_to_comments(sanitizer_strings, sanitizer_output):
   """Builds Gerrit review comments from ASAN/LSAN/MSAN/TSAN output.

   These sanitizers (unlike UBSAN) print reports in roughly the same shape,
   so one scraper handles all of them: a report runs from a start-of-error
   line through the next end-of-error line.

   Args:
     sanitizer_strings: List of the identifying strings to search for in the logs
       (e.g. AddressSanitizer, LeakSanitizer, MemorySanitizer, ThreadSanitizer)
     sanitizer_output: Output from running a binary with a sanitizer enabled.

   Returns:
     A review dict suitable for passing to base_step.add_review, or an empty
     dict when no complete report was found.
   """
   reports = []
   pending = []  # Lines of the report being collected; empty while idle.
   for line in sanitizer_output.splitlines(True):
     if _is_start_of_error(line, sanitizer_strings):
       # A new start line discards any report that never reached its summary.
       pending = [line]
       continue
     if not pending:
       continue

     pending.append(line)
     if not _is_end_of_error(line, sanitizer_strings):
       continue

     reports.append(''.join(_shorten_error_if_necessary(pending)))
     pending = []
     # Stop one report past the limit; that is enough to know there are more.
     if len(reports) > _MAX_ERRORS:
       break

   if not reports:
     return {}
   if len(reports) > _MAX_ERRORS:
     reports = reports[:_MAX_ERRORS] + [_TOO_MANY_ERRORS_MSG]
   return {'review': {'message': '\n\n'.join(reports)}}


 def _is_start_of_error(line, sanitizer_strings):
   warning_patterns = ['WARNING: {}'.format(s) for s in sanitizer_strings]
   error_patterns = ['ERROR: {}'.format(s) for s in sanitizer_strings]
   begin_error_patterns = warning_patterns + error_patterns
   return any(begin_error in line for begin_error in begin_error_patterns)


 def _is_end_of_error(line, sanitizer_strings):
   end_error_patterns = ['SUMMARY: {}'.format(s) for s in sanitizer_strings]
   return any(end_error in line for end_error in end_error_patterns)


 def _shorten_error_if_necessary(error):
   """Snips the middle out of an error longer than _MAX_LINES_PER_ERROR lines.

   Args:
     error: List of the lines making up one error report.

   Returns:
     |error| itself when it is within the limit; otherwise a new list made of
     the leading lines, a one-line snip notice stating how many lines were
     removed, and the trailing lines.
   """
   if len(error) <= _MAX_LINES_PER_ERROR:
     return error

   # Floor division keeps head + tail equal to the limit even when the limit
   # is odd, so the snip count reported below is exact.
   head_len = _MAX_LINES_PER_ERROR // 2
   tail_len = _MAX_LINES_PER_ERROR - head_len
   # Slice the tail from an explicit index: a zero-length tail written as
   # error[-0:] would return the whole list instead of nothing.
   tail_start = len(error) - tail_len
   num_snipped_lines = tail_start - head_len
   snipped_msg = [_ERROR_SNIPPED_MSG.format(num_snipped_lines)]
   return error[:head_len] + snipped_msg + error[tail_start:]
	"""Utils for working with the Clang sanitizers (asan, msan, tsan, ubsan)."""

	_SANITIZER_STRING = {
	'asan': 'AddressSanitizer',
	'lsan': 'LeakSanitizer',
	'msan': 'MemorySanitizer',
	'tsan': 'ThreadSanitizer',
	'ubsan': 'runtime error:'
	}

	_MAX_ERRORS = 1
	_TOO_MANY_ERRORS_MSG = '<Too many errors to print. Check logs for more.>'
	_MAX_LINES_PER_ERROR = 30
	_ERROR_SNIPPED_MSG = '<...snip (error too long; removed {} lines)...>\n'


	def get_sanitizer_to_comments_for(sanitizer):
	  """Returns a function that converts |sanitizer| output to review comments.

	  Args:
	    sanitizer: 'asan', 'msan', 'tsan', or 'ubsan'

	  Returns:
	    A function taking (returncode, stdout, stderr); only stdout is used.

	  Raises:
	    ValueError: for unknown |sanitizer|.
	  """
	  if sanitizer == 'asan':
	    return lambda returncode, stdout, stderr: _asan_to_comments(stdout)
	  if sanitizer == 'msan':
	    return lambda returncode, stdout, stderr: _msan_to_comments(stdout)
	  if sanitizer == 'tsan':
	    return lambda returncode, stdout, stderr: _tsan_to_comments(stdout)
	  if sanitizer == 'ubsan':
	    return lambda returncode, stdout, stderr: _ubsan_to_comments(stdout)
	  raise ValueError('Unknown sanitizer: [{}]'.format(sanitizer))


	def get_validator_for(sanitizer):
	  """Returns a function that validates output for |sanitizer|.

	  The returned validator takes: returncode, stdout, stderr and returns
	  True if the output should be considered a success or False if the output
	  indicates an error of some kind occurred.

	  Args:
	    sanitizer: 'asan', 'msan', 'tsan', or 'ubsan'

	  Returns:
	    Validator function.

	  Raises:
	    ValueError: for unknown |sanitizer|.
	  """
	  if sanitizer == 'asan':
	    return _asan_lsan_validator
	  if sanitizer == 'msan':
	    return _msan_validator
	  if sanitizer == 'tsan':
	    return _tsan_validator
	  if sanitizer == 'ubsan':
	    return _ubsan_validator
	  raise ValueError('Unknown sanitizer: [{}]'.format(sanitizer))


	def _asan_lsan_validator(returncode, stdout, stderr):
	  """Validates output from a run with ASAN/LSAN enabled.

	  Args:
	    returncode: Exit code of the run.
	    stdout: Captured standard output to search for sanitizer markers.
	    stderr: Unused.

	  Returns:
	    False only when returncode is non-zero and stdout contains an ASAN or
	    LSAN 'ERROR:' marker together with an ASAN 'SUMMARY:' marker.
	  """
	  del stderr  # unused
	  asan_error_msg = 'ERROR: {}'.format(_SANITIZER_STRING['asan'])
	  lsan_error_msg = 'ERROR: {}'.format(_SANITIZER_STRING['lsan'])
	  asan_summary_msg = 'SUMMARY: {}'.format(_SANITIZER_STRING['asan'])
	  error_indicator = ((asan_error_msg in stdout or lsan_error_msg in stdout) and
	                     (asan_summary_msg in stdout))
	  return returncode == 0 or not error_indicator


	def _msan_validator(returncode, stdout, stderr):
	  """Validates output from a run with MSAN enabled.

	  Args:
	    returncode: Exit code of the run.
	    stdout: Captured standard output to search for sanitizer markers.
	    stderr: Unused.

	  Returns:
	    False only when returncode is non-zero and stdout contains both the
	    MSAN 'WARNING:' marker and the MSAN 'SUMMARY:' marker.
	  """
	  del stderr  # unused
	  warning_msg = 'WARNING: {}'.format(_SANITIZER_STRING['msan'])
	  summary_msg = 'SUMMARY: {}'.format(_SANITIZER_STRING['msan'])
	  error_indicator = (warning_msg in stdout) and (summary_msg in stdout)
	  return returncode == 0 or not error_indicator


	def _tsan_validator(returncode, stdout, stderr):
	  """Validates output from a run with TSAN enabled.

	  Args:
	    returncode: Exit code of the run.
	    stdout: Captured standard output to search for sanitizer markers.
	    stderr: Unused.

	  Returns:
	    False only when returncode is non-zero and stdout contains both the
	    TSAN 'WARNING:' marker and the TSAN 'SUMMARY:' marker.
	  """
	  del stderr  # unused
	  warning_msg = 'WARNING: {}'.format(_SANITIZER_STRING['tsan'])
	  summary_msg = 'SUMMARY: {}'.format(_SANITIZER_STRING['tsan'])
	  error_indicator = (warning_msg in stdout) and (summary_msg in stdout)
	  return returncode == 0 or not error_indicator


	def _ubsan_validator(returncode, stdout, stderr):
	del stderr # unused
	return returncode == 0 or _SANITIZER_STRING['ubsan'] not in stdout


	def _asan_to_comments(sanitizer_output, lsan=True):
	  """Convert the output from ASAN into comments for Gerrit.

	  Args:
	    sanitizer_output: Output from running a binary with ASAN enabled.
	    lsan: Set True if asan/lsan were run together, to get comments from both.
	        (Default is True)

	  Returns:
	    A review dict suitable for passing to base_step.add_review
	  """
	  sanitizer_strings = [_SANITIZER_STRING['asan']]
	  if lsan:
	    sanitizer_strings.append(_SANITIZER_STRING['lsan'])

	  return _sanitizer_to_comments(sanitizer_strings, sanitizer_output)


	def _msan_to_comments(sanitizer_output):
	  """Convert the output from MSAN into comments for Gerrit.

	  Args:
	    sanitizer_output: Output from running a binary with MSAN enabled.

	  Returns:
	    A review dict suitable for passing to base_step.add_review
	  """
	  return _sanitizer_to_comments([_SANITIZER_STRING['msan']], sanitizer_output)


	def _tsan_to_comments(sanitizer_output):
	  """Convert the output from TSAN into comments for Gerrit.

	  Args:
	    sanitizer_output: Output from running a binary with TSAN enabled.

	  Returns:
	    A review dict suitable for passing to base_step.add_review
	  """
	  return _sanitizer_to_comments([_SANITIZER_STRING['tsan']], sanitizer_output)


	def _ubsan_to_comments(sanitizer_output):
	  """Convert output from UBSAN into comments for Gerrit.

	  UBSAN output is much different than other sanitizers, so it needs to
	  be handled specially.

	  Args:
	    sanitizer_output: Output from running a binary with ubsan enabled.

	  Returns:
	    A review dict suitable for passing to base_step.add_review
	  """
	  all_errors = []
	  for line in sanitizer_output.splitlines(True):
	    if _SANITIZER_STRING['ubsan'] in line:
	      all_errors.append(line)
	      # One error beyond the limit is enough to know there are too many.
	      if len(all_errors) > _MAX_ERRORS:
	        break

	  if len(all_errors) > _MAX_ERRORS:
	    all_errors = all_errors[:_MAX_ERRORS]
	    # Shared constant rather than a repeated literal, matching
	    # _sanitizer_to_comments.
	    all_errors.append(_TOO_MANY_ERRORS_MSG)

	  if all_errors:
	    return {'review': {'message': '\n\n'.join(all_errors)}}
	  return {}


	def _sanitizer_to_comments(sanitizer_strings, sanitizer_output):
	  """Converts output from ASAN/LSAN/MSAN/TSAN into comments for Gerrit.

	  Aside from UBSAN, all the sanitizers generate output that is in roughly
	  the same format, so they can all get parsed/scraped the same way.

	  Args:
	    sanitizer_strings: List of the identifying strings to search for in the logs
	      (e.g. AddressSanitizer, LeakSanitizer, MemorySanitizer, ThreadSanitizer)
	    sanitizer_output: Output from running a binary with a sanitizer enabled.

	  Returns:
	    A review dict suitable for passing to base_step.add_review
	  """
	  all_errors = []
	  current_error = []
	  for line in sanitizer_output.splitlines(True):
	    if _is_start_of_error(line, sanitizer_strings):
	      # Starting a new error drops any unfinished one collected so far.
	      current_error = [line]
	    elif current_error and _is_end_of_error(line, sanitizer_strings):
	      current_error.append(line)
	      all_errors.append(''.join(_shorten_error_if_necessary(current_error)))
	      current_error = []
	      # One error beyond the limit is enough to know there are too many.
	      if len(all_errors) > _MAX_ERRORS:
	        break
	    elif current_error:
	      current_error.append(line)

	  if len(all_errors) > _MAX_ERRORS:
	    all_errors = all_errors[:_MAX_ERRORS]
	    all_errors.append(_TOO_MANY_ERRORS_MSG)

	  if all_errors:
	    return {'review': {'message': '\n\n'.join(all_errors)}}
	  return {}


	def _is_start_of_error(line, sanitizer_strings):
	warning_patterns = ['WARNING: {}'.format(s) for s in sanitizer_strings]
	error_patterns = ['ERROR: {}'.format(s) for s in sanitizer_strings]
	begin_error_patterns = warning_patterns + error_patterns
	return any(begin_error in line for begin_error in begin_error_patterns)


	def _is_end_of_error(line, sanitizer_strings):
	end_error_patterns = ['SUMMARY: {}'.format(s) for s in sanitizer_strings]
	return any(end_error in line for end_error in end_error_patterns)


	def _shorten_error_if_necessary(error):
	  """Replaces the middle of an over-long error with a snip notice.

	  Args:
	    error: List of the lines making up one error report.

	  Returns:
	    |error| unchanged when it has at most _MAX_LINES_PER_ERROR lines;
	    otherwise a new list of leading lines, the snip notice, and trailing
	    lines.
	  """
	  if len(error) <= _MAX_LINES_PER_ERROR:
	    return error

	  # Split the line budget so the two halves always add up to the limit,
	  # including odd limits; that keeps the removed-line count accurate.
	  num_start_lines = _MAX_LINES_PER_ERROR // 2
	  num_end_lines = _MAX_LINES_PER_ERROR - num_start_lines
	  # An explicit start index avoids error[-0:], which is the whole list.
	  end_begins_at = len(error) - num_end_lines
	  start_of_error = error[:num_start_lines]
	  end_of_error = error[end_begins_at:]
	  num_snipped_lines = len(error) - _MAX_LINES_PER_ERROR
	  snipped_msg = [_ERROR_SNIPPED_MSG.format(num_snipped_lines)]
	  return start_of_error + snipped_msg + end_of_error