chromium/src/third_party/blink/renderer/modules/sanitizer_api/build_corpus.py - manifest_repos/chromium_src - Git at Google

 #!/usr/bin/env python
 # Copyright 2021 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import argparse
 import os
 import re


 def basename(path):
     """Return the last component of `path` with its final extension removed.

     For example, "dir/page.html" yields "page". Only the last extension is
     stripped, so "a.tar.gz" yields "a.tar".
     """
     filename = os.path.basename(path)
     stem, _extension = os.path.splitext(filename)
     return stem


 def char_escape(c):
     """Escape `c` for use inside a Protobuf text-format string literal.

     Protobuf text format uses C-like escapes: quotes and backslashes get a
     leading backslash, a newline becomes the two characters "\\n", and
     anything else is returned unchanged.
     """
     if c == "\n":
         return "\\n"
     if c in "'\"\\":
         return "\\" + c
     return c


 def main():
     """Generate the sanitizer_api_fuzzer seed corpus.

     Command-line arguments:
       --outdir      Directory the generated seed files are written into
                     (required).
       --dictionary  Optional path of a dictionary file to write.
       inputs        One or more input files. Names ending in ".html" are
                     treated as HTML samples, names ending in ".txt" as
                     configs.

     For every (html, config) pair a file "<html>-<config>.txt" is written
     into --outdir. It contains an `html_string: "..."` line holding the
     escaped HTML, followed by the config file's contents.
     """
     parser = argparse.ArgumentParser(
         description="Generate sanitizer_api_fuzzer seed corpus.")
     parser.add_argument("--outdir", required=True)
     parser.add_argument("--dictionary")
     parser.add_argument("inputs", nargs="+")
     args = parser.parse_args()

     # For simplicity, read all inputs into dictionary.
     inputs = {}
     for input_file in args.inputs:
         with open(input_file, "r") as f:
             inputs[input_file] = f.read()

     # Use file extensions to distinguish HTML from config inputs.
     # These must be real lists: both are iterated more than once below, and
     # on Python 3 filter() yields a one-shot iterator that would be empty on
     # the second pass (only the first HTML would get seeds, and the
     # dictionary would always come out empty).
     htmls = [name for name in inputs if name.endswith(".html")]
     configs = [name for name in inputs if name.endswith(".txt")]

     # Generate each combo of html + config, and write it into --outdir.
     for html in htmls:
         # The escaped HTML does not depend on the config; compute it once.
         escaped_html = "".join(map(char_escape, inputs[html]))
         for config in configs:
             name = "%s/%s-%s.txt" % (args.outdir, basename(html),
                                      basename(config))
             with open(name, "w") as f:
                 f.write("html_string: \"%s\"\n%s\n" %
                         (escaped_html, inputs[config]))

     # Write a "dictionary" file with the element and attribute names.
     # Extract element and attribute names with simple regexps. It doesn't matter
     # if these will always match correctly, as long as the dictionary is mostly
     # sensible.
     if args.dictionary:
         seed_dictionary = set()
         for html in htmls:
             seed_dictionary.update(re.findall(r'(?<=<)\w+\b', inputs[html]))
             seed_dictionary.update(re.findall(r'\b\w+(?==)', inputs[html]))
         with open(args.dictionary, "w") as f:
             # Sort so the generated file is identical from run to run.
             for word in sorted(seed_dictionary):
                 f.write("\"%s\"\n" % word)


 # Run the generator only when this file is executed as a script, not when
 # it is imported as a module.
 if __name__ == '__main__':
     main()
	#!/usr/bin/env python
	# Copyright 2021 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import argparse
	import os
	import re


	def basename(path):
	    """Return the last component of `path` with its final extension removed.

	    For example, "dir/page.html" yields "page".
	    """
	    return os.path.splitext(os.path.basename(path))[0]


	def char_escape(c):
	    """Escape `c` for use inside a Protobuf text-format string literal.

	    Quotes and backslashes are prefixed with a backslash, a newline becomes
	    the two characters "\\n", and anything else is returned unchanged.
	    """
	    # Escaping suitable for Protobuf text format, which is C-like.
	    if c in "'\"\\":
	        return "\\" + c
	    elif c == "\n":
	        return "\\n"
	    else:
	        return c


	def main():
	    """Generate the sanitizer_api_fuzzer seed corpus.

	    Command-line arguments:
	      --outdir      Directory the generated seed files are written into
	                    (required).
	      --dictionary  Optional path of a dictionary file to write.
	      inputs        One or more input files. Names ending in ".html" are
	                    treated as HTML samples, names ending in ".txt" as
	                    configs.

	    For every (html, config) pair a file "<html>-<config>.txt" is written
	    into --outdir. It contains an `html_string: "..."` line holding the
	    escaped HTML, followed by the config file's contents.
	    """
	    parser = argparse.ArgumentParser(
	        description="Generate sanitizer_api_fuzzer seed corpus.")
	    parser.add_argument("--outdir", required=True)
	    parser.add_argument("--dictionary")
	    parser.add_argument("inputs", nargs="+")
	    args = parser.parse_args()

	    # For simplicity, read all inputs into dictionary.
	    inputs = {}
	    for input_file in args.inputs:
	        with open(input_file, "r") as f:
	            inputs[input_file] = f.read()

	    # Use file extensions to distinguish HTML from config inputs.
	    # These must be real lists: both are iterated more than once below, and
	    # on Python 3 filter() yields a one-shot iterator that would be empty on
	    # the second pass.
	    htmls = [name for name in inputs if name.endswith(".html")]
	    configs = [name for name in inputs if name.endswith(".txt")]

	    # Generate each combo of html + config, and write it into --outdir.
	    for html in htmls:
	        # The escaped HTML does not depend on the config; compute it once.
	        escaped_html = "".join(map(char_escape, inputs[html]))
	        for config in configs:
	            name = "%s/%s-%s.txt" % (args.outdir, basename(html),
	                                     basename(config))
	            with open(name, "w") as f:
	                f.write("html_string: \"%s\"\n%s\n" %
	                        (escaped_html, inputs[config]))

	    # Write a "dictionary" file with the element and attribute names.
	    # Extract element and attribute names with simple regexps. It doesn't matter
	    # if these will always match correctly, as long as the dictionary is mostly
	    # sensible.
	    if args.dictionary:
	        seed_dictionary = set()
	        for html in htmls:
	            seed_dictionary.update(re.findall(r'(?<=<)\w+\b', inputs[html]))
	            seed_dictionary.update(re.findall(r'\b\w+(?==)', inputs[html]))
	        with open(args.dictionary, "w") as f:
	            # Sort so the generated file is identical from run to run.
	            for word in sorted(seed_dictionary):
	                f.write("\"%s\"\n" % word)


	# Run the generator only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == '__main__':
	    main()