381 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			381 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|  | #!/usr/bin/env python3 | ||
|  | 
 | ||
|  | # Copyright The Mbed TLS Contributors | ||
|  | # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later | ||
|  | # | ||
|  | # This file is provided under the Apache License 2.0, or the | ||
|  | # GNU General Public License v2.0 or later. | ||
|  | # | ||
|  | # ********** | ||
|  | # Apache License 2.0: | ||
|  | # | ||
|  | # Licensed under the Apache License, Version 2.0 (the "License"); you may | ||
|  | # not use this file except in compliance with the License. | ||
|  | # You may obtain a copy of the License at | ||
|  | # | ||
|  | # http://www.apache.org/licenses/LICENSE-2.0 | ||
|  | # | ||
|  | # Unless required by applicable law or agreed to in writing, software | ||
|  | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
|  | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
|  | # See the License for the specific language governing permissions and | ||
|  | # limitations under the License. | ||
|  | # | ||
|  | # ********** | ||
|  | # | ||
|  | # ********** | ||
|  | # GNU General Public License v2.0 or later: | ||
|  | # | ||
|  | # This program is free software; you can redistribute it and/or modify | ||
|  | # it under the terms of the GNU General Public License as published by | ||
|  | # the Free Software Foundation; either version 2 of the License, or | ||
|  | # (at your option) any later version. | ||
|  | # | ||
|  | # This program is distributed in the hope that it will be useful, | ||
|  | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
|  | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||
|  | # GNU General Public License for more details. | ||
|  | # | ||
|  | # You should have received a copy of the GNU General Public License along | ||
|  | # with this program; if not, write to the Free Software Foundation, Inc., | ||
|  | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
|  | # | ||
|  | # ********** | ||
|  | 
 | ||
|  | """
 | ||
|  | This script checks the current state of the source code for minor issues, | ||
|  | including incorrect file permissions, presence of tabs, non-Unix line endings, | ||
|  | trailing whitespace, and presence of UTF-8 BOM. | ||
|  | Note: requires python 3, must be run from Mbed TLS root. | ||
|  | """
 | ||
|  | 
 | ||
|  | import os | ||
|  | import argparse | ||
|  | import logging | ||
|  | import codecs | ||
|  | import re | ||
|  | import subprocess | ||
|  | import sys | ||
|  | 
 | ||
|  | 
 | ||
class FileIssueTracker:
    """Base class for file-wide issue tracking.

    To implement a checker that processes a file as a whole, inherit from
    this class, implement ``check_file_for_issue`` and define ``heading``.

    ``suffix_exemptions``: files whose name ends with a string in this set
    will not be checked.

    ``path_exemptions``: files whose path (relative to the root of the source
    tree) matches this regular expression will not be checked. This can be
    ``None`` to match no path. Paths are normalized and converted to ``/``
    separators before matching.

    ``heading``: human-readable description of the issue, logged once before
    the list of offending files.
    """

    suffix_exemptions = frozenset()
    path_exemptions = None
    # heading must be defined in derived classes.
    # pylint: disable=no-member

    def __init__(self):
        # Maps a file path to the list of line numbers where the issue was
        # found. Whole-file checkers store ``None`` instead of a list.
        self.files_with_issues = {}

    @staticmethod
    def normalize_path(filepath):
        """Normalize ``filepath`` with / as the directory separator.

        Returns the path after ``os.path.normpath``, with every
        platform-specific separator replaced by ``/``.
        """
        normalized = os.path.normpath(filepath)
        # On Windows, we may have backslashes to separate directories.
        # We need slashes to match exemption lists.
        # Replace each separator on its own: str.split()/str.replace() treat
        # their argument as one delimiter string, so concatenating the two
        # separators would look for the literal sequence "\/" and never match.
        for separator in (os.path.sep, os.path.altsep):
            if separator is not None and separator != '/':
                normalized = normalized.replace(separator, '/')
        return normalized

    def should_check_file(self, filepath):
        """Whether the given file name should be checked.

        Files whose name ends with a string listed in ``self.suffix_exemptions``
        or whose path matches ``self.path_exemptions`` will not be checked.

        Returns ``True`` if the file should be checked, ``False`` otherwise.
        """
        # str.endswith accepts a tuple of candidates; an empty tuple never
        # matches, which covers the default empty exemption set.
        if filepath.endswith(tuple(self.suffix_exemptions)):
            return False
        if self.path_exemptions and \
           re.match(self.path_exemptions, self.normalize_path(filepath)):
            return False
        return True

    def check_file_for_issue(self, filepath):
        """Check the specified file for the issue that this class is for.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def record_issue(self, filepath, line_number):
        """Record that an issue was found at the specified location."""
        self.files_with_issues.setdefault(filepath, []).append(line_number)

    def output_file_issues(self, logger):
        """Log all the locations where the issue was found.

        Nothing is logged when no issue was recorded. Otherwise the heading
        is logged first, then one entry per file in sorted order (with the
        line numbers when there are any), then an empty separator line.
        """
        if not self.files_with_issues:
            return
        logger.info(self.heading)
        for filename, lines in sorted(self.files_with_issues.items()):
            if lines:
                logger.info("{}: {}".format(
                    filename, ", ".join(str(x) for x in lines)
                ))
            else:
                # Whole-file issue: there are no line numbers to report.
                logger.info(filename)
        logger.info("")
|  | 
 | ||
# Regular expressions for the paths (with ``/`` separators) of files that are
# treated as binary. Each pattern is anchored at the end of the path with \Z.
BINARY_FILE_PATH_RE_LIST = [
    # PDF files under docs/
    r'docs/.*\.pdf\Z',
    # Paths under these directories that contain no dot at all
    r'programs/fuzz/corpuses/[^.]+\Z',
    r'tests/data_files/[^.]+\Z',
    # Test data files selected by extension or by name
    r'tests/data_files/.*\.(crt|csr|db|der|key|pubkey)\Z',
    r'tests/data_files/.*\.req\.[^/]+\Z',
    r'tests/data_files/.*malformed[^/]+\Z',
    r'tests/data_files/format_pkcs12\.fmt\Z',
]
# One compiled pattern that matches when any of the patterns above matches.
BINARY_FILE_PATH_RE = re.compile(
    '|'.join(BINARY_FILE_PATH_RE_LIST)
)
|  | 
 | ||
class LineIssueTracker(FileIssueTracker):
    """Base class for line-by-line issue tracking.

    To implement a checker that processes files line by line, inherit from
    this class and implement ``issue_with_line``.
    """

    # Exclude binary files.
    path_exemptions = BINARY_FILE_PATH_RE

    def issue_with_line(self, line, filepath):
        """Tell whether ``line`` (a bytes object) of ``filepath`` has the issue.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def check_file_line(self, filepath, line, line_number):
        """Record ``line_number`` of ``filepath`` if ``line`` has the issue."""
        if not self.issue_with_line(line, filepath):
            return
        self.record_issue(filepath, line_number)

    def check_file_for_issue(self, filepath):
        """Check the lines of the specified file.

        The file is read in binary mode and each line is passed, with its
        1-based line number, to ``check_file_line``.
        Subclasses must implement the ``issue_with_line`` method.
        """
        with open(filepath, "rb") as f:
            for line_number, line in enumerate(f, start=1):
                self.check_file_line(filepath, line, line_number)
|  | 
 | ||
|  | 
 | ||
def is_windows_file(filepath):
    """Tell whether ``filepath`` has one of the Windows file extensions.

    Only the extension reported by ``os.path.splitext`` is considered, and
    the comparison is case-sensitive.
    """
    extension = os.path.splitext(filepath)[1]
    return extension in ('.bat', '.dsp', '.dsw', '.sln', '.vcxproj')
|  | 
 | ||
|  | 
 | ||
class PermissionIssueTracker(FileIssueTracker):
    """Track files with bad permissions.

    A file is expected to be executable exactly when its name ends with
    ``.sh``, ``.pl`` or ``.py``; any other combination is reported."""

    heading = "Incorrect permissions:"

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if its executable bit does not match its name."""
        expect_executable = filepath.endswith((".sh", ".pl", ".py"))
        if os.access(filepath, os.X_OK) != expect_executable:
            # Whole-file issue: no line numbers to record.
            self.files_with_issues[filepath] = None
|  | 
 | ||
|  | 
 | ||
class EndOfFileNewlineIssueTracker(FileIssueTracker):
    """Track files whose last line is incomplete.

    That is, files that do not end with a newline character."""

    heading = "Missing newline at end of file:"

    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if it is non-empty and its last byte is not LF."""
        with open(filepath, "rb") as f:
            try:
                # Position on the very last byte of the file.
                f.seek(-1, os.SEEK_END)
            except OSError:
                # This script only works on regular files. If we can't seek
                # 1 before the end, it means that this position is before
                # the beginning of the file, i.e. that the file is empty.
                return
            last_byte = f.read(1)
        if last_byte != b"\n":
            self.files_with_issues[filepath] = None
|  | 
 | ||
|  | 
 | ||
class Utf8BomIssueTracker(FileIssueTracker):
    """Track files that start with a UTF-8 BOM.

    Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""

    heading = "UTF-8 BOM present:"

    suffix_exemptions = frozenset([".vcxproj", ".sln"])
    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if its first bytes are the UTF-8 byte order mark."""
        with open(filepath, "rb") as f:
            # Only the leading bytes matter, so read just as many bytes as
            # the BOM is long instead of loading the whole file into memory.
            # A shorter file yields fewer bytes and cannot compare equal.
            leading_bytes = f.read(len(codecs.BOM_UTF8))
        if leading_bytes == codecs.BOM_UTF8:
            # Whole-file issue: no line numbers to record.
            self.files_with_issues[filepath] = None
|  | 
 | ||
|  | 
 | ||
class UnixLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Unix line endings (i.e. files with CR)."""

    heading = "Non-Unix line endings:"

    def should_check_file(self, filepath):
        """Check every file the base class accepts, except Windows files."""
        return (super().should_check_file(filepath)
                and not is_windows_file(filepath))

    def issue_with_line(self, line, _filepath):
        """Tell whether ``line`` contains a carriage return anywhere."""
        return line.find(b"\r") != -1
|  | 
 | ||
|  | 
 | ||
class WindowsLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Windows line endings (i.e. CR or LF not in CRLF)."""

    heading = "Non-Windows line endings:"

    def should_check_file(self, filepath):
        """Check only the Windows files that the base class accepts."""
        return (super().should_check_file(filepath)
                and is_windows_file(filepath))

    def issue_with_line(self, line, _filepath):
        """Tell whether ``line`` fails to be text terminated by one CRLF."""
        if not line.endswith(b"\r\n"):
            return True
        # A CR anywhere before the final CRLF is a stray carriage return.
        return b"\r" in line[:-2]
|  | 
 | ||
|  | 
 | ||
class TrailingWhitespaceIssueTracker(LineIssueTracker):
    """Track lines with trailing whitespace."""

    heading = "Trailing whitespace:"
    suffix_exemptions = frozenset([".dsp", ".md"])

    def issue_with_line(self, line, _filepath):
        """Tell whether ``line`` has whitespace before its line terminator."""
        # Stripping only CR/LF and stripping all trailing whitespace give
        # different results exactly when other whitespace precedes the
        # line ending.
        without_eol = line.rstrip(b"\r\n")
        fully_stripped = line.rstrip()
        return without_eol != fully_stripped
|  | 
 | ||
|  | 
 | ||
class TabIssueTracker(LineIssueTracker):
    """Track lines with tabs."""

    heading = "Tabs present:"
    suffix_exemptions = frozenset([
        ".pem", # some openssl dumps have tabs
        ".sln",
        "/Makefile",
        "/generate_visualc_files.pl",
    ])

    def issue_with_line(self, line, _filepath):
        """Tell whether ``line`` contains a tab character anywhere."""
        return line.find(b"\t") != -1
|  | 
 | ||
|  | 
 | ||
class MergeArtifactIssueTracker(LineIssueTracker):
    """Track lines with merge artifacts.

    These are leftovers from a ``git merge`` that wasn't fully edited."""

    heading = "Merge artifact:"

    def issue_with_line(self, line, _filepath):
        """Tell whether ``line`` looks like a leftover git conflict marker."""
        # Markers that open or close a conflict; the ``|||||||`` form comes
        # from merge.conflictStyle=diff3.
        if line.startswith((b'<<<<<<< ', b'>>>>>>> ', b'||||||| ')):
            return True
        # A bare ``=======`` line is not reported in .md files.
        # NOTE(review): presumably because it is valid Markdown heading
        # underlining there -- confirm.
        if _filepath.endswith('.md'):
            return False
        return line.rstrip(b'\r\n') == b'======='
|  | 
 | ||
|  | 
 | ||
class IntegrityChecker:
    """Sanity-check files under the current directory."""

    def __init__(self, log_file):
        """Instantiate the sanity checker.

        Check files under the current directory.
        Write a report of issues to log_file, or to the console when
        log_file is empty or ``None``.

        Raises an ``Exception`` if the current directory does not look like
        the Mbed TLS root.
        """
        self.check_repo_path()
        self.logger = None
        self.setup_logger(log_file)
        self.issues_to_check = [
            PermissionIssueTracker(),
            EndOfFileNewlineIssueTracker(),
            Utf8BomIssueTracker(),
            UnixLineEndingIssueTracker(),
            WindowsLineEndingIssueTracker(),
            TrailingWhitespaceIssueTracker(),
            TabIssueTracker(),
            MergeArtifactIssueTracker(),
        ]

    @staticmethod
    def check_repo_path():
        """Raise an exception unless the expected top-level directories exist."""
        if not all(os.path.isdir(d) for d in ["include", "library", "tests"]):
            raise Exception("Must be run from Mbed TLS root")

    def setup_logger(self, log_file, level=logging.INFO):
        """Attach a file or console handler to the root logger.

        ``log_file``: path of the log file; if empty or ``None``, log to the
        console through a ``StreamHandler`` instead.
        ``level``: logging level set on the root logger.
        """
        self.logger = logging.getLogger()
        self.logger.setLevel(level)
        if log_file:
            handler = logging.FileHandler(log_file)
        else:
            handler = logging.StreamHandler()
        self.logger.addHandler(handler)

    @staticmethod
    def collect_files():
        """Return the list of files tracked by git, as str paths.

        File names are expected to be ASCII; a non-ASCII name makes the
        decoding step raise ``UnicodeDecodeError``.
        """
        bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
        # The output is NUL-terminated, so the last split element is empty.
        bytes_filepaths = bytes_output.split(b'\0')[:-1]
        ascii_filepaths = [fp.decode('ascii') for fp in bytes_filepaths]
        # Prepend './' to files in the top-level directory so that
        # something like `'/Makefile' in fp` matches in the top-level
        # directory as well as in subdirectories.
        return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
                for fp in ascii_filepaths]

    def check_files(self):
        """Run every issue tracker over every file it applies to."""
        # List the files once: the list is the same for every tracker, and
        # each call to collect_files() spawns a git subprocess.
        filepaths = self.collect_files()
        for issue_to_check in self.issues_to_check:
            for filepath in filepaths:
                if issue_to_check.should_check_file(filepath):
                    issue_to_check.check_file_for_issue(filepath)

    def output_issues(self):
        """Log the issues found by each tracker.

        Returns 1 if any tracker recorded an issue, 0 otherwise.
        """
        integrity_return_code = 0
        for issue_to_check in self.issues_to_check:
            if issue_to_check.files_with_issues:
                integrity_return_code = 1
            issue_to_check.output_file_issues(self.logger)
        return integrity_return_code
|  | 
 | ||
|  | 
 | ||
def run_main():
    """Parse the command line, run all the checks and exit.

    The process exit status is the value returned by
    ``IntegrityChecker.output_issues``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-l", "--log_file", type=str, help="path to optional output log",
    )
    options = parser.parse_args()
    checker = IntegrityChecker(options.log_file)
    checker.check_files()
    sys.exit(checker.output_issues())
|  | 
 | ||
|  | 
 | ||
# Run the checks only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    run_main()