diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 1aba741a..147dd45b 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -191,6 +191,15 @@ # for backward compatibility files: '' minimum_pre_commit_version: 0.15.0 +- id: mixed-line-ending + name: Mixed line ending + description: Replaces or checks mixed line ending + entry: mixed-line-ending + language: python + types: [text] + # for backward compatibility + files: '' + minimum_pre_commit_version: 0.15.0 - id: name-tests-test name: Tests should end in _test.py description: This verifies that test files are named correctly diff --git a/README.md b/README.md index 55b49ff1..b42a9821 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,11 @@ Add this to your `.pre-commit-config.yaml` - To remove the coding pragma pass `--remove` (useful in a python3-only codebase) - `flake8` - Run flake8 on your python files. - `forbid-new-submodules` - Prevent addition of new git submodules. +- `mixed-line-ending` - Replaces or checks mixed line endings. + - `--fix={auto,crlf,lf,no}` + - `auto` - Automatically replaces all line endings with the most frequent one. This is the default value. + - `crlf`, `lf` - Forces all line endings to be replaced with CRLF or LF, respectively. + - `no` - Checks for mixed line endings without modifying any file. - `name-tests-test` - Assert that files in tests/ end in `_test.py`. - Use `args: ['--django']` to match `test*.py` instead. - `no-commit-to-branch` - Protect specific branches from direct checkins. 
diff --git a/hooks.yaml b/hooks.yaml index 5278bf5e..59cc320f 100644 --- a/hooks.yaml +++ b/hooks.yaml @@ -130,6 +130,12 @@ entry: upgrade-your-pre-commit-version files: '' minimum_pre_commit_version: 0.15.0 +- id: mixed-line-ending + language: system + name: upgrade-your-pre-commit-version + entry: upgrade-your-pre-commit-version + files: '' + minimum_pre_commit_version: 0.15.0 - id: name-tests-test language: system name: upgrade-your-pre-commit-version diff --git a/pre_commit_hooks/mixed_line_ending.py b/pre_commit_hooks/mixed_line_ending.py new file mode 100644 index 00000000..76512a77 --- /dev/null +++ b/pre_commit_hooks/mixed_line_ending.py @@ -0,0 +1,212 @@ +import argparse +import re +import sys + +from enum import Enum + + +class LineEnding(Enum): + CR = b'\r', 'cr', re.compile(b'\r(?!\n)', re.DOTALL) + CRLF = b'\r\n', 'crlf', re.compile(b'\r\n', re.DOTALL) + LF = b'\n', 'lf', re.compile(b'(? 0 + + mixed |= le_found_previously and le_found_cur + le_found_previously |= le_found_cur + + if le_count == max_le_count: + most_le = None + elif le_count > max_le_count: + max_le_count = le_count + most_le = le + + if not mixed: + return MixedLineDetection.NOT_MIXED + + for mld in MixedLineDetection: + if ( + mld.line_ending_enum is not None and + mld.line_ending_enum == most_le + ): + return mld + + return MixedLineDetection.UNKNOWN + + +def _process_no_fix(filenames): + print('Checking if the files have mixed line ending.') + + mle_filenames = [] + for filename in filenames: + detect_result = _detect_line_ending(filename) + + if detect_result.mle_found: + mle_filenames.append(filename) + + mle_found = len(mle_filenames) > 0 + + if mle_found: + print( + 'The following files have mixed line endings:\n\t%s', + '\n\t'.join(mle_filenames), + ) + + return 1 if mle_found else 0 + + +def _process_fix_auto(filenames): + mle_found = False + + for filename in filenames: + detect_result = _detect_line_ending(filename) + + if detect_result == MixedLineDetection.NOT_MIXED: + 
print('The file %s has no mixed line ending', filename) + elif detect_result == MixedLineDetection.UNKNOWN: + print( + 'Could not define most frequent line ending in ' + 'file %s. File skiped.', filename, + ) + + mle_found = True + else: + le_enum = detect_result.line_ending_enum + + print( + 'The file %s has mixed line ending with a ' + 'majority of %s. Converting...', filename, le_enum.str_print, + ) + + _convert_line_ending(filename, le_enum.string) + mle_found = True + + print( + 'The file %s has been converted to %s line ending.', + filename, le_enum.str_print, + ) + + return 1 if mle_found else 0 + + +def _process_fix_force(filenames, line_ending_enum): + for filename in filenames: + _convert_line_ending(filename, line_ending_enum.string) + + print( + 'The file %s has been forced to %s line ending.', + filename, line_ending_enum.str_print, + ) + + return 1 + + +def _convert_line_ending(filename, line_ending): + with open(filename, 'rb+') as f: + bufin = f.read() + + # convert line ending + bufout = ANY_LINE_ENDING_PATTERN.sub(line_ending, bufin) + + # write the result in the file replacing the existing content + f.seek(0) + f.write(bufout) + f.truncate() + + +if __name__ == '__main__': + sys.exit(mixed_line_ending()) diff --git a/setup.py b/setup.py index 90ef3c62..c0fa9692 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,7 @@ 'simplejson', 'six', ], + extras_require={':python_version=="2.7"': ['enum34']}, entry_points={ 'console_scripts': [ 'autopep8-wrapper = pre_commit_hooks.autopep8_wrapper:main', @@ -53,6 +54,7 @@ 'file-contents-sorter = pre_commit_hooks.file_contents_sorter:main', 'fix-encoding-pragma = pre_commit_hooks.fix_encoding_pragma:main', 'forbid-new-submodules = pre_commit_hooks.forbid_new_submodules:main', + 'mixed-line-ending = pre_commit_hooks.mixed_line_ending:mixed_line_ending', 'name-tests-test = pre_commit_hooks.tests_should_end_in_test:validate_files', 'no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main', 
"""Tests for the ``mixed-line-ending`` hook, one table per ``--fix`` mode.

Each table row is ``(input bytes, expected return value, expected file
content after the run)``.
"""
import pytest

from pre_commit_hooks.mixed_line_ending import mixed_line_ending


def _run_hook(fix_mode, input_s, tmpdir):
    """Run the hook with ``--fix=<fix_mode>`` on a file holding *input_s*.

    Returns ``(return value, file content after the run)``.
    """
    path = tmpdir.join('file.txt')
    # Binary write: the fixture bytes must reach the disk verbatim, with no
    # text-mode decoding or newline translation in between.
    path.write_binary(input_s)
    ret = mixed_line_ending(('--fix=%s' % fix_mode, path.strpath))
    return ret, path.read_binary()


# Input, expected return value, expected output
TESTS_FIX_AUTO = (
    # only 'LF'
    (b'foo\nbar\nbaz\n', 0, b'foo\nbar\nbaz\n'),
    # only 'CRLF'
    (b'foo\r\nbar\r\nbaz\r\n', 0, b'foo\r\nbar\r\nbaz\r\n'),
    # only 'CR'
    (b'foo\rbar\rbaz\r', 0, b'foo\rbar\rbaz\r'),
    # mixed with majority of 'LF'
    (b'foo\r\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
    # mixed with majority of 'CRLF'
    (b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # mixed with majority of 'CR'
    (b'foo\rbar\nbaz\r', 1, b'foo\rbar\rbaz\r'),
    # mixed with as much 'LF' as 'CRLF'
    (b'foo\r\nbar\nbaz', 1, b'foo\r\nbar\nbaz'),
    # mixed with as much 'LF' as 'CR'
    (b'foo\rbar\nbaz', 1, b'foo\rbar\nbaz'),
    # mixed with as much 'CRLF' as 'CR'
    (b'foo\r\nbar\rbaz', 1, b'foo\r\nbar\rbaz'),
    # mixed with as much 'CRLF' as 'LF' as 'CR'
    (b'foo\r\nbar\nbaz\r', 1, b'foo\r\nbar\nbaz\r'),
)


@pytest.mark.parametrize(
    ('input_s', 'expected_retval', 'output'),
    TESTS_FIX_AUTO,
)
def test_mixed_line_ending_fix_auto(input_s, expected_retval, output, tmpdir):
    ret, content = _run_hook('auto', input_s, tmpdir)

    assert ret == expected_retval
    assert content == output


# Input, expected return value, expected output
TESTS_NO_FIX = (
    # only 'LF'
    (b'foo\nbar\nbaz\n', 0, b'foo\nbar\nbaz\n'),
    # only 'CRLF'
    (b'foo\r\nbar\r\nbaz\r\n', 0, b'foo\r\nbar\r\nbaz\r\n'),
    # only 'CR'
    (b'foo\rbar\rbaz\r', 0, b'foo\rbar\rbaz\r'),
    # mixed with majority of 'LF'
    (b'foo\r\nbar\nbaz\n', 1, b'foo\r\nbar\nbaz\n'),
    # mixed with majority of 'CRLF'
    (b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\nbaz\r\n'),
    # mixed with majority of 'CR'
    (b'foo\rbar\nbaz\r', 1, b'foo\rbar\nbaz\r'),
    # mixed with as much 'LF' as 'CR'
    (b'foo\rbar\nbaz', 0, b'foo\rbar\nbaz'),
    # mixed with as much 'LF' as 'CRLF'
    (b'foo\r\nbar\nbaz', 0, b'foo\r\nbar\nbaz'),
    # mixed with as much 'CRLF' as 'CR'
    (b'foo\r\nbar\rbaz', 0, b'foo\r\nbar\rbaz'),
    # mixed with as much 'CRLF' as 'LF' as 'CR'
    (b'foo\r\nbar\nbaz\r', 0, b'foo\r\nbar\nbaz\r'),
)


@pytest.mark.parametrize(
    ('input_s', 'expected_retval', 'output'),
    TESTS_NO_FIX,
)
def test_detect_mixed_line_ending(input_s, expected_retval, output, tmpdir):
    ret, content = _run_hook('no', input_s, tmpdir)

    assert ret == expected_retval
    assert content == output


# Input, expected return value, expected output
TESTS_FIX_FORCE_LF = (
    # only 'LF'
    (b'foo\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
    # only 'CRLF'
    (b'foo\r\nbar\r\nbaz\r\n', 1, b'foo\nbar\nbaz\n'),
    # only 'CR'
    (b'foo\rbar\rbaz\r', 1, b'foo\nbar\nbaz\n'),
    # mixed with majority of 'LF'
    (b'foo\r\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
    # mixed with majority of 'CRLF'
    (b'foo\r\nbar\nbaz\r\n', 1, b'foo\nbar\nbaz\n'),
    # mixed with majority of 'CR'
    (b'foo\rbar\nbaz\r', 1, b'foo\nbar\nbaz\n'),
    # mixed with as much 'LF' as 'CR'
    (b'foo\rbar\nbaz', 1, b'foo\nbar\nbaz'),
    # mixed with as much 'LF' as 'CRLF'
    (b'foo\r\nbar\nbaz', 1, b'foo\nbar\nbaz'),
    # mixed with as much 'CRLF' as 'CR'
    (b'foo\r\nbar\rbaz', 1, b'foo\nbar\nbaz'),
    # mixed with as much 'CRLF' as 'LF' as 'CR'
    (b'foo\r\nbar\nbaz\r', 1, b'foo\nbar\nbaz\n'),
)


@pytest.mark.parametrize(
    ('input_s', 'expected_retval', 'output'),
    TESTS_FIX_FORCE_LF,
)
def test_mixed_line_ending_fix_force_lf(
        input_s, expected_retval, output,
        tmpdir,
):
    ret, content = _run_hook('lf', input_s, tmpdir)

    assert ret == expected_retval
    assert content == output


# Input, expected return value, expected output
TESTS_FIX_FORCE_CRLF = (
    # only 'LF'
    (b'foo\nbar\nbaz\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # only 'CRLF'
    (b'foo\r\nbar\r\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # only 'CR'
    (b'foo\rbar\rbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # mixed with majority of 'LF'
    (b'foo\r\nbar\nbaz\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # mixed with majority of 'CRLF'
    (b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # mixed with majority of 'CR'
    (b'foo\rbar\nbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # mixed with as much 'LF' as 'CR'
    (b'foo\rbar\nbaz', 1, b'foo\r\nbar\r\nbaz'),
    # mixed with as much 'LF' as 'CRLF'
    (b'foo\r\nbar\nbaz', 1, b'foo\r\nbar\r\nbaz'),
    # mixed with as much 'CRLF' as 'CR'
    (b'foo\r\nbar\rbaz', 1, b'foo\r\nbar\r\nbaz'),
    # mixed with as much 'CRLF' as 'LF' as 'CR'
    (b'foo\r\nbar\nbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
)


@pytest.mark.parametrize(
    ('input_s', 'expected_retval', 'output'),
    TESTS_FIX_FORCE_CRLF,
)
def test_mixed_line_ending_fix_force_crlf(
        input_s, expected_retval, output,
        tmpdir,
):
    ret, content = _run_hook('crlf', input_s, tmpdir)

    assert ret == expected_retval
    assert content == output