19dc267e4c
Allow specifying the source and destination files' encodings in the template module * Added output_encoding to the template module, default to utf-8 * Added documentation for the new variables * Leveraged the encoding argument on to_text() and to_bytes() to keep the implementation as simple as possible * Added integration tests with files in utf-8 and windows-1252 encodings, testing all combinations * fix bad smell test by excluding windows-1252 files from the utf8 checks * fix bad smell test by excluding valid files from the smart quote test
40 lines
1.3 KiB
Python
Executable file
40 lines
1.3 KiB
Python
Executable file
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
|
|
def main():
    """Report Unicode "smart" quotes and undecodable bytes in the given files.

    The paths to check come from the command-line arguments; when none are
    given, they are read one per line from standard input. Paths listed in
    ``skip`` are ignored entirely.

    Each file is read in binary mode and decoded line by line as UTF-8:

    * a line that is not valid UTF-8 is reported as
      ``path:line:column: UnicodeDecodeError: ...`` and otherwise ignored;
    * a line containing a curly single or double quote is reported once, at
      the position of the first such quote.

    Line and column numbers in the output are 1-based. Findings are printed
    to standard output; nothing is returned.
    """
    # Exact path strings that are exempt from the check: this script itself
    # (it contains the quote characters in its pattern) plus files that
    # contain such characters or non-UTF-8 data on purpose.
    skip = set([
        'test/sanity/code-smell/%s' % os.path.basename(__file__),
        'docs/docsite/rst/dev_guide/testing/sanity/no-smart-quotes.rst',
        'test/integration/targets/unicode/unicode.yml',
        'test/integration/targets/lookup_properties/lookup-8859-15.ini',
        'test/integration/targets/template/files/encoding_1252_utf-8.expected',
        'test/integration/targets/template/files/encoding_1252_windows-1252.expected',
        'test/integration/targets/template/templates/encoding_1252.j2',
    ])

    # Compiled once up front instead of being rebuilt for every line.
    smart_quote = re.compile(u'([‘’“”])')

    for path in sys.argv[1:] or sys.stdin.read().splitlines():
        if path in skip:
            continue

        with open(path, 'rb') as path_fd:
            # Iterate the file object directly so lines are streamed rather
            # than loaded into memory all at once; a binary file yields the
            # same newline-terminated chunks that readlines() would.
            for line, text in enumerate(path_fd):
                try:
                    text = text.decode('utf-8')
                except UnicodeDecodeError as ex:
                    # ex.start is the 0-based offset of the first bad byte.
                    print('%s:%d:%d: UnicodeDecodeError: %s' % (path, line + 1, ex.start + 1, ex))
                    continue

                match = smart_quote.search(text)

                if match:
                    print('%s:%d:%d: use ASCII quotes `\'` and `"` instead of Unicode quotes' % (
                        path, line + 1, match.start(1) + 1))
|
|
|
|
|
|
# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|