yymm/checker.py

## checker.py
import os
from chardet.universaldetector import UniversalDetector


def check_encode(file_path):
    """Detect the character encoding of a file with chardet.

    The file is opened in binary mode and fed to a UniversalDetector one
    line at a time; feeding stops as soon as the detector reports that
    it is done.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A ``(encoding, confidence)`` tuple taken from the detector's
        result dictionary.
    """
    sniffer = UniversalDetector()
    with open(file_path, 'rb') as stream:
        for chunk in stream:
            sniffer.feed(chunk)
            if sniffer.done:
                # The detector has seen enough; skip the rest of the file.
                break
    sniffer.close()
    guess = sniffer.result
    return guess['encoding'], guess['confidence']


def check_newline(file_path):
    """Classify the line terminators used in a file.

    The file is read in binary mode and every line that ends with a
    terminator is counted as either CR+LF or bare LF. A final line
    without a terminator is not counted, so it cannot skew the result.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        'CR+LF' when every terminated line ends with CR+LF,
        'LF' when every terminated line ends with a bare LF (this is also
        the answer for a file without any terminator, e.g. an empty file),
        or 'Mixed(CR+LF & LF)' when both kinds occur.
    """
    crlf = 0
    lf = 0
    with open(file_path, mode='rb') as f:
        # Binary iteration splits on LF only, so a CR+LF line still
        # arrives with both bytes at its end.
        for line in f:
            if line.endswith(b'\r\n'):
                crlf += 1
            elif line.endswith(b'\n'):
                lf += 1
    if crlf and lf:
        return 'Mixed(CR+LF & LF)'
    if crlf:
        return 'CR+LF'
    # Only LF terminators, or no terminator at all: judge as LF.
    return 'LF'


def check_all_files(files, encode, newline, threshold=0.95):
    """Report files whose encoding or newline style is not the expected one.

    Directories are skipped, and so are files for which check_encode()
    reports no encoding (treated as binary). Every problem found for a
    file is collected and printed together on one line after its path.

    Args:
        files: Iterable of paths to check.
        encode: Expected encoding name, compared against the name returned
            by check_encode(). Files detected as 'ascii' are accepted
            regardless of this value.
        newline: Expected newline label, compared against the value
            returned by check_newline().
        threshold: Detection confidence below which a file is reported.

    Returns:
        1 if at least one file was reported, otherwise 0.
    """
    exit_code = 0
    for f in files:
        if os.path.isdir(f):
            continue  # directory
        enc, con = check_encode(f)
        if enc is None:
            continue  # binary file
        code = check_newline(f)
        # Collect every finding so that one problem cannot hide another.
        problems = []
        if enc != encode and enc != 'ascii':
            problems.append(' [not ' + encode + '] ' + enc)
        if con < threshold:
            problems.append(' [low confidence] ' + str(con))
        if code != newline:
            problems.append(' [not ' + newline + '] ' + code)
        if problems:
            print(f, ''.join(problems))
            exit_code = 1
    return exit_code

## converter.py
def convert_newline(file_path, lf=True):
    """Convert the newlines of a file in place.

    lf=True  => CR+LF to LF (default)
    lf=False => LF to CR+LF

    The replacement works on raw bytes. Only CR+LF pairs and LF bytes are
    touched: a lone CR is kept as is, and an existing CR+LF is never
    expanded to CR+CR+LF.
    NOTE(review): byte-level replacement presumably expects an encoding in
    which newlines are the single bytes CR and LF (not UTF-16/32) - confirm.

    Args:
        file_path: Path of the file to rewrite.
        lf: Target style; True for LF, False for CR+LF.
    """
    with open(file_path, 'rb') as f:
        # Normalise to LF first so both directions start from one form.
        data = f.read().replace(b'\r\n', b'\n')
    if not lf:
        data = data.replace(b'\n', b'\r\n')
    with open(file_path, 'wb') as f:
        f.write(data)


def convert_all_files(files, origin, target):
    """Re-encode each file in place from one encoding to another.

    Directories in ``files`` are skipped. Each remaining file is read as
    bytes, decoded with ``origin``, encoded with ``target`` and written
    back to the same path.

    Args:
        files: Iterable of paths to convert.
        origin: Name of the encoding the files are currently in.
        target: Name of the encoding to write.
    """
    for path in files:
        if os.path.isdir(path):
            continue  # directory
        with open(path, 'rb') as src:
            raw = src.read()
        # Decode and re-encode before reopening the file for writing, so
        # a codec error is raised while the file is still intact.
        converted = raw.decode(origin).encode(target)
        with open(path, 'wb') as dst:
            dst.write(converted)
	import os
	from chardet.universaldetector import UniversalDetector


	def check_encode(file_path):
		"""Detect the character encoding of a file with chardet.

		The file is opened in binary mode and fed to a UniversalDetector
		one line at a time; feeding stops as soon as the detector reports
		that it is done.

		Args:
			file_path: Path of the file to inspect.

		Returns:
			A ``(encoding, confidence)`` tuple taken from the detector's
			result dictionary.
		"""
		detector = UniversalDetector()
		with open(file_path, mode='rb') as f:
			for binary in f:
				detector.feed(binary)
				if detector.done:
					# The detector has seen enough; skip the rest.
					break
		detector.close()
		return detector.result['encoding'], detector.result['confidence']


	def check_newline(file_path):
		"""Classify the line terminators used in a file.

		The file is read in binary mode and every line that ends with a
		terminator is counted as either CR+LF or bare LF. A final line
		without a terminator is not counted, so it cannot skew the result.

		Args:
			file_path: Path of the file to inspect.

		Returns:
			'CR+LF' when every terminated line ends with CR+LF,
			'LF' when every terminated line ends with a bare LF (this is
			also the answer for a file without any terminator, e.g. an
			empty file), or 'Mixed(CR+LF & LF)' when both kinds occur.
		"""
		crlf = 0
		lf = 0
		with open(file_path, mode='rb') as f:
			# Binary iteration splits on LF only, so a CR+LF line still
			# arrives with both bytes at its end.
			for line in f:
				if line.endswith(b'\r\n'):
					crlf += 1
				elif line.endswith(b'\n'):
					lf += 1
		if crlf and lf:
			return 'Mixed(CR+LF & LF)'
		if crlf:
			return 'CR+LF'
		# Only LF terminators, or no terminator at all: judge as LF.
		return 'LF'


	def check_all_files(files, encode, newline, threshold=0.95):
		"""Report files whose encoding or newline style is unexpected.

		Directories are skipped, and so are files for which check_encode()
		reports no encoding (treated as binary). Every problem found for a
		file is collected and printed together on one line after its path.

		Args:
			files: Iterable of paths to check.
			encode: Expected encoding name, compared against the name
				returned by check_encode(). Files detected as 'ascii' are
				accepted regardless of this value.
			newline: Expected newline label, compared against the value
				returned by check_newline().
			threshold: Detection confidence below which a file is reported.

		Returns:
			1 if at least one file was reported, otherwise 0.
		"""
		exit_code = 0
		for f in files:
			if os.path.isdir(f):
				continue  # directory
			enc, con = check_encode(f)
			if enc is None:
				continue  # binary file
			code = check_newline(f)
			# Collect every finding so one problem cannot hide another.
			problems = []
			if enc != encode and enc != 'ascii':
				problems.append(' [not ' + encode + '] ' + enc)
			if con < threshold:
				problems.append(' [low confidence] ' + str(con))
			if code != newline:
				problems.append(' [not ' + newline + '] ' + code)
			if problems:
				print(f, ''.join(problems))
				exit_code = 1
		return exit_code
	def convert_newline(file_path, lf=True):
		"""Convert the newlines of a file in place.

		lf=True  => CR+LF to LF (default)
		lf=False => LF to CR+LF

		The replacement works on raw bytes. Only CR+LF pairs and LF bytes
		are touched: a lone CR is kept as is, and an existing CR+LF is
		never expanded to CR+CR+LF.
		NOTE(review): byte-level replacement presumably expects an encoding
		in which newlines are the single bytes CR and LF - confirm.

		Args:
			file_path: Path of the file to rewrite.
			lf: Target style; True for LF, False for CR+LF.
		"""
		with open(file_path, 'rb') as f:
			# Normalise to LF first so both directions start from one form.
			data = f.read().replace(b'\r\n', b'\n')
		if not lf:
			data = data.replace(b'\n', b'\r\n')
		with open(file_path, 'wb') as f:
			f.write(data)


	def convert_all_files(files, origin, target):
		"""Re-encode each file in place from one encoding to another.

		Directories in ``files`` are skipped. Each remaining file is read
		as bytes, decoded with ``origin``, encoded with ``target`` and
		written back to the same path.

		Args:
			files: Iterable of paths to convert.
			origin: Name of the encoding the files are currently in.
			target: Name of the encoding to write.
		"""
		for f in files:
			if os.path.isdir(f):
				continue  # directory
			with open(f, 'rb') as fobj:
				data = fobj.read()
			# Decode and re-encode before reopening the file for writing,
			# so a codec error is raised while the file is still intact.
			t = data.decode(origin).encode(target)
			with open(f, 'wb') as fobj:
				fobj.write(t)