Skip to content

Instantly share code, notes, and snippets.

@ZiTAL
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ZiTAL/1b011032616a5b3bf7e6 to your computer and use it in GitHub Desktop.
Save ZiTAL/1b011032616a5b3bf7e6 to your computer and use it in GitHub Desktop.
Python: Remove BOM from UTF-8 files
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys, os, re, binascii
# The directory to clean is a mandatory first command-line argument.
if len(sys.argv) < 2:
    # Passing the message to sys.exit() writes it to stderr and ends the
    # process with status 1, so shells and calling scripts can detect the
    # usage error (a bare sys.exit() would report success).
    sys.exit('Errorea: Karpeta argumentu bidez sartu')

# Root directory whose files will be scanned; read later by the script.
dir = sys.argv[1]
# List of files found so far; scandir() appends to it.
file_list = []


def scandir(dir):
    """Append the path of every regular file below *dir* to ``file_list``.

    The tree is walked with os.walk(), which does not descend into
    symbolic links that point at directories, so a link cycle cannot
    cause unbounded recursion.  Symbolic links that point at regular
    files are still collected.

    If *dir* is not an existing directory nothing is added.  An unreadable
    directory inside the tree raises OSError.
    """
    if not os.path.isdir(dir):
        return

    def _reraise(error):
        # os.walk() skips unreadable directories silently unless an
        # onerror callback is supplied; surface the failure instead.
        raise error

    for parent, _subdirs, names in os.walk(dir, onerror=_reraise):
        for name in names:
            path = os.path.join(parent, name)
            # Skip entries that are not regular files (broken links,
            # sockets, ...).
            if os.path.isfile(path):
                file_list.append(path)
# Collect the list of files below the requested directory.
scandir(dir)

# Byte sequence of the UTF-8 byte order mark (EF BB BF).
UTF8_BOM = b'\xef\xbb\xbf'

print("Fitxategi honeetatik BOM-a kentzen:")
for f in file_list:
    # Binary mode keeps every byte intact (no newline translation or
    # decoding); only the first three bytes are needed to detect the BOM.
    with open(f, 'rb') as source:
        if source.read(len(UTF8_BOM)) != UTF8_BOM:
            continue
        # The file starts with a BOM: keep everything that follows it.
        remainder = source.read()
    print(f)
    # Rewrite the file in place without the leading BOM.
    with open(f, 'wb') as target:
        target.write(remainder)
print("Eginda")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment