Skip to content

Instantly share code, notes, and snippets.

@cnsoft
Created May 26, 2014 14:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cnsoft/1bc4d74df57d18f93839 to your computer and use it in GitHub Desktop.
Save cnsoft/1bc4d74df57d18f93839 to your computer and use it in GitHub Desktop.
Export selected items from one JSON file to a new file.
import json
import codecs
# Export every unit that belongs to one map from 'map.txt' into
# 'output_file.json', then read the result back as a sanity check.

# Load the source file. "utf-8-sig" decodes plain UTF-8 and additionally
# strips a leading BOM, which the JSON parser would otherwise reject.
with codecs.open('map.txt', 'r', encoding='utf-8-sig') as json_file:
    datas = json.load(json_file)
print(type(datas))

map_units = datas["units_inst"]
print(type(map_units))

# Keep only the units placed on the requested map.
targetmap = "map_3"
out_units = [unit for unit in map_units if unit["map_name"] == targetmap]
print(len(out_units))

# Write the selected units using the same top-level layout as the source.
# The "with" block closes (and therefore flushes) the file before it is
# re-read below, so no unbuffered mode is needed.
out_dicts = {"units_inst": out_units}
with codecs.open("output_file.json", "w", encoding="utf-8") as output_file:
    json.dump(out_dicts, output_file, indent=2, sort_keys=True, ensure_ascii=False)
print("done")

# Confirm the written file parses and holds the expected number of units.
with codecs.open("output_file.json", "r", encoding="utf-8-sig") as json_file2:
    datas = json.load(json_file2)
print(len(datas["units_inst"]))
@cnsoft
Copy link
Author

cnsoft commented May 26, 2014

This is my implementation to convert a file from any encoding to UTF-8 without a BOM, replacing Windows line endings with the universal format:

def utf8_converter(file_path, universal_endline=True):
    '''
    Rewrite a file in place as UTF-8 without BOM,
    using universal endlines by default.

    The source encoding is guessed with chardet from the raw file content.

    Parameters
    ----------
    file_path : string, file path ("~" and symlinks are resolved).
    universal_endline : boolean (True),
                        by default convert "\\r\\n" endlines to "\\n".

    Returns
    -------
    0 once the file has been rewritten.

    Raises
    ------
    UnicodeDecodeError if the content cannot be decoded with the
    detected (or fallback) encoding.
    '''

    # Fix file path
    file_path = os.path.realpath(os.path.expanduser(file_path))

    # Read the raw bytes. Binary mode keeps the platform from translating
    # endlines behind our back, and "with" closes the handle on errors too.
    with open(file_path, 'rb') as file_open:
        raw = file_open.read()

    # Decode. chardet reports None as the encoding when it cannot guess
    # (for example on an empty file); fall back to UTF-8 in that case.
    encoding = chardet.detect(raw)['encoding'] or 'utf-8'
    text = raw.decode(encoding)

    # Remove windows end line
    if universal_endline:
        text = text.replace('\r\n', '\n')

    # Encode to UTF-8
    encoded = text.encode('utf8')

    # Remove BOM (only a leading one)
    if encoded.startswith(codecs.BOM_UTF8):
        encoded = encoded[len(codecs.BOM_UTF8):]

    # Write the bytes back untouched; binary mode prevents "\n" from being
    # expanded to "\r\n" again on Windows.
    with open(file_path, 'wb') as file_open:
        file_open.write(encoded)
    return 0

@cnsoft
Copy link
Author

cnsoft commented May 26, 2014

Use this to remove the BOM header from a UTF-8 file:

import codecs
import shutil
import sys

# Copy stdin to stdout, dropping a leading UTF-8 BOM if one is present.

# Work on raw bytes so the first three bytes can be compared with
# codecs.BOM_UTF8 and the rest is copied without any newline or encoding
# translation. Python 3 exposes the byte streams as ".buffer"; streams
# without that attribute are used directly.
stdin = getattr(sys.stdin, 'buffer', sys.stdin)
stdout = getattr(sys.stdout, 'buffer', sys.stdout)

# Pass the first three bytes through unless they are exactly the BOM.
head = stdin.read(3)
if head != codecs.BOM_UTF8:
    stdout.write(head)

# Stream the remainder of the input unchanged.
shutil.copyfileobj(stdin, stdout)
stdout.flush()

Save it as remove_bom.py.

How to use this code: $ remove_bom.py < input.txt > output.txt

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment