Skip to content

Instantly share code, notes, and snippets.

@gocreating
Last active October 4, 2016 01:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gocreating/290742d3f0e423af43c9458b6062467b to your computer and use it in GitHub Desktop.
Save gocreating/290742d3f0e423af43c9458b6062467b to your computer and use it in GitHub Desktop.
import _mysql
import time
FILE_NAMES = [
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160918',
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160919',
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160920',
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160921',
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160922',
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160923',
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160924',
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160925',
]
SKIP_LINE_COUNT = 26
db=_mysql.connect(
host='localhost',
user='root',
passwd='root',
db='data_mining'
)
for filename in FILE_NAMES:
with open(filename) as f:
print '================'
print 'start parsing', filename
# line indicator
i = SKIP_LINE_COUNT
startTime = time.time()
for _ in range(SKIP_LINE_COUNT):
next(f)
for line in f:
# debugger
i = i + 1
if i % 10000 == 0:
print i, 'entries parsed'
# remove \n character
line = line[:-1]
lineParts = line.split(' ')
# avoid empty line
if len(lineParts) == 2:
entries = lineParts[1].split('|')[1:]
fields = ['`topic_group`']
values = ['\'' + lineParts[0].split('/')[1] + '\'']
for entry in entries:
parts = entry.replace('\'', '').split('=')
# avoid empty entry
if len(parts) == 2:
field = '`' + parts[0] + '`'
# avoid `NaN` sensor value
value = ('\'' + parts[1] + '\'').replace("'NaN'", 'NULL')
# avoid duplicate fields
if field not in fields:
fields.append(field)
values.append(value)
queryString = (
"""
INSERT INTO raw (%s) VALUES (%s)
""" % (','.join(fields), ','.join(values))
)
try:
db.query(queryString)
except Exception as err:
# error reporting
print ''
print '==================== %d line ====================' % i
print line
print ''
print fields
print ''
print values
print ''
print err
elapsedTime = time.time() - startTime
print i - SKIP_LINE_COUNT, 'lines parsed in ', elapsedTime, 'seconds'
DROP TABLE `data_mining`.`raw`;
CREATE TABLE raw (
`topic_group` VARCHAR(255) DEFAULT NULL,
`ver_format` VARCHAR(255) DEFAULT NULL,
`fmt_opt` VARCHAR(255) DEFAULT NULL,
`app` VARCHAR(255) DEFAULT NULL,
`ver_app` VARCHAR(255) DEFAULT NULL,
`device_id` VARCHAR(255) DEFAULT NULL,
`tick` BIGINT DEFAULT NULL,
`date` DATE DEFAULT NULL,
`time` TIME DEFAULT NULL,
`device` VARCHAR(255) DEFAULT NULL,
`s_0` FLOAT DEFAULT NULL,
`s_1` FLOAT DEFAULT NULL,
`s_2` FLOAT DEFAULT NULL,
`s_3` FLOAT DEFAULT NULL,
`s_4` FLOAT DEFAULT NULL,
`s_b0` FLOAT DEFAULT NULL,
`s_b1` FLOAT DEFAULT NULL,
`s_d0` FLOAT DEFAULT NULL,
`s_d1` VARCHAR(255) DEFAULT NULL,
`s_d2` FLOAT DEFAULT NULL,
`s_d3` FLOAT DEFAULT NULL,
`s_g0` FLOAT DEFAULT NULL,
`s_g1` FLOAT DEFAULT NULL,
`s_g2` FLOAT DEFAULT NULL,
`s_g3` FLOAT DEFAULT NULL,
`s_g4` FLOAT DEFAULT NULL,
`s_g5` FLOAT DEFAULT NULL,
`s_g6` FLOAT DEFAULT NULL,
`s_g7` FLOAT DEFAULT NULL,
`s_g8` FLOAT DEFAULT NULL,
`s_h0` FLOAT DEFAULT NULL,
`h` FLOAT DEFAULT NULL,
`s_h1` FLOAT DEFAULT NULL,
`s_h2` FLOAT DEFAULT NULL,
`s_h3` FLOAT DEFAULT NULL,
`s_l0` FLOAT DEFAULT NULL,
`s_l1` FLOAT DEFAULT NULL,
`s_l2` FLOAT DEFAULT NULL,
`s_l3` FLOAT DEFAULT NULL,
`s_o` FLOAT DEFAULT NULL,
`s_t0` FLOAT DEFAULT NULL,
`s_t1` FLOAT DEFAULT NULL,
`s_t2` FLOAT DEFAULT NULL,
`s_t3` FLOAT DEFAULT NULL,
`s_w0` FLOAT DEFAULT NULL,
`s_w1` FLOAT DEFAULT NULL,
`s_r10` FLOAT DEFAULT NULL,
`s_r60` FLOAT DEFAULT NULL,
`s_s` FLOAT DEFAULT NULL,
`s_n0` FLOAT DEFAULT NULL,
`s_n1` FLOAT DEFAULT NULL,
`PM10` FLOAT DEFAULT NULL,
`PM2_5` FLOAT DEFAULT NULL,
`Temperature` FLOAT DEFAULT NULL,
`Humidity` FLOAT DEFAULT NULL,
`gps_lat` FLOAT DEFAULT NULL,
`gps_lon` FLOAT DEFAULT NULL,
`gps_fix` INT DEFAULT NULL,
`gps_num` INT DEFAULT NULL,
`gps_alt` VARCHAR(255) DEFAULT NULL,
`FAKE_GPS` VARCHAR(255) DEFAULT NULL,
`SiteName` VARCHAR(255) DEFAULT NULL,
`SiteID` VARCHAR(255) DEFAULT NULL
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment