neubig/find-tweets.py

## find-tweets.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import re
import datetime

# Regular expression used to select tweets: the word 電力 ("electric power")
# followed anywhere later in the same text by 供給 ("supply").
pattern = ur'電力.*供給'


def output(fo, tweet_id, user_id, timestamp, body):
    """Write one tweet record to *fo* as a single tab-separated line.

    The line has four columns (tweet id, user id, timestamp, body) and ends
    with a newline.  Literal tab characters in *body* are replaced by the
    two-character sequence backslash-t so they cannot be mistaken for
    column separators.
    """
    escaped_body = body.replace('\t', '\\t')
    record = (tweet_id, user_id, timestamp, escaped_body)
    fo.write('%s\t%s\t%s\t%s\n' % record)

def process(src, dst):
    """Copy the tweets in *src* whose body matches ``pattern`` into *dst*.

    Each non-blank line of *src* is expected to hold four tab-separated
    fields: tweet id, user id, timestamp and body.  The body is decoded as
    UTF-8 and searched with the module-level ``pattern``; matching records
    are written to *dst* through ``output``.

    Args:
        src: Path of the tab-separated input file.
        dst: Path of the output file; it is created or truncated.

    Raises:
        ValueError: If a non-blank line has fewer than four fields.
        UnicodeDecodeError: If a body is not valid UTF-8.
    """
    # Resolve the pattern once instead of on every line.
    matcher = re.compile(pattern)
    # The context managers close (and flush) both files even when a malformed
    # line raises part-way through.
    with open(dst, 'w') as fo:
        with open(src) as fi:
            for line in fi:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing empty line) hold no record.
                    continue
                # Split at most three times so that tabs inside the body stay
                # part of the body instead of breaking the unpacking below.
                (tweet_id, user_id, timestamp, body) = line.split('\t', 3)
                if matcher.search(unicode(body, "utf-8")):
                    output(fo, tweet_id, user_id, timestamp, body)

if __name__ == '__main__':
    # Every command-line argument is an input file; each one gets its own
    # output file named after the input plus the current local date and time.
    for in_path in sys.argv[1:]:
        started = datetime.datetime.now()
        out_path = '%s.%s.txt' % (in_path, started)
        # A parenthesised single expression prints the same text as the
        # statement form.
        print("finding %s in %s and printing to %s" % (pattern, in_path, out_path))
        process(in_path, out_path)
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	import sys
	import re
	import datetime

	# Regular expression used to select tweets: the word 電力 ("electric power")
	# followed anywhere later in the same text by 供給 ("supply").
	pattern = ur'電力.*供給'


	def output(fo, tweet_id, user_id, timestamp, body):
	    """Write one tweet record to *fo* as a single tab-separated line.

	    The line has four columns (tweet id, user id, timestamp, body) and
	    ends with a newline.  Literal tab characters in *body* are written as
	    the two-character sequence backslash-t so they cannot be mistaken for
	    column separators.
	    """
	    fo.write('%s\t%s\t%s\t%s\n' % (tweet_id, user_id, timestamp, body.replace('\t', '\\t')))

	def process(src, dst):
	    """Copy the tweets in *src* whose body matches ``pattern`` into *dst*.

	    Each non-blank line of *src* is expected to hold four tab-separated
	    fields: tweet id, user id, timestamp and body.  The body is decoded
	    as UTF-8 and searched with ``pattern``; matching records are written
	    to *dst* through ``output``.

	    Args:
	        src: Path of the tab-separated input file.
	        dst: Path of the output file; it is created or truncated.

	    Raises:
	        ValueError: If a non-blank line has fewer than four fields.
	        UnicodeDecodeError: If a body is not valid UTF-8.
	    """
	    # Resolve the pattern once instead of on every line.
	    matcher = re.compile(pattern)
	    # The context managers close (and flush) both files even when a
	    # malformed line raises part-way through.
	    with open(dst, 'w') as fo:
	        with open(src) as fi:
	            for line in fi:
	                line = line.strip()
	                if not line:
	                    # Blank lines hold no record.
	                    continue
	                # Split at most three times so that tabs inside the body
	                # stay part of the body instead of breaking the unpacking.
	                (tweet_id, user_id, timestamp, body) = line.split('\t', 3)
	                if matcher.search(unicode(body, "utf-8")):
	                    output(fo, tweet_id, user_id, timestamp, body)

	if __name__ == '__main__':
	    # Every command-line argument is an input file; each one gets its own
	    # output file named after the input plus the current local date and
	    # time.
	    for src in sys.argv[1:]:
	        dst = '%s.%s.txt' % (src, datetime.datetime.now())
	        print("finding %s in %s and printing to %s" % (pattern, src, dst))
	        process(src, dst)