Created
March 17, 2011 07:11
-
-
Save whym/873955 to your computer and use it in GitHub Desktop.
ANPI NLP の <location> タグに緯度経度情報を付加するスクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# assign latitude and longitude for <location> tags
# 使い方: | |
# | |
# 1. このファイルをanpi_geocode.pyとして保存する。 | |
# 2. http://code.google.com/p/geopy/ をインストール(Pythonのeasy_install、またはソースから)。 | |
# 3. http://code.google.com/intl/ja/apis/maps/signup.html で取得したAPIキーを変数 apikey に代入。
# 4. 下記のようにして、 tweets.tsv を変換し、緯度経度情報を埋め込む。 | |
# python anpi_geocode.py < tweets.tsv > tweets.geocoded.tsv | |
# | |
# 詳しくは http://trans-aid.jp/ANPI_NLP/index.php/メインページ | |
import re
import sys
import time

import geopy

# Google Maps API key handed to the geocoder below. Replace it with the key
# you obtained from http://code.google.com/intl/ja/apis/maps/signup.html
apikey = 'ABQIAAAAsndJU_61k7mvWOu8ZPW7DBTF-780seiXaP53HoJEncfLvlfj1xQ8UAFVhJAvkAe4LmY6Rfu19DIn3A'

# Number of seconds geocode() sleeps before each lookup request.
waitlen = 0.3

# Shared geocoding client, created once at import time and used by geocode().
geocoder = geopy.geocoders.Google(apikey)
def geocode(location_match):
    """Return the matched <location> element annotated with geocoding results.

    Written to be used as the replacement callback of ``re.sub``.

    Args:
        location_match: a regex match whose group 1 is the attribute text of
            the opening ``<location`` tag and whose group 2 is the location
            name enclosed by the tag.

    Returns:
        The rebuilt ``<location>`` element. On success it gains
        ``geocoded="true"`` plus ``lat``, ``lng`` and ``place`` attributes.
        When the lookup raises ``ValueError`` or ``GQueryError`` it gains
        only ``geocoded="false"``. Any other exception propagates.
    """
    # Function-scope import so this fix does not depend on the file's
    # top-level import block.
    from xml.sax.saxutils import escape

    # Pause before every request to throttle calls to the geocoding service.
    time.sleep(waitlen)
    attrs = location_match.group(1)
    location = location_match.group(2)
    try:
        # The location name is sent to the geocoder as UTF-8 bytes.
        place, (lat, lng) = geocoder.geocode(location.encode('utf-8'))
    except (ValueError, geopy.geocoders.google.GQueryError):
        return '<location%s geocoded="false">%s</location>' % (attrs, location)
    # The place name comes from the geocoding service. Escape &, <, > and the
    # double quote so it cannot terminate or corrupt the place="..." attribute.
    safe_place = escape(place, {'"': '&quot;'})
    return '<location%s geocoded="true" lat="%f" lng="%f" place="%s">%s</location>' % (
        attrs, lat, lng, safe_place, location)
if __name__ == '__main__':
    # Group 1 captures the opening tag's attribute text, group 2 the enclosed
    # location name; both are non-greedy so several tags on one line are
    # matched separately.
    location_pat = re.compile(r'<location(.*?)>(.*?)</location>')
    for line in sys.stdin:
        # Remove only the line terminator. Slicing off the last byte
        # unconditionally would drop a real character (and could split a
        # multi-byte UTF-8 sequence, making decode() fail) when the final
        # line of the input has no trailing newline.
        text = line.rstrip('\n').decode('utf-8')
        # Every <location> element is rewritten by geocode(); the result is
        # written back out as UTF-8, one output line per input line.
        print(location_pat.sub(geocode, text).encode('utf-8'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment