Skip to content

Instantly share code, notes, and snippets.

@petitviolet
Last active December 17, 2015 14:28
Show Gist options
  • Save petitviolet/5624321 to your computer and use it in GitHub Desktop.
Save petitviolet/5624321 to your computer and use it in GitHub Desktop.
search.pyで利用する、YahooApiから返って来たxmlから検索結果1件ごとのタイトルとurlとサマリーを抽出する。 タイトルとurlは改行と空白文字を許さず、サマリーはそのまま取得する。
# -*- encoding:utf-8 -*-
import re
# Each raw pattern string stays available as a module-level name next to its
# compiled form. re.DOTALL makes "." match newlines too, so an element whose
# content spans several lines is still captured.

# One complete <Result>...</Result> element (non-greedy, at least one
# character between the tags).
result_pattern = '<Result>(.+?)</Result>'
catch_result = re.compile(result_pattern, re.DOTALL)

# Text between <Title> and </Title> (may be empty).
title_pattern = '<Title>(.*?)</Title>'
catch_title = re.compile(title_pattern, re.DOTALL)

# Text between <Url> and </Url> (may be empty).
link_pattern = '<Url>(.*?)</Url>'
catch_link = re.compile(link_pattern, re.DOTALL)

# Text between <Summary> and </Summary> (may be empty).
summary_pattern = '<Summary>(.*?)</Summary>'
catch_summary = re.compile(summary_pattern, re.DOTALL)

# Matches either a literal backslash followed by "n" (two characters) or any
# single whitespace character; real newlines are covered by \s.
strip_pattern = r'\\n|\s'
stripper = re.compile(strip_pattern, re.DOTALL)
def _first_group(pattern, text, default, strip=False):
    """Return group 1 of the first match of `pattern` in `text`.

    `default` is returned when the pattern does not match. When `strip` is
    true, everything matched by the module-level `stripper` regex is removed
    from the captured text before it is returned.
    """
    match = pattern.search(text)
    if match is None:
        return default
    value = match.group(1)
    return stripper.sub('', value) if strip else value


def split_result(html):
    """Extract [title, link, summary] for every <Result> element in `html`.

    `html` is the XML text to scan (the original docstring describes it as
    the response returned by the Yahoo API). Each <Result>...</Result>
    element found by `catch_result` yields one three-item list:

    * title   -- content of <Title>, with everything `stripper` matches removed
    * link    -- content of <Url>, with everything `stripper` matches removed
    * summary -- content of <Summary>, returned unchanged

    A field whose element is missing from a result is replaced by a fixed
    placeholder string instead of raising.

    Returns a list of [title, link, summary] lists in document order; the
    list is empty when no <Result> element is found.
    """
    splited_result = []
    # finditer only yields successful matches, so no per-item truth test is
    # needed before reading the matched text.
    for result_match in catch_result.finditer(html):
        # group(0) is the whole element, including the <Result> tags.
        block = result_match.group(0)
        title = _first_group(catch_title, block, 'タイトルなかったよ', strip=True)
        link = _first_group(catch_link, block, 'urlなかったよ', strip=True)
        summary = _first_group(catch_summary, block, 'スニペットなかったよ')
        splited_result.append([title, link, summary])
    return splited_result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment