Skip to content

Instantly share code, notes, and snippets.

@tribela
Created February 18, 2018 18:38
Show Gist options
  • Save tribela/f8147a1b1496e0d557b3896caed04e60 to your computer and use it in GitHub Desktop.
Save tribela/f8147a1b1496e0d557b3896caed04e60 to your computer and use it in GitHub Desktop.
Naver Cafe search
import sys
import requests
from pyquery import PyQuery
def get_content(link, timeout=10):
    """Download a post page and return the text of its ``#postContent`` node.

    Args:
        link: URL of the post page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The text fragments found under ``#postContent`` joined with
        newlines, stripped of leading/trailing whitespace, with
        non-breaking spaces (``\\xa0``) removed.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the page contains no ``#postContent`` element.
    """
    resp = requests.get(
        link,
        headers={
            # NOTE(review): presumably the mobile site only serves the post
            # when the request appears to come from its search page --
            # confirm before changing or removing this header.
            'Referer': ('https://m.cafe.naver.com/' +
                        'SectionArticleSearch.nhn?query')
        },
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page as a post.
    resp.raise_for_status()
    html = PyQuery(resp.content)
    matches = html('#postContent')
    if not matches:
        # Same exception type the bare ``[0]`` lookup raised, but with a
        # message that tells the caller which page was the problem.
        raise IndexError(f'no #postContent element found at {link}')
    post = matches[0]
    return '\n'.join(post.itertext()).strip().replace('\xa0', '')
def search(term):
    """Search Naver Cafe for ``term`` and fetch the text of every hit.

    The term is wrapped in double quotes before being sent as the
    ``query`` parameter (presumably to request an exact-phrase match --
    confirm against the site's search behaviour).

    Args:
        term: The text to search for.

    Returns:
        A dict mapping each result link found under ``ul#searchList`` to
        the value ``get_content`` returns for that link. Duplicate links
        collapse into a single entry.

    Raises:
        requests.RequestException: If the search request fails, times out,
            or returns an HTTP error status. Errors raised by
            ``get_content`` for an individual link propagate unchanged.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    resp = requests.get(
        'https://m.cafe.naver.com/SectionArticleSearch.nhn',
        params={
            'query': f'"{term}"',
        },
        timeout=10,  # never hang forever on a stalled connection
    )
    resp.raise_for_status()
    html = PyQuery(resp.content)
    hrefs = (a.get('href') for a in html('ul#searchList a'))
    links = [
        # Absolute hrefs pass through urljoin untouched; relative ones (if
        # the page emits any) are resolved so they remain fetchable.
        urljoin(resp.url, href)
        for href in hrefs
        if href  # skip anchors that carry no href attribute
    ]
    return {link: get_content(link) for link in links}
if __name__ == '__main__':
    # Script entry point: print every matching post's URL followed by its
    # text. Requires the search term as the first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a usage hint (written to stderr, exit status 1) rather
        # than an IndexError traceback when the term is missing.
        sys.exit(f'usage: {sys.argv[0]} TERM')
    for url, content in search(sys.argv[1]).items():
        print(url)
        print(content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment