Skip to content

Instantly share code, notes, and snippets.

@16Yongjin
Last active March 23, 2021 13:55
Show Gist options
  • Save 16Yongjin/82d37938042269ce5d70b5271ca579b8 to your computer and use it in GitHub Desktop.
Save 16Yongjin/82d37938042269ce5d70b5271ca579b8 to your computer and use it in GitHub Desktop.
from instagram_private_api import Client, ClientCompatPatch
from pymongo import MongoClient
from pprint import pprint
from datetime import date
from time import sleep
from random import random
def safeget(dct, *keys):
    """Walk nested containers key by key, returning None on any miss.

    Args:
        dct: The outermost container (typically a dict) to start from.
        *keys: Keys or indexes applied one after another.

    Returns:
        The value reached after applying every key, or None as soon as one
        lookup fails. When no keys are given, ``dct`` itself is returned.
    """
    for key in keys:
        try:
            dct = dct[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: missing dict key.
            # IndexError: list index out of range.
            # TypeError: current value is not subscriptable (e.g. None).
            return None
    return dct
def format_timestamp(timestamp):
    """Format a Unix timestamp as a ``YYYY-MM-DDTHH:MM:SS`` string.

    The conversion uses the machine's local timezone and the result carries
    no UTC offset.

    Args:
        timestamp: Seconds since the Unix epoch.

    Returns:
        The formatted local date and time as a string.
    """
    # Imported locally because the module only imports ``date``, which drops
    # the time-of-day and would always render 00:00:00.
    from datetime import datetime

    return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%dT%H:%M:%S')
def extract_item(item):
    """Reduce a raw media dict to the flat record that gets stored.

    Args:
        item: A media dict. The keys ``taken_at``, ``pk``, ``like_count`` and
            ``comment_count`` are required; ``user``, ``caption``,
            ``preview_comments``, ``location`` and ``next_max_id`` are
            optional.

    Returns:
        A dict with the post text, creation time (raw and formatted), author
        id, caption id, media id, like/comment counts, location and the
        item's ``next_max_id`` (None when absent).

    Raises:
        KeyError: If one of the required keys is missing from ``item``.
    """
    user_id = safeget(item, 'user', 'pk')

    # Some posts have only a photo with no caption and carry their hashtags
    # in the author's own comments: collect those comment texts as a fallback.
    own_tagged_texts = []
    # ``or []`` also covers the key being present but set to None.
    for comment in item.get('preview_comments') or []:
        text = safeget(comment, 'text')
        written_by_author = safeget(comment, 'user', 'pk') == user_id
        # Skip non-string text so a None value cannot break the '#' check.
        if written_by_author and isinstance(text, str) and '#' in text:
            own_tagged_texts.append(text)
    comments_with_tags = ''.join(own_tagged_texts)

    return {
        'text': safeget(item, 'caption', 'text') or comments_with_tags,
        'created_at': item['taken_at'],
        'created_at_iso': format_timestamp(item['taken_at']),
        'user_id': user_id,
        'caption_pk': safeget(item, 'caption', 'pk'),
        'id': item['pk'],
        'like_count': item['like_count'],
        'comment_count': item['comment_count'],
        'location': item.get('location'),
        'next_max_id': item.get('next_max_id'),
    }
def extract_items(items):
    """Build one stored record per entry from each entry's ``'media'`` value.

    Args:
        items: Iterable of dicts, each holding a media dict under ``'media'``.

    Returns:
        A list with the ``extract_item`` result for every entry, in order.
    """
    return [extract_item(entry['media']) for entry in items]
user_name = ''  # login ID
password = ''  # password

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client.instagram
collection = db.tag_section

api = Client(user_name, password)

next_media_ids = []  # pagination IDs passed to the next request
for i in range(10000):
    # Query the tag section
    results = api.tag_section('먹스타그램', tab='recent', next_media_ids=next_media_ids)

    # Keep only the fields we need (only the first section is read)
    sections = results['sections']
    items = extract_items(sections[0]['layout_content']['medias']) if sections else []
    if not items:
        # insert_many() rejects an empty document list, and with no items
        # there are no pagination IDs to continue from, so stop here.
        print(i, 'no items returned, stopping')
        break

    # Collect the pagination IDs for the next request.
    # NOTE(review): these are read from each media item's 'next_max_id';
    # confirm against the API response that this is where they live.
    next_media_ids = [item['next_max_id'] for item in items if item['next_max_id'] is not None]

    collection.insert_many(items)  # save to the DB
    print(i, 'saved', len(items))
    sleep(random() * 10 + 5)  # pause 5-15 seconds between requests
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment