16Yongjin/instaram_tag_section.py

## instaram_tag_section.py
from instagram_private_api import Client, ClientCompatPatch
from pymongo import MongoClient
from pprint import pprint
from datetime import date
from time import sleep
from random import random

def safeget(dct, *keys):
    """Follow ``keys`` one after another into a nested structure.

    Each key is applied by subscripting the value reached so far. If any
    step raises ``KeyError`` (key missing) or ``TypeError`` (the current
    value cannot be subscripted that way, e.g. it is ``None``), the walk
    stops and ``None`` is returned. With no keys, ``dct`` itself is returned.
    """
    current = dct
    for step in keys:
        try:
            current = current[step]
        except (KeyError, TypeError):
            # Missing key or non-subscriptable intermediate value.
            return None
    return current

def format_timestamp(timestamp):
  """Format a Unix timestamp as a ``YYYY-MM-DDTHH:MM:SS`` string.

  The conversion uses the machine's local timezone (the result carries no
  UTC offset).

  Args:
    timestamp: Seconds since the Unix epoch (int or float).

  Returns:
    The formatted local date and time as a string.
  """
  # Imported here because the module only imports ``date``, which has no
  # time-of-day component and would always render 00:00:00.
  from datetime import datetime

  # Use the caller's timestamp rather than a fixed constant.
  return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%dT%H:%M:%S')

def extract_item(item):
  """Flatten one raw media entry into the record that gets stored.

  ``item`` is accessed with ``.get`` and subscripting, so it is expected to
  be dict-like. ``taken_at``, ``pk``, ``like_count`` and ``comment_count``
  are read by subscript and raise ``KeyError`` when absent; the remaining
  fields fall back to ``None``.

  Returns:
    A dict with the keys ``text``, ``created_at``, ``created_at_iso``,
    ``user_id``, ``caption_pk``, ``id``, ``like_count``, ``comment_count``,
    ``location`` and ``next_max_id``.
  """
  author_pk = safeget(item, 'user', 'pk')

  # Some posts have only a photo with no caption, and the author puts the
  # hashtags in a comment instead: gather the author's own preview comments
  # that contain a '#'.
  tagged_comment_texts = [
    safeget(comment, 'text')
    for comment in item.get('preview_comments', [])
    if safeget(comment, 'user', 'pk') == author_pk and '#' in comment.get('text', '')
  ]
  comments_with_tags = ''.join(tagged_comment_texts)

  record = {}
  # Prefer the caption; fall back to the concatenated hashtag comments.
  record['text'] = safeget(item, 'caption', 'text') or comments_with_tags
  record['created_at'] = item['taken_at']
  record['created_at_iso'] = format_timestamp(item['taken_at'])
  record['user_id'] = author_pk
  record['caption_pk'] = safeget(item, 'caption', 'pk')
  record['id'] = item['pk']
  record['like_count'] = item['like_count']
  record['comment_count'] = item['comment_count']
  record['location'] = item.get('location', None)
  record['next_max_id'] = item.get('next_max_id', None)
  return record

def extract_items(items):
  """Return a list of extracted records, one per entry in ``items``.

  Each entry must have a ``'media'`` key; its value is passed to
  ``extract_item``.
  """
  return [extract_item(entry['media']) for entry in items]

user_name = '' # login ID passed to Client (blank here; fill in before running)
password = '' # login password passed to Client

# Connect to MongoDB: database "instagram", collection "tag_section"
client = MongoClient('mongodb://localhost:27017/')
db = client.instagram
collection = db.tag_section

api = Client(user_name, password)
next_media_ids = [] # pagination IDs sent with the next tag_section request (empty for the first one)

# Fetch up to 10000 pages of the hashtag's "recent" tab and store each page.
for i in range(10000):
  # Query the tag section, passing along the pagination IDs from the previous page.
  results = api.tag_section('먹스타그램', tab='recent', next_media_ids=next_media_ids)
  # Keep only the fields we store.
  items = extract_items(results['sections'][0]['layout_content']['medias'])
  if not items:
    # insert_many() rejects an empty document list, so stop cleanly instead
    # of crashing when a page comes back with no media.
    print(i, 'no items returned; stopping')
    break
  # Collect the pagination IDs for the next request.
  # NOTE(review): this reads 'next_max_id' from each extracted item; confirm
  # against the API response whether the cursor actually lives on the media
  # entries or on the top-level result.
  next_media_ids = [item['next_max_id'] for item in items if item['next_max_id'] is not None]
  collection.insert_many(items)  # save to the DB
  print(i, 'saved', len(items))
  sleep(random() * 10 + 5)  # pause 5-15 seconds between requests