Skip to content

Instantly share code, notes, and snippets.

@msakamoto-sf
Created January 4, 2022 02:46

Revisions

  1. msakamoto-sf created this gist Jan 4, 2022.
    142 changes: 142 additions & 0 deletions liked-image-downloader.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,142 @@
    # Twitter で「いいね!」した画像をローカルにダウンロードする Python スクリプト。
    # (自分自身のアカウントを前提)
    #
    # requirements: python3 (>= 3.10)
    #
    # setup:
    # 1. `python3 -m venv venv`
    # 2. `pip install tweepy==4.4.0`
    # (if you installed via windows installer, try 'py -m venv venv')
    #
    # prepare key and secrets:
    # 1. sign up https://developer.twitter.com/en
    # 2. create project and app
    # 3. save your app's consumer keys (API Key and Secret)
    # 4. open your app's "Keys and tokens" page, then generate your Access Token and Secret
    #
    # run:
    # 1. set environment variables (see python script below)
    # 2. `python3 liked-image-downloader.py`
    # - (for win: `py liked-image-downloader.py`)
    #
    # reference:
    # - https://www.tweepy.org/
    # - https://docs.tweepy.org/en/v4.4.0/index.html
    # - Twitterで「いいね」した画像を一括保存するPHPとTwitterAPI | キノコログ
    # - https://kinocolog.com/favo_image_download/
    # - Twitterいいね画像定期保存Bot - Qiita
    # - https://qiita.com/hxbdy625/items/3806bdc2b36f64986119

import datetime
import os
import time
import urllib
import urllib.error
import urllib.parse
import urllib.request
from pprint import pprint

import tweepy

    twitter_username =os.getenv('TWITTER_USERNAME', '')
    oauth_consumer_key = os.getenv('CONSUMER_KEY', '<consumer-key>')
    oauth_consumer_secret = os.getenv('CONSUMER_SECRET', '<consumer-secret>')
    oauth_access_token = os.getenv('TOKEN', '<token>')
    oauth_access_token_secret = os.getenv('TOKEN_SECRET', '<token-secret>')

    client = tweepy.Client(
    consumer_key=oauth_consumer_key,
    consumer_secret=oauth_consumer_secret,
    access_token=oauth_access_token,
    access_token_secret=oauth_access_token_secret)

    res = client.get_user(username=twitter_username, user_auth=True)
    #pprint(res.data)
    twitter_user_id = res.data.id
    print(f'==> twitter username[{twitter_username}], id = [{twitter_user_id}]')

    PHOTO_PREFIX_TO_URLS = {}

    def collect_photo_urls(response):
    #pprint(response)
    liked_tweets = response.data
    result_count = response.meta['result_count']
    next_token = response.meta['next_token']
    print(f'==> {len(liked_tweets)} tweets retrieved, result_count={result_count}, next_token=[{next_token}]')

    media_key_to_photo_urls = {}
    medias = response.includes['media']
    for media in medias:
    media_key = media.media_key
    media_type = media.type
    media_url = media.url if hasattr(media, 'url') else ''
    #print(f'===>> key={media_key}, type={media_type}, url={media_url}')
    if media_url:
    media_key_to_photo_urls[media_key] = media_url

    #pprint(media_key_to_photo_urls)

    for liked_tweet in liked_tweets:
    tweet_id = liked_tweet.id
    tweet_text = liked_tweet.text
    tweet_attachments = liked_tweet.attachments
    tweet_created_at = liked_tweet.created_at
    #print(f'===>> id={tweet_id}, created_at={tweet_created_at}, attachments={tweet_attachments}')
    # timestamp_prefix = datetime.datetime.fromisoformat(tweet_created_at).strftime('%Y%m%d%H%M%S')
    timestamp_prefix = tweet_created_at.strftime('%Y%m%d%H%M%S')
    id_suffix3 = str(tweet_id)[-3:]
    prefix0 = timestamp_prefix + id_suffix3
    #print(prefix0)
    if not tweet_attachments:
    continue
    if 'media_keys' not in tweet_attachments:
    continue
    for idx, media_key in enumerate(tweet_attachments['media_keys']):
    prefix = f'{prefix0}_{idx}'
    photo_url = media_key_to_photo_urls[media_key] if media_key in media_key_to_photo_urls else ''
    #print(f'===>> prefix={prefix}, url={photo_url}')
    if not photo_url:
    print(f'#### NOTICE tweet id={tweet_id}, text={tweet_text} SKIPPED cause url is empty.')
    continue
    if prefix in PHOTO_PREFIX_TO_URLS:
    print(f'#### WARN prefix {prefix} duplicated, SKIPPED for url={photo_url}')
    continue
    PHOTO_PREFIX_TO_URLS[prefix] = photo_url

    #pprint(PHOTO_PREFIX_TO_URLS)

    # MAX_RESULTS = 100
    # LIMIT = 10
    MAX_RESULTS = 100
    LIMIT = 30
    for response in tweepy.Paginator(client.get_liked_tweets,
    id=twitter_user_id,
    user_auth=True,
    max_results=MAX_RESULTS,
    expansions=['attachments.media_keys'],
    media_fields=['type', 'url'],
    tweet_fields=['attachments', 'created_at'],
    limit=LIMIT):
    collect_photo_urls(response)
    time.sleep(1)

    #pprint(PHOTO_PREFIX_TO_URLS)
    TOTAL_SIZE = len(PHOTO_PREFIX_TO_URLS)
    print(f'==> photo url : {TOTAL_SIZE} urls collected.')

    save_dirname = './images_' + datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
    os.mkdir(save_dirname)

    count = 1
    for prefix, url in PHOTO_PREFIX_TO_URLS.items():
    urlpath = urllib.parse.urlparse(url).path
    urlext = os.path.splitext(urlpath)[1] # we get ".ext" string
    localfilepath = f'{save_dirname}/{prefix}{urlext}'
    print(f'{count}/{TOTAL_SIZE}, {url} -> {localfilepath}')
    try:
    with urllib.request.urlopen(url) as urlconn:
    data = urlconn.read()
    with open(localfilepath, mode='wb') as localfile:
    localfile.write(data)
    except urllib.error.URLError as e:
    print(e)

    count += 1
    time.sleep(1)