Skip to content

Instantly share code, notes, and snippets.

@empjustine
Forked from cnicodeme/pocket.py
Last active May 16, 2020 08:24
Show Gist options
  • Save empjustine/fa1e632733ea44f72ec07f4494174c16 to your computer and use it in GitHub Desktop.
Save empjustine/fa1e632733ea44f72ec07f4494174c16 to your computer and use it in GitHub Desktop.
Update your reading list by running this local script.
.idea/
venv/
__pycache__/

You have accepted the connection

Please hit enter on the terminal.

"""
https://getpocket.com/developer/docs/authentication
The Pocket Authentication API uses a variant of OAuth 2.0 for authentication. OAuth 2.0 is meant to be
straightforward to implement, and also provides increased security for user authentication because 3rd party client
apps no longer need to request or store a user's login information to authenticate with Pocket.
"""
import inspect
import os
import urllib.parse
import requests
# Extra HTTP headers sent with both OAuth requests made by get_access_token.
_AUTHENTICATION_HEADERS = {"X-Accept": "application/json"}

# Redirect target handed to Pocket during authorization; kept as a ParseResult
# so callers render it with `.geturl()`.
_REDIRECT_URI = urllib.parse.urlparse(
    "https://gist.github.com/empjustine/fa1e632733ea44f72ec07f4494174c16#file-done-md"
)
def get_access_token(consumer_key):
    """
    Run the interactive Pocket OAuth flow and print the resulting credentials.

    Steps performed:
    1. POST to /v3/oauth/request to obtain a request token (the "code" field).
    2. Print the authorization URL and wait until the user presses enter.
    3. POST to /v3/oauth/authorize to exchange the request token for an access token.
    4. Print `POCKET_CONSUMER_KEY=...` and `POCKET_ACCESS_TOKEN=...` lines, then terminate.

    :param consumer_key: consumer key of the Pocket application.
    :raises requests.HTTPError: when either OAuth call answers with an error status.
    :raises SystemExit: always, after the credentials were printed; this function never returns.
    """
    redirect_uri = _REDIRECT_URI.geturl()

    oauth_request = requests.post(
        "https://getpocket.com/v3/oauth/request",
        json={"consumer_key": consumer_key, "redirect_uri": redirect_uri},
        headers=_AUTHENTICATION_HEADERS,
    )
    oauth_request.raise_for_status()
    request_token = oauth_request.json().get("code")

    # The user has to approve the request token in a browser before it can be exchanged.
    query_string = urllib.parse.urlencode(
        {"request_token": request_token, "redirect_uri": redirect_uri}
    )
    print(f"https://getpocket.com/auth/authorize?{query_string}")
    input("Please hit enter once you have accepted the connection on the browser.")

    oauth_authorize = requests.post(
        "https://getpocket.com/v3/oauth/authorize",
        json={"consumer_key": consumer_key, "code": request_token},
        headers=_AUTHENTICATION_HEADERS,
    )
    oauth_authorize.raise_for_status()
    access_token = oauth_authorize.json().get("access_token")

    print(f"POCKET_CONSUMER_KEY={consumer_key}")
    print(f"POCKET_ACCESS_TOKEN={access_token}")
    # `exit()` is a helper injected by the `site` module for interactive use and is
    # missing under `python -S`; raising SystemExit is the equivalent that always works.
    raise SystemExit
def _color1(str_segment):
    """Wrap `str_segment` in the escape sequences `ESC[1m ESC[92m ... ESC[0m ESC[0m`."""
    template = "\033[1m\033[92m{}\033[0m\033[0m"
    return template.format(str_segment)
def get_consumer_key():
    """
    Return the Pocket consumer key stored in the POCKET_CONSUMER_KEY environment variable.

    :returns: the value of `os.environ["POCKET_CONSUMER_KEY"]`.
    :raises AssertionError: when the variable is not set; the message explains how to
        create a Pocket application.
    """
    try:
        return os.environ["POCKET_CONSUMER_KEY"]
    except KeyError:
        # Raised explicitly instead of via `assert`, so the helpful message survives
        # `python -O`, which strips assert statements.
        raise AssertionError(_POCKET_CONSUMER_KEY_MISSING_MESSAGE) from None
def required_body_parameters():
    """
    Build the credential fields merged into every authenticated Pocket request body.

    :returns: dict with `consumer_key` (from get_consumer_key) and `access_token`
        (from the POCKET_ACCESS_TOKEN environment variable).
    :raises KeyError: when POCKET_ACCESS_TOKEN is not set.
    """
    body = {"consumer_key": get_consumer_key()}
    body["access_token"] = os.environ["POCKET_ACCESS_TOKEN"]
    return body
# Message attached to the assertion in get_consumer_key when POCKET_CONSUMER_KEY is unset.
_POCKET_CONSUMER_KEY_MISSING_MESSAGE = (
    "\n"
    "POCKET_CONSUMER_KEY missing.\n"
    "You will need a Pocket application.\n"
    "You can create one at:\n"
    "https://getpocket.com/developer/apps/new\n"
)
"""
Rate Limits
https://getpocket.com/developer/docs/rate-limits
The Pocket API has two separate rate limits. These dictate how many calls can be made to the server within a given time.
Enforcing rate limits prevents a single app or user from overwhelming the server. The response codes will tell you if
you've hit your limit. Your application should be looking for these and if it encounters a rate limit status code, it
should back off until it hits the reset time. Ignoring these codes may cause your access to be disabled.
User Limit
Each user is limited to 320 calls per hour. This should be very sufficient for most users as the average user only makes
changes to their list periodically. To ensure the user stays within this limit, make use of the send method for batching
requests.
Consumer Key Limit
Each application is limited to 10,000 calls per hour. (...)
"""
import time
import typing
import gpauthentication
import gptagduration
import requests
class RateLimitResponseHeader(typing.NamedTuple):
    """
    Response Headers
    https://getpocket.com/developer/docs/rate-limits

    The Pocket API responses include custom headers that provide information about the current status of rate limiting
    for both the current user and consumer key.

    - `'X-Limit-User-Limit'`: Current rate limit enforced per user
    - `'X-Limit-User-Remaining'`: Number of calls remaining before hitting user's rate limit
    - `'X-Limit-User-Reset'`: Seconds until user's rate limit resets
    - `'X-Limit-Key-Limit'`: Current rate limit enforced per consumer key
    - `'X-Limit-Key-Remaining'`: Number of calls remaining before hitting consumer key's rate limit
    - `'X-Limit-Key-Reset'`: Seconds until consumer key rate limit resets

    Each instance pairs the name of one "remaining" header with the name of its matching "reset" header.
    """

    # Name of the header carrying the number of calls still allowed.
    remaining_header_key: str
    # Name of the header carrying the seconds until the limit resets.
    reset_header_key: str

    # Annotations are quoted so that defining the class does not require `requests` to be resolvable.
    def remaining(self, response: "requests.Response") -> int:
        """
        Return the number of calls left according to `response`.

        A missing header is read as "1", i.e. as "not exhausted".
        """
        return int(response.headers.get(self.remaining_header_key, "1"))

    def reset(self, response: "requests.Response") -> int:
        """
        Return how many seconds to wait before calling again.

        :returns: 0 while calls remain; otherwise the value of the reset header
            (0 when that header is missing).
        """
        if self.remaining(response) > 0:
            return 0
        # The field is `reset_header_key`; the former `self.reset_header` raised AttributeError
        # exactly when a limit had been exhausted.
        return int(response.headers.get(self.reset_header_key, "0"))
# One entry per rate limit (per user, per consumer key); each pairs the
# "remaining" header name with its matching "reset" header name.
LIMIT_REMAINING_HEADERS = {
    RateLimitResponseHeader(
        remaining_header_key=f"X-Limit-{scope}-Remaining",
        reset_header_key=f"X-Limit-{scope}-Reset",
    )
    for scope in ("User", "Key")
}

# Value of the retrieve `tag` parameter that selects untagged items.
TAG_UNTAGGED = "_untagged_"
class RetrieveParameters(typing.TypedDict, total=False):
    """
    Pocket API: Retrieving a User's Pocket Data
    https://getpocket.com/developer/docs/v3/retrieve

    Optional filters accepted by the retrieve call; every key may be omitted.

    `state`
        `'unread'` (default) = unread items only; `'archive'` = archived items only;
        `'all'` = both unread and archived items.
    `favorite`
        `0` = un-favorited items only; `1` = favorited items only.
    `tag`
        a tag name = items tagged with that tag; `'_untagged_'` = untagged items only.
    `contentType`
        `'article'` = articles only; `'video'` = videos or articles with embedded videos;
        `'image'` = images only.
    `sort`
        `'newest'` = newest to oldest; `'oldest'` = oldest to newest;
        `'title'` = alphabetically by title; `'site'` = alphabetically by url.
    `detailType`
        `'simple'` = basic information about each item (title, url, status, and more);
        `'complete'` = all data about each item (tags, images, authors, videos, and more).
    `search`
        only items whose title or url contain the search string.
    `domain`
        only items from a particular domain.
    `since`
        only items modified since the given unix timestamp.
    `count`
        only return this number of items.
    `offset`
        used only with `count`; start returning from this position of the results.
    """

    state: typing.Literal["unread", "archive", "all"]
    favorite: typing.Literal[0, 1]
    tag: str
    contentType: typing.Literal["article", "video", "image"]
    sort: typing.Literal["newest", "oldest", "title", "site"]
    detailType: typing.Literal["simple", "complete"]
    search: str
    domain: str
    since: int
    count: int
    offset: int
def retrieve(data: RetrieveParameters):
    """
    Pocket API: Retrieving a User's Pocket Data
    https://getpocket.com/developer/docs/v3/retrieve

    Pocket's /v3/get endpoint is a single call that is incredibly versatile. A few examples of the types of requests
    you can make:

    - Retrieve a user’s list of unread items
    - Sync data that has changed since the last time your app checked
    - Retrieve paged results sorted by the most recent saves
    - Retrieve just videos that the user has saved
    - Search for a given keyword in item’s title and url
    - Retrieve all items for a given domain

    and more

    Required Permissions
    In order to use the /v3/get endpoint, your consumer key must have the "Retrieve" permission.

    :param data: retrieve filters; merged over the credential fields in the form body.
    :returns: the `requests.Response` of the first call that did not report an exhausted rate limit.
    :raises requests.HTTPError: when that response carries an error status.
    """
    url = "https://getpocket.com/v3/get"
    while True:
        response = requests.post(
            url=url, data={**gpauthentication.required_body_parameters(), **data},
        )
        for header in LIMIT_REMAINING_HEADERS:
            wait_interval = header.reset(response)
            if wait_interval > 0:
                print(f"> Throttling; header: {header}, wait_interval: {wait_interval}.")
                # Wait out the reset window (plus a small margin), then repeat the request.
                # The former retry called `retrieve(url, data)` and failed with TypeError.
                time.sleep(wait_interval + 2)
                break
        else:
            # No limit exhausted: surface HTTP errors and hand the response back.
            response.raise_for_status()
            return response
def modify(actions: typing.Iterable[typing.Union[gptagduration.TagsAdd]]):
    """
    Pocket API: Modifying a User's Pocket Data
    https://getpocket.com/developer/docs/v3/modify

    Pocket’s /v3/send endpoint allows you to make a change or batch several changes to a user’s list or Pocket data.

    :param actions: the actions to send in one batch.
    :returns: the `requests.Response` of the first call that did not report an exhausted rate limit.
    :raises requests.HTTPError: when that response carries an error status.
    """
    # Materialise once: the JSON encoder cannot serialise a generator, and the same
    # batch has to be sent again if the request is throttled.
    actions = list(actions)
    while True:
        response = requests.post(
            url="https://getpocket.com/v3/send",
            json={**gpauthentication.required_body_parameters(), "actions": actions},
        )
        for header in LIMIT_REMAINING_HEADERS:
            wait_interval = header.reset(response)
            if wait_interval > 0:
                print(f"> Throttling; header: {header}, wait_interval: {wait_interval}.")
                # Wait out the reset window (plus a small margin), then repeat the request.
                time.sleep(wait_interval + 2)
                break
        else:
            # No limit exhausted: surface HTTP errors and hand the response back.
            response.raise_for_status()
            return response
#!/usr/bin/env python
import json
import os
import gpauthentication
import gpratelimitedpost
import gptagduration
# Page size: sent as `count` with every retrieve call and used as the offset step between calls.
ARTICLES_PER_ITERATION = 500
def main():
    """
    Entry point: authenticate when needed, then walk the reading list and print a summary.

    Without POCKET_ACCESS_TOKEN in the environment the OAuth flow is started for the
    configured consumer key. Paging starts at POCKET_OFFSET (default 0).
    """
    if "POCKET_ACCESS_TOKEN" not in os.environ:
        gpauthentication.get_access_token(gpauthentication.get_consumer_key())

    start = int(os.environ.get("POCKET_OFFSET", "0"))
    totals = _get_articles_offset(start, 0, 0)
    offset, updated, found_tags = totals
    summary = {"offset": offset, "updated": updated, "found_tags": found_tags}
    print(summary)
def _get_articles_offset(offset, updated, found_tags):
    """
    Page through the reading list from `offset`, tagging items page by page.

    Every page is dumped to `offset_<offset>.json`; paging stops at the first page
    whose "list" is empty.

    :param offset: position of the first item to request.
    :param updated: running count of actions applied so far.
    :param found_tags: carried through unchanged.
    :returns: tuple `(offset, updated, found_tags)` as of the empty page.
    """
    while True:
        next_offset = offset + ARTICLES_PER_ITERATION
        print(f"Loading items [{offset} ... {next_offset}].")

        page = gpratelimitedpost.retrieve(
            data=gpratelimitedpost.RetrieveParameters(
                state="all",
                sort="newest",
                detailType="complete",
                count=ARTICLES_PER_ITERATION,
                offset=offset,
            )
        )
        contents = page.json()

        # Keep a raw copy of each page on disk, including the final empty one.
        with open(f"offset_{offset}.json", "w") as dump_file:
            json.dump(contents, dump_file, indent=2)

        item_count = len(contents["list"])
        if item_count == 0:
            return offset, updated, found_tags
        print(f"> Found {item_count} items.")

        actions = list(gptagduration.tag_actions(contents))
        if actions:
            print(f"> Applying {len(actions)} actions.")
            gpratelimitedpost.modify(actions=actions)

        offset = next_offset
        updated += len(actions)
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
import typing
# Ascending upper bounds used by _tag_name_from_duration to bucket an estimated
# duration (presumably minutes, matching the "NN minutes" tag names it produces).
DURATION_BREAKPOINTS = (2, 5, 10, 15, 20, 30, 45, 60)
class TagsAdd(typing.TypedDict):
    """
    Action: tags_add
    https://getpocket.com/developer/docs/v3/modify#action_tags_add

    Add one or more tags to an item.

    item_id
    - `int` = The id of the item to perform the action on.
    tags
    - `str` = A comma-delimited list of one or more tags.
    time
    - `typing.Optional[timestamp]` = The time the action occurred.
      NOTE(review): listed here as an optional field of the action, but it is not
      declared as a key of this TypedDict.
    """

    # Constant discriminator naming the action.
    action: typing.Literal["tags_add"]
    item_id: int
    tags: str
def tag_duration(article, item_id):
    """
    Yield a `tags_add` action for `article` unless it already carries its duration tag.

    :param article: item mapping; must contain "item_id" equal to `item_id`.
    :param item_id: key under which the article was listed.
    :returns: generator yielding zero or one TagsAdd action.
    """
    assert article["item_id"] == item_id
    tag_name = _tag_name_from_duration(_article_duration(article))
    existing_tags = article.get("tags", dict())
    if tag_name in existing_tags.keys():
        return
    yield TagsAdd(action="tags_add", item_id=item_id, tags=tag_name)
def _tag_name_from_duration(duration):
    """
    Map `duration` to the tag of the first breakpoint that exceeds it.

    :returns: `"NN minutes"` (zero-padded to two digits) for the first entry of
        DURATION_BREAKPOINTS greater than `duration`, or `"60 minutes+"` when none is.
    """
    upper_bound = next(
        (candidate for candidate in DURATION_BREAKPOINTS if duration < candidate),
        None,
    )
    if upper_bound is None:
        return "60 minutes+"
    return f"{upper_bound:02} minutes"
def _article_duration(article):
    """
    Estimate the duration of `article` from its word and image counts.

    Words are converted at 275 per 60 units and each image adds 12 units; the sum is
    divided by 60 (presumably seconds converted to minutes, matching the tag names).

    :param article: item mapping; "word_count" (int or numeric string) and "images"
        (any sized container) are both optional.
    :returns: the estimate as a float; 0.0 when neither field is present.
    """
    # A missing or null word count is treated as 0 instead of crashing in int(None),
    # mirroring the default already applied to "images".
    word_count = int(article.get("word_count") or 0)
    image_count = len(article.get("images") or ())
    word_duration = (word_count / 275) * 60
    image_duration = image_count * 12
    return (word_duration + image_duration) / 60
def tag_actions(articles):
    """
    Yield the tagging actions for every item of a retrieve response.

    :param articles: decoded response; "list" is expected to map item ids to items.
    :returns: generator of the actions produced by tag_duration for each item.
    """
    # An absent or empty "list" (other code in this project checks it with
    # `len(...) == 0`, so it may be an empty JSON array rather than an object)
    # yields nothing instead of failing on `.items()`.
    items = articles.get("list") or {}
    for item_id, article in items.items():
        yield from tag_duration(article, item_id)
import collections
import json
import pathlib
import urllib.parse
# Item fields whose host names are counted.
CONTENT_URL_KEYS = ("given_url", "resolved_url")

# Summarise every `offset_*.json` dump in the current directory: print how often
# each tag and each URL host name occurs.
if __name__ == "__main__":
    tags = collections.Counter()
    urls = collections.Counter()
    for offset_path in pathlib.Path.cwd().glob("offset_*.json"):
        with offset_path.open("r") as f:
            offset = json.load(f)
        articles = offset["list"]
        if len(articles) == 0:
            # glob() yields files in no particular order, so an empty dump must be
            # skipped rather than ending the scan of the remaining files.
            continue
        for item_id, article in articles.items():
            assert item_id == article["item_id"]
            # Pass the keys view, not the dict: Counter.update() on a mapping would
            # add its values as counts instead of counting the tag names.
            tags.update(article.get("tags", dict()).keys())
            urls.update(
                urllib.parse.urlsplit(article[content_url_key]).netloc
                for content_url_key in CONTENT_URL_KEYS
            )
    print(tags)
    print(urls)
MIT License
Copyright (c) 2020 minute-pocket
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
requests==2.23.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment