You have accepted the connection
Please hit enter on the terminal.
.idea/ | |
venv/ | |
__pycache__/ |
""" | |
https://getpocket.com/developer/docs/authentication | |
The Pocket Authentication API uses a variant of OAuth 2.0 for authentication. OAuth 2.0 is meant to be | |
straightforward to implement, and also provides increased security for user authentication because 3rd party client | |
apps no longer need to request or store a user's login information to authenticate with Pocket. | |
""" | |
import inspect | |
import os | |
import urllib.parse | |
import requests | |
# Sent with both OAuth calls (as `headers=`) so that Pocket replies with JSON.
_AUTHENTICATION_HEADERS = {"X-Accept": "application/json"}

# URL passed as `redirect_uri` in the OAuth request and in the authorization
# link; kept as a parsed result so callers can render it with `.geturl()`.
_REDIRECT_URI = urllib.parse.urlparse(
    "https://gist.github.com/empjustine/fa1e632733ea44f72ec07f4494174c16#file-done-md"
)
def get_access_token(consumer_key):
    """
    Run the interactive Pocket OAuth flow and print the resulting credentials.

    Steps performed:
    1. Request a request token ("code") from /v3/oauth/request.
    2. Print the authorization URL and wait for the user to press enter.
    3. Exchange the request token for an access token at /v3/oauth/authorize.
    4. Print `POCKET_CONSUMER_KEY=...` and `POCKET_ACCESS_TOKEN=...` lines.

    This function never returns normally: it terminates the interpreter by
    raising `SystemExit` once the credentials have been printed.

    :param consumer_key: consumer key of the Pocket application.
    :raises requests.HTTPError: when either OAuth call answers with an error status.
    :raises SystemExit: always, after printing the credentials.
    """
    redirect_uri = _REDIRECT_URI.geturl()

    oauth_request = requests.post(
        "https://getpocket.com/v3/oauth/request",
        json={"consumer_key": consumer_key, "redirect_uri": redirect_uri},
        headers=_AUTHENTICATION_HEADERS,
    )
    oauth_request.raise_for_status()
    request_token = oauth_request.json().get("code")

    query_string = urllib.parse.urlencode(
        {"request_token": request_token, "redirect_uri": redirect_uri}
    )
    print(f"https://getpocket.com/auth/authorize?{query_string}")
    input("Please hit enter once you have accepted the connection on the browser.")

    oauth_authorize = requests.post(
        "https://getpocket.com/v3/oauth/authorize",
        json={"consumer_key": consumer_key, "code": request_token},
        headers=_AUTHENTICATION_HEADERS,
    )
    oauth_authorize.raise_for_status()
    access_token = oauth_authorize.json().get("access_token")

    print(f"POCKET_CONSUMER_KEY={consumer_key}")
    print(f"POCKET_ACCESS_TOKEN={access_token}")

    # `exit()` is a helper injected by the `site` module for interactive use and
    # is not guaranteed to exist (e.g. `python -S`); raising SystemExit directly
    # has the same effect (exit status 0) without that dependency.
    raise SystemExit
def _color1(str_segment):
    """Wrap `str_segment` in the ANSI escapes `ESC[1m` + `ESC[92m`, followed by two `ESC[0m` resets."""
    prefix = "\033[1m" + "\033[92m"
    suffix = "\033[0m" * 2
    return f"{prefix}{str_segment}{suffix}"
def get_consumer_key():
    """
    Return the Pocket consumer key from the `POCKET_CONSUMER_KEY` environment variable.

    :raises AssertionError: when the variable is not set; the message explains
        how to create a Pocket application.
    """
    try:
        return os.environ["POCKET_CONSUMER_KEY"]
    except KeyError:
        # Raised explicitly instead of using an `assert` statement: asserts are
        # stripped under `python -O`, which would leave a bare KeyError without
        # the explanatory message.
        raise AssertionError(_POCKET_CONSUMER_KEY_MISSING_MESSAGE) from None
def required_body_parameters():
    """
    Build the credential fields merged into every Pocket API request body.

    The consumer key comes from `get_consumer_key()`; the access token is read
    from the `POCKET_ACCESS_TOKEN` environment variable (KeyError if unset).
    """
    consumer_key = get_consumer_key()
    access_token = os.environ["POCKET_ACCESS_TOKEN"]
    return {"consumer_key": consumer_key, "access_token": access_token}
_POCKET_CONSUMER_KEY_MISSING_MESSAGE = """ | |
POCKET_CONSUMER_KEY missing. | |
You will need a Pocket application. | |
You can create one at: | |
https://getpocket.com/developer/apps/new | |
""" |
""" | |
Rate Limits | |
https://getpocket.com/developer/docs/rate-limits | |
The Pocket API has two separate rate limits. These dictate how many calls can be made to the server within a given time. | |
Enforcing rate limits prevents a single app or user from overwhelming the server. The response codes will tell you if | |
you've hit your limit. Your application should be looking for these and if it encounters a rate limit status code, it | |
should back off until it hits the reset time. Ignoring these codes may cause your access to be disabled. | |
User Limit | |
Each user is limited to 320 calls per hour. This should be very sufficient for most users as the average user only makes | |
changes to their list periodically. To ensure the user stays within this limit, make use of the send method for batching | |
requests. | |
Consumer Key Limit | |
Each application is limited to 10,000 calls per hour. (...) | |
""" | |
import time | |
import typing | |
import gpauthentication | |
import gptagduration | |
import requests | |
class RateLimitResponseHeader(typing.NamedTuple):
    """
    Response Headers
    https://getpocket.com/developer/docs/rate-limits

    The Pocket API responses include custom headers that provide information about the current status of rate limiting for
    both the current user and consumer key.

    - `'X-Limit-User-Limit'`: Current rate limit enforced per user
    - `'X-Limit-User-Remaining'`: Number of calls remaining before hitting user's rate limit
    - `'X-Limit-User-Reset'`: Seconds until user's rate limit resets
    - `'X-Limit-Key-Limit'`: Current rate limit enforced per consumer key
    - `'X-Limit-Key-Remaining'`: Number of calls remaining before hitting consumer key's rate limit
    - `'X-Limit-Key-Reset'`: Seconds until consumer key rate limit resets

    Each instance pairs the name of one "remaining" header with the name of the
    matching "reset" header.
    """

    # Name of the header carrying the number of remaining calls.
    remaining_header_key: str
    # Name of the header carrying the seconds until the limit resets.
    reset_header_key: str

    def remaining(self, response: "requests.Response") -> int:
        """Return the remaining-calls header as an int; a missing header counts as 1."""
        return int(response.headers.get(self.remaining_header_key, "1"))

    def reset(self, response: "requests.Response") -> int:
        """
        Return how many seconds to wait before retrying.

        Yields 0 while calls remain; otherwise the value of the reset header
        (0 when that header is absent).
        """
        if self.remaining(response) > 0:
            return 0
        # Must read `reset_header_key` (the declared field); the former
        # `self.reset_header` does not exist and raised AttributeError exactly
        # when the rate limit had been hit.
        return int(response.headers.get(self.reset_header_key, "0"))
# The two rate limits reported by Pocket (per user and per consumer key), each
# described by its "remaining" header and its "reset" header.
LIMIT_REMAINING_HEADERS = {
    RateLimitResponseHeader("X-Limit-User-Remaining", "X-Limit-User-Reset"),
    RateLimitResponseHeader("X-Limit-Key-Remaining", "X-Limit-Key-Reset"),
}

# Special `tag` value that selects only untagged items.
TAG_UNTAGGED = "_untagged_"
class RetrieveParameters(typing.TypedDict, total=False):
    """
    Optional filters and paging options for Pocket's /v3/get endpoint.

    Pocket API: Retrieving a User's Pocket Data
    https://getpocket.com/developer/docs/v3/retrieve

    All keys are optional (`total=False`).

    `state`
    - `'unread'` = only return unread items (default)
    - `'archive'` = only return archived items
    - `'all'` = return both unread and archived items

    `favorite`
    - `0` = only return un-favorited items
    - `1` = only return favorited items

    `tag`
    - `str` = only return items tagged with tag_name
    - `'_untagged_'` = only return untagged items

    `contentType`
    - `'article'` = only return articles
    - `'video'` = only return videos or articles with embedded videos
    - `'image'` = only return images

    `sort`
    - `'newest'` = return items in order of newest to oldest
    - `'oldest'` = return items in order of oldest to newest
    - `'title'` = return items in order of title alphabetically
    - `'site'` = return items in order of url alphabetically

    `detailType`
    - `'simple'` = return basic information about each item, including title, url, status, and more
    - `'complete'` = return all data about each item, including tags, images, authors, videos, and more

    `search`
    - `str` = Only return items whose title or url contain the search string

    `domain`
    - `str` = Only return items from a particular domain

    `since`
    - `int` = Only return items modified since the given since unix timestamp

    `count`
    - `int` = Only return count number of items

    `offset`
    - `int` = Used only with count; start returning from offset position of results
    """

    # Which items to return by read status; Pocket defaults to "unread".
    state: typing.Literal["unread", "archive", "all"]

    # 0 = only un-favorited items, 1 = only favorited items.
    favorite: typing.Literal[0, 1]

    # A tag name, or "_untagged_" to return only untagged items.
    tag: str

    # Restrict results to articles, videos (or articles embedding them), or images.
    contentType: typing.Literal["article", "video", "image"]

    # Ordering: by age ("newest" / "oldest"), by title, or by url ("site").
    sort: typing.Literal["newest", "oldest", "title", "site"]

    # "simple" = basic item information; "complete" = all data, including tags and images.
    detailType: typing.Literal["simple", "complete"]

    # Only return items whose title or url contain the search string.
    search: str

    # Only return items from a particular domain.
    domain: str

    # Only return items modified since the given unix timestamp.
    since: int

    # Only return `count` number of items.
    count: int

    # Used only with `count`; start returning from this position of the results.
    offset: int
def retrieve(data: RetrieveParameters):
    """
    Pocket API: Retrieving a User's Pocket Data
    https://getpocket.com/developer/docs/v3/retrieve

    Pocket's /v3/get endpoint is a single call that is incredibly versatile. A few examples of the types of requests you can
    make:

    - Retrieve a user’s list of unread items
    - Sync data that has changed since the last time your app checked
    - Retrieve paged results sorted by the most recent saves
    - Retrieve just videos that the user has saved
    - Search for a given keyword in item’s title and url
    - Retrieve all items for a given domain

    and more

    Required Permissions
    In order to use the /v3/get endpoint, your consumer key must have the "Retrieve" permission.

    The request is repeated for as long as a rate-limit header reports that the
    limit was hit; between attempts this function sleeps for the advertised
    reset time plus two seconds.

    :param data: retrieve parameters, merged over the credential fields.
    :return: the `requests.Response` of the first non-throttled attempt.
    :raises requests.HTTPError: when that response carries an error status.
    """
    url = "https://getpocket.com/v3/get"
    while True:
        response = requests.post(
            url=url,
            data={**gpauthentication.required_body_parameters(), **data},
        )
        wait_intervals = {
            header: header.reset(response) for header in LIMIT_REMAINING_HEADERS
        }
        throttled = [
            (header, wait_interval)
            for header, wait_interval in wait_intervals.items()
            if wait_interval > 0
        ]
        if not throttled:
            response.raise_for_status()
            return response

        header, wait_interval = throttled[0]
        print(f"> Throttling; header: {header}, wait_interval: {wait_interval}.")
        time.sleep(wait_interval + 2)
        # Loop and retry with the same parameters. The previous recursive call
        # `retrieve(url, data)` passed two arguments to this one-parameter
        # function and raised TypeError instead of retrying.
def modify(actions: typing.Iterable[typing.Union[gptagduration.TagsAdd]]):
    """
    Pocket API: Modifying a User's Pocket Data
    https://getpocket.com/developer/docs/v3/modify

    Pocket’s /v3/send endpoint allows you to make a change or batch several changes to a user’s list or Pocket data.

    The batch is re-sent whenever a rate-limit header reports a positive reset
    interval, after sleeping for that interval plus two seconds. The first
    non-throttled response is checked with `raise_for_status()` and returned.
    """
    while True:
        payload = {**gpauthentication.required_body_parameters(), "actions": actions}
        response = requests.post(url="https://getpocket.com/v3/send", json=payload)

        wait_by_header = {
            header: header.reset(response) for header in LIMIT_REMAINING_HEADERS
        }
        blocking = next(
            ((header, wait) for header, wait in wait_by_header.items() if wait > 0),
            None,
        )
        if blocking is None:
            response.raise_for_status()
            return response

        header, wait_interval = blocking
        print(f"> Throttling; header: {header}, wait_interval: {wait_interval}.")
        time.sleep(wait_interval + 2)
#!/usr/bin/env python | |
import json | |
import os | |
import gpauthentication | |
import gpratelimitedpost | |
import gptagduration | |
ARTICLES_PER_ITERATION = 500 | |
def main():
    """
    Script entry point.

    When `POCKET_ACCESS_TOKEN` is absent from the environment, hand over to
    `gpauthentication.get_access_token` using the configured consumer key.
    Then walk the article list starting at `POCKET_OFFSET` (default 0) and
    print the final offset / updated / found_tags summary.
    """
    token_missing = "POCKET_ACCESS_TOKEN" not in os.environ
    if token_missing:
        gpauthentication.get_access_token(gpauthentication.get_consumer_key())

    start_offset = int(os.environ.get("POCKET_OFFSET", "0"))
    summary = _get_articles_offset(start_offset, 0, 0)
    print(dict(zip(("offset", "updated", "found_tags"), summary)))
def _get_articles_offset(offset, updated, found_tags):
    """
    Page through the whole Pocket list, tagging items as it goes.

    For every page of `ARTICLES_PER_ITERATION` items: fetch it, dump the raw
    response to `offset_<offset>.json`, and send the tag actions produced by
    `gptagduration.tag_actions` (if any). Stops at the first empty page.

    :return: tuple `(offset, updated, found_tags)` where `offset` is the offset
        of the empty page, `updated` is the running count of actions sent, and
        `found_tags` is passed through unchanged.
    """
    while True:
        print(f"Loading items [{offset} ... {offset + ARTICLES_PER_ITERATION}].")
        page_request = gpratelimitedpost.RetrieveParameters(
            state="all",
            sort="newest",
            detailType="complete",
            count=ARTICLES_PER_ITERATION,
            offset=offset,
        )
        contents = gpratelimitedpost.retrieve(data=page_request).json()

        # The dump happens before the emptiness check, so the final (empty)
        # page is written to disk as well.
        with open(f"offset_{offset}.json", "w") as dump_file:
            json.dump(contents, dump_file, indent=2)

        item_count = len(contents["list"])
        if item_count == 0:
            return offset, updated, found_tags
        print(f"> Found {len(contents['list'])} items.")

        actions = list(gptagduration.tag_actions(contents))
        if actions:
            print(f"> Applying {len(actions)} actions.")
            gpratelimitedpost.modify(actions=actions)

        offset += ARTICLES_PER_ITERATION
        updated += len(actions)
if __name__ == "__main__": | |
main() |
import typing | |
DURATION_BREAKPOINTS = (2, 5, 10, 15, 20, 30, 45, 60) | |
class TagsAdd(typing.TypedDict):
    """
    Payload of a single `tags_add` action for Pocket's /v3/send endpoint.

    Action: tags_add
    https://getpocket.com/developer/docs/v3/modify#action_tags_add

    Add one or more tags to an item.

    item_id
    - `int` = The id of the item to perform the action on.

    tags
    - `str` = A comma-delimited list of one or more tags.

    time
    - `typing.Optional[timestamp]` = The time the action occurred.
      (Listed by the API; this TypedDict declares no `time` key.)
    """

    # Always the literal action name "tags_add".
    action: typing.Literal["tags_add"]
    # Id of the item the tags are added to.
    item_id: int
    # Comma-delimited list of one or more tags.
    tags: str
def tag_duration(article, item_id):
    """
    Yield a `TagsAdd` action when `article` lacks its reading-time tag.

    The tag name is derived from the estimated reading duration of the
    article. Nothing is yielded when the article's `tags` mapping already
    contains that name. Asserts that `article["item_id"]` equals `item_id`.
    """
    assert article["item_id"] == item_id
    wanted_tag = _tag_name_from_duration(_article_duration(article))
    if wanted_tag in article.get("tags", {}):
        return
    yield TagsAdd(action="tags_add", item_id=item_id, tags=wanted_tag)
def _tag_name_from_duration(duration):
    """
    Map a duration to its bucket tag.

    Returns "<NN> minutes" for the first entry of `DURATION_BREAKPOINTS` that
    is strictly greater than `duration` (zero-padded to two digits), or
    "60 minutes+" when no breakpoint is greater.
    """
    matching_labels = (
        f"{duration_breakpoint:02} minutes"
        for duration_breakpoint in DURATION_BREAKPOINTS
        if duration < duration_breakpoint
    )
    return next(matching_labels, "60 minutes+")
def _article_duration(article):
    """
    Estimate the reading time of `article` in minutes.

    Words are counted at 275 per minute and every entry in `images` adds
    12 seconds. A missing or empty `word_count` is treated as 0 words instead
    of raising TypeError from `int(None)`.

    :param article: item mapping; reads the `word_count` and `images` keys.
    :return: estimated duration in minutes, as a float.
    """
    word_count = int(article.get("word_count") or 0)
    word_duration = (word_count / 275) * 60  # seconds spent reading text
    image_duration = len(article.get("images", tuple())) * 12  # seconds spent on images
    return (word_duration + image_duration) / 60
def tag_actions(articles):
    """
    Yield the tag actions for every item of a retrieve response.

    :param articles: decoded response mapping; its `list` entry is expected to
        map item ids to article mappings.
    :return: generator of the actions produced by `tag_duration` for each item.

    A missing or empty `list` entry (for example an empty list rather than a
    mapping) yields nothing instead of raising AttributeError on `.items()`.
    """
    items = articles.get("list") or {}
    for item_id, article in items.items():
        yield from tag_duration(article, item_id)
import collections | |
import json | |
import pathlib | |
import urllib.parse | |
CONTENT_URL_KEYS = ("given_url", "resolved_url") | |
if __name__ == "__main__":
    # Tally how often each tag and each URL host occurs across every
    # `offset_*.json` dump found in the current working directory.
    tags = collections.Counter()
    urls = collections.Counter()

    for offset_path in pathlib.Path.cwd().glob("offset_*.json"):
        with offset_path.open("r") as f:
            offset = json.load(f)

        articles = offset["list"]
        if not articles:
            # Skip empty pages but keep going: glob order is arbitrary, so an
            # empty dump may be visited before non-empty ones. (`break` here
            # used to discard every file that came after it.)
            continue

        for item_id, article in articles.items():
            assert item_id == article["item_id"]
            # Pass the keys explicitly: Counter.update() given a mapping would
            # treat its values as counts.
            tags.update(article.get("tags", dict()).keys())
            urls.update(
                urllib.parse.urlsplit(article[content_url_key]).netloc
                for content_url_key in CONTENT_URL_KEYS
            )

    print(tags)
    print(urls)
MIT License | |
Copyright (c) 2020 minute-pocket | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal | |
in the Software without restriction, including without limitation the rights | |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
copies of the Software, and to permit persons to whom the Software is | |
furnished to do so, subject to the following conditions: | |
The above copyright notice and this permission notice shall be included in all | |
copies or substantial portions of the Software. | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
SOFTWARE. |
requests==2.23.0 |