Last active
July 15, 2019 12:35
-
-
Save pandanote-info/581d6f303a7ac9873b58a4023a785796 to your computer and use it in GitHub Desktop.
GitHub APIを使ってGitHub Pagesのコミット情報を取得後に、ファイルの更新情報を抽出してその結果のデータベースへ保管及びmarkdownファイルの作成を行うスクリプト。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import os | |
import re | |
import json | |
import urllib | |
from datetime import datetime | |
import dateutil.parser | |
from dateutil import tz | |
import mysql.connector | |
from requests_oauthlib import OAuth2Session | |
from collections import deque | |
# Name of the JSON file that defines the information needed to access the
# database.
db_config_file = "db_config.json"
def get_message_from_github(access_token, url):
    """Send a GET request to ``url`` and return the decoded JSON body.

    Args:
        access_token: Token sent in the ``Authorization`` header as
            ``token <access_token>``.
        url: Full URL to request.

    Returns:
        The value produced by the response's ``json()`` method.

    The process exits with status 1 when the response status is not 200;
    a diagnostic naming the status and URL is written to stderr first.
    """
    github = OAuth2Session()
    github.headers['Authorization'] = 'token ' + access_token
    r = github.get(url)
    if r.status_code != 200:
        # Say what failed before exiting: a silent exit gives no hint
        # whether the token, the URL or the API itself is at fault.
        print(
            "GitHub API request failed: HTTP {0} for {1}".format(r.status_code, url),
            file=sys.stderr,
        )
        sys.exit(1)
    return r.json()
# Parse the database connection settings from the JSON config file.
with open(db_config_file) as config_stream:
    dbconfig = json.load(config_stream)
# Load every page already recorded in the github_pages table.
#   files             -- filename -> {"description", "created_at", "updated_at"}
#   filename_to_check -- filenames found in the table; entries are removed
#                        later as matching markdown files are seen.
files = {}
filename_to_check = []
conn = mysql.connector.connect(
    user=dbconfig["user"],
    password=dbconfig["password"],
    host=dbconfig["host"],
    database=dbconfig["dbname"],
)
cur = conn.cursor()
cur.execute("select filename,description,created_at,updated_at from github_pages")
for row in cur:
    filename, description, created_at, updated_at = row
    # Each timestamp is re-parsed from its second-resolution text form and
    # then labelled as UTC.
    created_utc = dateutil.parser.parse(
        created_at.strftime("%Y-%m-%d %H:%M:%S")
    ).replace(tzinfo=dateutil.tz.tzutc())
    updated_utc = dateutil.parser.parse(
        updated_at.strftime("%Y-%m-%d %H:%M:%S")
    ).replace(tzinfo=dateutil.tz.tzutc())
    files[filename] = {
        "description": description,
        "created_at": created_utc,
        "updated_at": updated_utc,
    }
    filename_to_check.append(filename)
cur.close()
# Markdown files to register are given as command-line arguments.
mdfiles = sys.argv[1:]
# The capture group is the part of the path after the repository directory.
# Dots are escaped so that they only match a literal ".".
pattern = r'^\.\./\.\./pandanote-info\.github\.io/(.*)'
desc_pattern = '^description: (.*)'
for f in mdfiles:
    result = re.match(pattern, f)
    if result is None:
        # A path outside the expected prefix has no repository-relative
        # name; skip it instead of crashing on result.group().
        print("skipping {0}: path does not match {1}".format(f, pattern), file=sys.stderr)
        continue
    filename = result.group(1)
    with open(f) as infile:
        for line in infile:
            desc_result = re.match(desc_pattern, line)
            if desc_result:
                # When several lines match, the last one wins.
                files.setdefault(filename, {})["description"] = desc_result.group(1)
    # The file was given on the command line, so its database row (if any)
    # is no longer a candidate for deletion.
    if filename in filename_to_check:
        filename_to_check.remove(filename)
# Whatever is still listed in filename_to_check was in the table but was not
# matched by any markdown file on the command line: delete those rows.
if filename_to_check:
    cur = conn.cursor()
    for foc in filename_to_check:
        # Query parameters have to be passed as a sequence, not a bare string.
        cur.execute("delete from github_pages where filename=%s", (foc,))
        # Drop the in-memory entry too; otherwise the later insert pass over
        # `files` would put the deleted row straight back.
        files.pop(foc, None)
    conn.commit()
    cur.close()
# Build the optional "&since=..." query suffix from the newest updated_at
# value stored in the table.
since_param = ''
cur = conn.cursor()
cur.execute('select max(updated_at) from github_pages')
since = cur.fetchone()
cur.close()
# The aggregate is NULL (None) when the table has no rows; in that case no
# filter is added.
if since[0] is not None:
    since_param = '&since=' + since[0].strftime("%Y-%m-%dT%H:%M:%SZ")
# File that stores the client keys for the GitHub API; the keys can be
# obtained from GitHub.
github_client_key_file = 'github_client_key.json'
client_id = ''
client_secret = ''
with open(github_client_key_file) as key_stream:
    keys = json.load(key_stream)
# Only the access token is read from the key file here.
access_token = keys["access_token"]
# "import urllib" alone does not guarantee that the parse submodule is loaded.
import urllib.parse

# For every known page, fetch its commits and widen the recorded
# created_at / updated_at range to cover the commit dates.
commits_url = "https://api.github.com/repos/pandanote-info/pandanote-info.github.io/commits?path="
for filename, f in files.items():
    # A page without timestamps is not in the database yet and needs its full
    # history: the "since" filter would hide its older commits and leave it
    # with no timestamps at all.
    query_suffix = since_param if "created_at" in f else ''
    message = get_message_from_github(
        access_token,
        commits_url + urllib.parse.quote(filename) + query_suffix,
    )
    # NOTE(review): the commits endpoint is paginated and only the first page
    # of results is examined here -- confirm this is enough for long histories.
    for commit in message:
        dt = dateutil.parser.parse(commit["commit"]["committer"]["date"])
        print(filename, file=sys.stderr)
        if "created_at" in f:
            f["created_at"] = min(f["created_at"], dt)
            f["updated_at"] = max(f["updated_at"], dt)
        else:
            f["created_at"] = dt
            f["updated_at"] = dt
# Write the collected page information back to the github_pages table.
upsert_sql = (
    "insert into github_pages(filename,description,created_at,updated_at) "
    "values(%s,%s,%s,%s) "
    # Refresh the description as well, so an edited title reaches the listing.
    "on duplicate key update description=%s,updated_at=%s"
)
cur = conn.cursor()
for filename, f in files.items():
    if "created_at" not in f or "updated_at" not in f:
        # No commit was found for this page, so there is nothing to record;
        # skip it rather than fail with KeyError.
        print("skipping {0}: no commit information".format(filename), file=sys.stderr)
        continue
    created_text = f["created_at"].strftime("%Y-%m-%d %H:%M:%S")
    updated_text = f["updated_at"].strftime("%Y-%m-%d %H:%M:%S")
    cur.execute(
        upsert_sql,
        (filename, f["description"], created_text, updated_text, f["description"], updated_text),
    )
conn.commit()
cur.close()
def _local_date(stamp):
    """Return ``stamp`` (treated as UTC) as a local-time ``YYYY/MM/DD`` string."""
    as_utc = dateutil.parser.parse(stamp.strftime("%Y-%m-%d %H:%M:%S")).replace(tzinfo=dateutil.tz.tzutc())
    return as_utc.astimezone(dateutil.tz.tzlocal()).strftime("%Y/%m/%d")


# Print the markdown listing to stdout, most recently updated page first.
print("本Webサイトのコンテンツにつきましては随時更新を行っていますので、更新日の新しい方から順に配列しています。\n")
print("なお、このページに限り、更新日の表示はJST(日本標準時)による表示です。(本Webサイトの他のページの時刻表示はUTC(JSTより9時間遅れです。)としています。)")
cur = conn.cursor()
cur.execute("select filename,description,created_at,updated_at from github_pages order by updated_at desc")
for f in cur:
    created_at_at_localtime = _local_date(f[2])
    updated_at_at_localtime = _local_date(f[3])
    # Raw string: "\." in an ordinary literal is an invalid escape sequence.
    page_path = re.sub(r'\.md$', '.html', f[0])
    print("* [{0}](https://sidestory.pandanote.info/{1})".format(f[1], page_path), end='')
    if created_at_at_localtime == updated_at_at_localtime:
        print("({0})".format(created_at_at_localtime))
    else:
        print("({0},last update: {1})".format(created_at_at_localtime, updated_at_at_localtime))
cur.close()
# Release the database connection before leaving.
conn.close()
sys.exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment