Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
GitHub APIを使ってGitHub Pagesのコミット情報を取得後に、ファイルの更新情報を抽出してその結果のデータベースへ保管及びmarkdownファイルの作成を行うスクリプト。
#!/usr/bin/env python3
import sys
import os
import re
import json
import urllib
from datetime import datetime
import dateutil.parser
from dateutil import tz
import mysql.connector
from requests_oauthlib import OAuth2Session
from collections import deque
# Path of the JSON file that holds the information required to access the database.
db_config_file = 'db_config.json'
def get_message_from_github(access_token, url):
    """Send a GET request to *url* and return the decoded JSON response.

    Args:
        access_token: Token sent in the ``Authorization`` header as
            ``token <access_token>``.
        url: Complete URL to request.

    Returns:
        The response body parsed as JSON.

    Exits the process with status 1 when the response status is not 200.
    The status code and URL are written to stderr first so the failure
    can be diagnosed.
    """
    github = OAuth2Session()
    github.headers['Authorization'] = 'token ' + access_token
    r = github.get(url)
    if r.status_code != 200:
        # sys.exit() with a string prints it to stderr and exits with status 1,
        # which keeps the original exit code while explaining what went wrong.
        sys.exit("GitHub API request failed with status {0}: {1}".format(r.status_code, url))
    return r.json()
# Read the database connection settings from the JSON config file.
with open(db_config_file) as dbfile:
    dbconfig = json.load(dbfile)

# files maps a filename to its "description", "created_at" and "updated_at".
files = {}
conn = mysql.connector.connect(
    user=dbconfig["user"],
    password=dbconfig["password"],
    host=dbconfig["host"],
    database=dbconfig["dbname"],
)
cur = conn.cursor()
cur.execute("select filename,description,created_at,updated_at from github_pages")
# Every filename currently stored in the table, in the order the rows are read.
filename_to_check = []
for row in cur:
    stored_name, stored_description, stored_created, stored_updated = row
    # The stored timestamps are re-parsed from their formatted text and then
    # tagged as UTC so they become timezone-aware datetimes.
    created_utc = dateutil.parser.parse(
        stored_created.strftime("%Y-%m-%d %H:%M:%S")
    ).replace(tzinfo=dateutil.tz.tzutc())
    updated_utc = dateutil.parser.parse(
        stored_updated.strftime("%Y-%m-%d %H:%M:%S")
    ).replace(tzinfo=dateutil.tz.tzutc())
    files[stored_name] = {
        "description": stored_description,
        "created_at": created_utc,
        "updated_at": updated_utc,
    }
    filename_to_check.append(stored_name)
cur.close()
# The markdown files to scan are given as command-line arguments.
mdfiles = sys.argv[1:]
# Captures the part of the path after the repository directory prefix.
# NOTE(review): the dots in this pattern are unescaped, so each one matches
# any character, not only a literal "." - confirm whether that is intended.
pattern = '^../../pandanote-info.github.io/(.*)'
# Captures the text following "description: " at the start of a line.
desc_pattern = '^description: (.*)'
for f in mdfiles:
    result = re.match(pattern, f)
    if result is None:
        # Abort instead of skipping: a skipped file would stay in
        # filename_to_check and its row would then be deleted from the table.
        sys.exit("unexpected path (does not match {0!r}): {1}".format(pattern, f))
    filename = result.group(1)
    with open(f) as infile:
        for line in infile:
            desc_result = re.match(desc_pattern, line)
            if desc_result:
                # When several description lines exist, the last one wins.
                files.setdefault(filename, {})["description"] = desc_result.group(1)
    # The file exists on disk, so its stored row must not be deleted.
    if filename in filename_to_check:
        filename_to_check.remove(filename)
# Filenames left in filename_to_check are stored in the table but were not
# among the scanned files, so their rows are removed.
if filename_to_check:
    cur = conn.cursor()
    for foc in filename_to_check:
        # execute() expects its parameters as a tuple/list/dict, so the single
        # value is wrapped in a one-element tuple rather than passed as a str.
        cur.execute("delete from github_pages where filename=%s", (foc,))
        # Drop the in-memory entry too; otherwise the later
        # "insert ... on duplicate key update" loop over files would
        # re-create the row that was just deleted.
        files.pop(foc, None)
    conn.commit()
    cur.close()
# Build the "&since=..." query fragment from the newest updated_at value in
# the table; it stays empty when the table holds no rows.
cur = conn.cursor()
cur.execute('select max(updated_at) from github_pages')
(newest_update,) = cur.fetchone()
cur.close()
if newest_update is None:
    since_param = ''
else:
    since_param = '&since=' + newest_update.strftime("%Y-%m-%dT%H:%M:%SZ")
# File that stores the client key for the GitHub API. It can be obtained from GitHub.
github_client_key_file = 'github_client_key.json'
# Only access_token is read from the key file; the other two stay empty.
client_id = ''
client_secret = ''
with open(github_client_key_file) as key_file:
    access_token = json.load(key_file)["access_token"]
# Ask GitHub for the commits touching each page and widen the page's
# created_at / updated_at range so that it covers every commit date returned.
commits_url = "https://api.github.com/repos/pandanote-info/pandanote-info.github.io/commits?path="
for filename, f in files.items():
    # The "since" filter is only valid for pages that already carry stored
    # timestamps. A page without them needs its whole history; filtering would
    # hide its older commits and give a wrong (or missing) created_at.
    since_filter = since_param if "created_at" in f else ''
    message = get_message_from_github(
        access_token,
        commits_url + urllib.parse.quote(filename) + since_filter,
    )
    for commit in message:
        dt = dateutil.parser.parse(commit["commit"]["committer"]["date"])
        print(filename, file=sys.stderr)
        if "created_at" in f:
            # Keep the earliest date as created_at and the latest as updated_at.
            f["created_at"] = min(f["created_at"], dt)
            f["updated_at"] = max(f["updated_at"], dt)
        else:
            # First commit seen for a page without stored timestamps.
            f["created_at"] = dt
            f["updated_at"] = dt
# Store every page: insert new rows, and refresh the description and
# updated_at of rows that already exist.
cur = conn.cursor()
timestamp_format = "%Y-%m-%d %H:%M:%S"
for filename, f in files.items():
    if "created_at" not in f or "updated_at" not in f:
        # No commit date was found for this page, so there is nothing valid to
        # store; report it instead of failing with a KeyError.
        print("skipping {0}: no commit timestamps available".format(filename), file=sys.stderr)
        continue
    created = f["created_at"].strftime(timestamp_format)
    updated = f["updated_at"].strftime(timestamp_format)
    # The description is part of the update clause so that a description
    # changed in the markdown file reaches rows that already exist.
    cur.execute(
        "insert into github_pages(filename,description,created_at,updated_at) values(%s,%s,%s,%s) on duplicate key update description=%s,updated_at=%s",
        (filename, f["description"], created, updated, f["description"], updated),
    )
conn.commit()
cur.close()
def _to_local_date(stamp):
    """Return *stamp* as a ``YYYY/MM/DD`` string in the local timezone.

    The value is formatted, re-parsed, tagged as UTC and then converted with
    ``dateutil.tz.tzlocal()``.
    """
    as_utc = dateutil.parser.parse(stamp.strftime("%Y-%m-%d %H:%M:%S")).replace(tzinfo=dateutil.tz.tzutc())
    return as_utc.astimezone(dateutil.tz.tzlocal()).strftime("%Y/%m/%d")


# Emit the markdown index on stdout: an introduction followed by one bullet
# per stored page, most recently updated first.
print("本Webサイトのコンテンツにつきましては随時更新を行っていますので、更新日の新しい方から順に配列しています。\n")
# NOTE(review): this text announces JST, but the dates below use the machine's
# local timezone (tzlocal) - confirm the script always runs with a JST locale.
print("なお、このページに限り、更新日の表示はJST(日本標準時)による表示です。(本Webサイトの他のページの時刻表示はUTC(JSTより9時間遅れです。)としています。)")
cur = conn.cursor()
cur.execute("select filename,description,created_at,updated_at from github_pages order by updated_at desc")
for f in cur:
    created_at_at_localtime = _to_local_date(f[2])
    updated_at_at_localtime = _to_local_date(f[3])
    # Raw string: '\.' in a normal string literal is an invalid escape sequence.
    print("* [{0}](https://sidestory.pandanote.info/{1})".format(f[1], re.sub(r'\.md$', '.html', f[0])), end='')
    if created_at_at_localtime == updated_at_at_localtime:
        print("({0})".format(created_at_at_localtime))
    else:
        print("({0},last update: {1})".format(created_at_at_localtime, updated_at_at_localtime))
conn.commit()
cur.close()
# Release the database connection explicitly before leaving.
conn.close()
sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.