Last active
August 30, 2018 08:52
-
-
Save akiko-pusu/ee501712e992305a951cb6e147e23621 to your computer and use it in GitHub Desktop.
QiitaTeamのjsonをパースするスクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html>
<!--
  Article page template. The text is filled in with Python str.format, which
  substitutes the rendered_body and comment_contents placeholders below.
  Any literal curly brace added to this file must therefore be doubled.
-->
<head>
  <meta charset="UTF-8">
  <link rel="stylesheet" media="all" href="style.css">
  <link rel="stylesheet" media="all" href="github-markdown.css">
</head>
<body>
  <div class='teamMain'>
    <div class="teamSidebarContainer">
      <div class="teamSidebarContainer_main js-toc-section-chaser-wrapper">
        <!-- Article body (already rendered HTML) -->
        <article class="teamArticle">
          {rendered_body}
        </article>
        <hr/>
        <!-- Comment list (already rendered HTML, one block per comment) -->
        <div class="itemsShowComment">
          <div class="itemsShowComment_wrapper" id="comments">
            <div id="TeamCommentListContainer-react-component-a226d9d7-6e82-498d-864b-4f49f4c9e273">
              <div class="commentList">
                {comment_contents}
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
</body>
</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
from googleapiclient.discovery import build | |
from httplib2 import Http | |
from oauth2client import file, client, tools | |
from apiclient import discovery | |
from apiclient.http import MediaFileUpload | |
class GoogleDriveUpload():
    """Upload local HTML files to Google Drive through the Drive v3 API."""

    def __init__(self):
        # Building the service may start the interactive OAuth flow.
        self.service = self.set_service()

    def set_service(self):
        """Return an authorized Drive v3 service object.

        Credentials are read from ``token.json``. When they are missing or
        invalid, the OAuth flow is run from ``credentials.json`` with the
        ``token.json`` store passed along to it.
        """
        # If modifying these scopes, delete the file token.json.
        SCOPES = 'https://www.googleapis.com/auth/drive.file'
        store = file.Storage('token.json')
        creds = store.get()
        if not creds or creds.invalid:
            flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
            creds = tools.run_flow(flow, store)
        return build('drive', 'v3', http=creds.authorize(Http()))

    def upload(self, filename, title, folder_id='13fIQrI63uDZQDfxDn5vCmo9YQ9ZrNsGu'):
        """Upload *filename* as ``text/html`` into a Drive folder.

        Args:
            filename: Path of the local file to upload.
            title: Name given to the file on Drive.
            folder_id: ID of the destination folder. The default is the
                ConvertQiitaTest folder.

        Returns:
            The ID of the created Drive file (also printed to stdout).
        """
        file_metadata = {
            'name': title,
            # NOTE(review): if the goal is conversion to a Google Doc, the
            # target type is presumably
            # 'application/vnd.google-apps.document' -- confirm before changing.
            'mimeType': 'application/vnd.google-apps.file',
            'parents': [folder_id],
        }
        media = MediaFileUpload(filename,
                                mimetype='text/html',
                                resumable=True)
        # Named `created` rather than `file` so the imported oauth2client
        # `file` module is not shadowed inside this method.
        created = self.service.files().create(body=file_metadata,
                                              media_body=media,
                                              fields='id').execute()
        file_id = created.get('id')
        print('File ID: %s' % file_id)
        return file_id
def main():
    """Show basic usage of the Drive v3 API.

    Prints name, kind, id, web link and MIME type of up to 10 files returned
    by a listing restricted to the owner 'akiko@sider.review'.
    """
    # This file defines no module-level SCOPES, so the scope is declared here.
    # It is the same value GoogleDriveUpload.set_service uses, because both
    # code paths share the token.json credential store.
    scopes = 'https://www.googleapis.com/auth/drive.file'

    # Check the locally stored token first; run the OAuth flow when it is
    # missing or invalid.
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', scopes)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))

    # Call the Drive v3 API
    results = service.files().list(
        q="'akiko@sider.review' in owners",
        pageSize=10,
        fields="nextPageToken, files(id, kind, name, webViewLink, mimeType)").execute()
    items = results.get('files', [])
    if not items:
        print('No files found.')
        return
    print('Files:')
    for item in items:
        print('{0} / {1} ({2})\n\t{3} ({4})'.format(
            item['name'], item['kind'], item['id'],
            item['webViewLink'], item['mimeType']))


if __name__ == '__main__':
    main()
''' | |
$ python quickstart.py | |
Files: | |
ConvertQiitaTest / drive#file (13fIQrI63uDZQDfxDn5vCmo9YQ9ZrNsGu) | |
https://drive.google.com/drive/folders/13fIQrI63uDZQDfxDn5vCmo9YQ9ZrNsGu | |
Getting started / drive#file (0BzADz7IRjMQ0c3RhcnRlcl9maWxlX2Rhc2hlclYw) | |
https://drive.google.com/a/sider.review/file/d/0BzADz7IRjMQ0c3RhcnRlcl9maWxlX2Rhc2hlclYw/view?usp=drivesdk | |
''' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Copyright (c) 2017 Chris Patuzzo | |
https://twitter.com/chrispatuzzo | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal | |
in the Software without restriction, including without limitation the rights | |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
copies of the Software, and to permit persons to whom the Software is | |
furnished to do so, subject to the following conditions: | |
The above copyright notice and this permission notice shall be included in all | |
copies or substantial portions of the Software. | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
SOFTWARE. | |
*/ | |
/* ---- Page defaults ---- */
body {
  font-family: Helvetica, arial, sans-serif;
  font-size: 14px;
  line-height: 1.6;
  background-color: white;
  /* One shorthand only: the earlier padding-top / padding-bottom of 10px
     were declared before this line and were always overridden by it. */
  padding: 30px;
  color: #333;
}

/* No stray margin at the very top or bottom of the page. */
body > *:first-child {
  margin-top: 0 !important;
}

body > *:last-child {
  margin-bottom: 0 !important;
}

/* ---- Links ---- */
a {
  color: #4183C4;
  text-decoration: none;
}

a.absent {
  color: #cc0000;
}

/* Anchor link stretched over the left edge of its positioned ancestor. */
a.anchor {
  display: block;
  padding-left: 30px;
  margin-left: -30px;
  cursor: pointer;
  position: absolute;
  top: 0;
  left: 0;
  bottom: 0;
}
/* ---- Headings ---- */
h1, h2, h3, h4, h5, h6 {
  margin: 20px 0 10px;
  padding: 0;
  font-weight: bold;
  -webkit-font-smoothing: antialiased;
  cursor: text;
  /* Positioning context for the absolutely positioned a.anchor. */
  position: relative;
}

h2:first-child, h1:first-child, h1:first-child + h2, h3:first-child, h4:first-child, h5:first-child, h6:first-child {
  margin-top: 0;
  padding-top: 0;
}

h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, h5:hover a.anchor, h6:hover a.anchor {
  text-decoration: none;
}

/* Inline code inside any heading inherits the heading's font size
   (previously six identical rules, one per heading level). */
h1 tt, h1 code,
h2 tt, h2 code,
h3 tt, h3 code,
h4 tt, h4 code,
h5 tt, h5 code,
h6 tt, h6 code {
  font-size: inherit;
}

h1 {
  font-size: 28px;
  color: black;
}

h2 {
  font-size: 24px;
  border-bottom: 1px solid #cccccc;
  color: black;
}

h3 {
  font-size: 18px;
}

h4 {
  font-size: 16px;
}

h5 {
  font-size: 14px;
}

h6 {
  color: #777777;
  font-size: 14px;
}

/* ---- Block spacing ---- */
p, blockquote, ul, ol, dl, li, table, pre {
  margin: 15px 0;
}

hr {
  border: 0 none;
  color: #cccccc;
  /* With the border removed, the rule needs a background to be painted;
     `color` alone draws nothing on a borderless hr. */
  background-color: #cccccc;
  height: 4px;
  padding: 0;
}

/* Headings that open the page start flush with the top
   (previously four rules with identical declarations). */
body > h2:first-child,
body > h1:first-child,
body > h1:first-child + h2,
body > h3:first-child, body > h4:first-child, body > h5:first-child, body > h6:first-child {
  margin-top: 0;
  padding-top: 0;
}

a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 {
  margin-top: 0;
  padding-top: 0;
}

h1 p, h2 p, h3 p, h4 p, h5 p, h6 p {
  margin-top: 0;
}

li p.first {
  display: inline-block;
}
/* ---- Lists ---- */
ul, ol {
  padding-left: 30px;
}

ul :first-child, ol :first-child {
  margin-top: 0;
}

ul :last-child, ol :last-child {
  margin-bottom: 0;
}

/* ---- Definition lists ---- */
dl {
  padding: 0;
}

dl dt {
  font-size: 14px;
  font-weight: bold;
  font-style: italic;
  padding: 0;
  margin: 15px 0 5px;
}

dl dt:first-child {
  padding: 0;
}

dl dt > :first-child {
  margin-top: 0;
}

dl dt > :last-child {
  margin-bottom: 0;
}

dl dd {
  margin: 0 0 15px;
  padding: 0 15px;
}

dl dd > :first-child {
  margin-top: 0;
}

dl dd > :last-child {
  margin-bottom: 0;
}

/* ---- Block quotes ---- */
blockquote {
  border-left: 4px solid #ddd;
  padding: 0 15px;
  color: #777;
}

blockquote > :first-child {
  margin-top: 0;
}

blockquote > :last-child {
  margin-bottom: 0;
}

/* ---- Tables ---- */
table {
  padding: 0;
}

table tr {
  border-top: 1px solid #ccc;
  background-color: white;
  margin: 0;
  padding: 0;
}

/* Zebra striping on even rows. */
table tr:nth-child(even) {
  background-color: #f8f8f8;
}

/* Cell box shared by header and data cells. */
table tr th, table tr td {
  border: 1px solid #ccc;
  text-align: left;
  margin: 0;
  padding: 6px 13px;
}

table tr th {
  font-weight: bold;
}

table tr th :first-child, table tr td :first-child {
  margin-top: 0;
}

table tr th :last-child, table tr td :last-child {
  margin-bottom: 0;
}
/* ---- Images ---- */
img {
  max-width: 100%;
}

/* Framed image: bordered box with the caption span underneath. */
span.frame {
  display: block;
  overflow: hidden;
}

span.frame > span {
  border: 1px solid #ddd;
  display: block;
  float: left;
  overflow: hidden;
  margin: 13px 0 0;
  padding: 7px;
  width: auto;
}

span.frame span img {
  display: block;
  float: left;
}

span.frame span span {
  clear: both;
  color: #333;
  display: block;
  padding: 5px 0 0;
}

/* Centered image */
span.align-center {
  display: block;
  overflow: hidden;
  clear: both;
}

span.align-center > span {
  display: block;
  overflow: hidden;
  margin: 13px auto 0;
  text-align: center;
}

span.align-center span img {
  margin: 0 auto;
  text-align: center;
}

/* Right-aligned image */
span.align-right {
  display: block;
  overflow: hidden;
  clear: both;
}

span.align-right > span {
  display: block;
  overflow: hidden;
  margin: 13px 0 0;
  text-align: right;
}

span.align-right span img {
  margin: 0;
  text-align: right;
}

/* Floated images */
span.float-left {
  display: block;
  margin-right: 13px;
  overflow: hidden;
  float: left;
}

span.float-left span {
  margin: 13px 0 0;
}

span.float-right {
  display: block;
  margin-left: 13px;
  overflow: hidden;
  float: right;
}

span.float-right > span {
  display: block;
  overflow: hidden;
  margin: 13px auto 0;
  text-align: right;
}
/* ---- Inline code ---- */
code, tt {
  margin: 0 2px;
  padding: 0 5px;
  white-space: nowrap;
  border: 1px solid #eaeaea;
  background-color: #f8f8f8;
  border-radius: 3px;
}

/* Code inside a pre block keeps its whitespace and drops the inline box
   spacing; border and background are reset by the last rule of this section. */
pre code {
  margin: 0;
  padding: 0;
  white-space: pre;
}

/* ---- Code blocks ----
   `.highlight pre` and `pre` carried identical declarations, so they share
   one rule. */
.highlight pre,
pre {
  background-color: #f8f8f8;
  border: 1px solid #cccccc;
  font-size: 13px;
  line-height: 19px;
  overflow: auto;
  padding: 6px 10px;
  border-radius: 3px;
}

/* Remove the inline-code chrome so only the pre box is visible. */
pre code, pre tt {
  background-color: transparent;
  border: none;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import os, json, textwrap | |
from pathlib import Path | |
''' | |
- jsonを読みこむ | |
- jsonをパースする | |
- jsonから要素を抽出する | |
- 要素をファイルに書き出してみる | |
- htmlで開いてみる | |
- テンプレートを用意する | |
- テンプレートを開いて要素をはめこむ | |
- テンプレートを適用させて保存する | |
- コメント部分を抽出する | |
- コメント用のテンプレートを用意する | |
- コメント部分をはめ込む | |
- テスト用ディレクトリを作る | |
- htmlをGoogleDriveにアップする | |
''' | |
class ParseQiita:
    """Convert Qiita:Team article JSON files into HTML pages.

    Each article JSON is rendered through an HTML template and written to
    the ``results`` directory.
    """

    def hello(self):
        """Print a fixed greeting."""
        print("Hello World!")

    def load_json(self, file):
        """Return the object decoded from the JSON file at path *file*.

        The path is echoed to stdout first. The file is read as UTF-8 so the
        result does not depend on the platform's default encoding.
        """
        print(file)
        with open(file, encoding='utf-8') as fh:
            return json.load(fh)

    def read_element_from_json(self, file, element_name='id'):
        """Return the top-level value stored under *element_name* in *file*."""
        data = self.load_json(file=file)
        return data[element_name]

    def get_comments(self, data):
        """Return the ``comments`` entry of an article dict."""
        return data['comments']

    def generate_tags_content(self, data):
        """Return one ``<span>`` line per entry of ``data['tags']``.

        Returns an empty string when there are no tags.
        """
        return ''.join(
            "<span class='it-Tags_item'>{name}</span>\n".format(name=tag['name'])
            for tag in data['tags']
        )

    def generate_comment_content(self, data):
        """Return the HTML for all comments of an article, in order.

        Each comment contributes one ``<div class="comment">`` block holding
        the author's id, the update timestamp and the rendered body.
        Returns an empty string when there are no comments.
        """
        # Dedent once, outside the loop; the template is the same for every
        # comment.
        template = textwrap.dedent('''
            <div class="comment">
              <div class="comment_content">
                <div class="commentHeader">
                  <div class="commentHeader_creator">@{user_id}</div>
                  <div class="commentHeader_metadata pull-right">{updated_at}</div>
                </div>
                <div class="markdownContent">
                  {rendered_body}
                </div>
              </div>
            </div>
        ''')
        return ''.join(
            template.format(rendered_body=comment['rendered_body'],
                            user_id=comment['user']['id'],
                            updated_at=comment['updated_at'])
            for comment in self.get_comments(data)
        )

    def target_files(self, directory="qiitateam_sideci/articles/"):
        """Return the ``*.json`` paths directly inside *directory*, sorted.

        Sorting makes the processing order reproducible between runs.
        """
        return sorted(Path(directory).glob("*.json"))

    def save_all_files(self, files):
        """Convert every path in *files* to HTML and return True."""
        for entry in files:
            path = str(entry)
            print('hoge' + path)
            saved_file = self.save_file(path, ext='html')
            print(saved_file)
        return True

    def save_file(self, file_name, ext='txt'):
        """Render the article JSON *file_name* and write it under ``results``.

        Args:
            file_name: Path of the source JSON file.
            ext: Extension of the output file (without the dot).

        Returns:
            Path of the written file, ``results/<source stem>.<ext>``.
        """
        new_dir_path = 'results'
        os.makedirs(new_dir_path, exist_ok=True)

        data = self.load_json(file=file_name)
        stem = os.path.splitext(os.path.basename(file_name))[0]
        out_path = new_dir_path + "/{0}.{1}".format(stem, ext)

        # Render before opening the output, so a missing key or template
        # does not leave an empty file behind.
        write_content = self.load_template().format(
            rendered_body=data['rendered_body'],
            url=data['url'],
            tag_contents=self.generate_tags_content(data),
            comment_contents=self.generate_comment_content(data),
            title=data['title'],
            user_id=data['user']['id'],
            updated_at=data['updated_at'],
        )
        with open(out_path, 'w', encoding='utf-8') as new_file:
            new_file.write(write_content)
        return out_path

    def load_template(self, template_name='article_template.html'):
        """Return the text of the UTF-8 template file *template_name*."""
        with open(template_name, 'r', encoding='utf-8') as template:
            return template.read()
def parse(dry_run=False):
    """Run the converter over all target files (used to check the class).

    Args:
        dry_run: When true, only report how many files would be processed
            and write nothing. When false (the default), convert every
            target file.
    """
    converter = ParseQiita()
    # Not named `list`, to keep the builtin usable.
    targets = converter.target_files()
    print('Target files: {0}'.format(len(targets)))
    # The original condition was inverted: files were written only when
    # dry_run was true.
    if not dry_run:
        converter.save_all_files(targets)
        print('Converted.')
def main():
    """Script entry point: call parse() with its default arguments."""
    outcome = parse()
    return outcome


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
from googleapiclient.discovery import build | |
from httplib2 import Http | |
from oauth2client import file, client, tools | |
# If modifying these scopes, delete the file token.json. | |
SCOPES = 'https://www.googleapis.com/auth/drive.metadata.readonly' | |
def main():
    """Show basic usage of the Drive v3 API.

    Prints name, kind, id and web link of up to 10 files returned by a
    listing restricted to the owner 'akiko@sider.review'.
    """
    # Check the locally stored token first; run the OAuth flow when it is
    # missing or invalid.
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))

    # Call the Drive v3 API.
    # q='...' is the search query; here it lists only files owned by this
    # account.
    results = service.files().list(
        q="'akiko@sider.review' in owners",
        pageSize=10,
        fields="nextPageToken, files(id, kind, name, webViewLink, mimeType)").execute()
    items = results.get('files', [])
    if not items:
        print('No files found.')
        return
    print('Files:')
    for item in items:
        # The format string has four placeholders; the former fifth argument
        # (mimeType) was never printed and has been dropped.
        print('{0} / {1} ({2})\n\t{3}'.format(
            item['name'], item['kind'], item['id'], item['webViewLink']))


if __name__ == '__main__':
    main()
''' | |
$ python quickstart.py | |
Files: | |
ConvertQiitaTest / drive#file (13fIQrI63uDZQDfxDn5vCmo9YQ9ZrNsGu) | |
https://drive.google.com/drive/folders/13fIQrI63uDZQDfxDn5vCmo9YQ9ZrNsGu | |
Getting started / drive#file (0BzADz7IRjMQ0c3RhcnRlcl9maWxlX2Rhc2hlclYw) | |
https://drive.google.com/a/sider.review/file/d/0BzADz7IRjMQ0c3RhcnRlcl9maWxlX2Rhc2hlclYw/view?usp=drivesdk | |
''' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* ---- Card-style containers: white box, rounded grey border ---- */
.teamAccountSwitch,
.teamArticle,
.teamCategoryBrowser,
.teamComments,
.teamDrafts,
.teamGroupMembers,
.teamInvitation,
.teamItems,
.memberList,
.teamPatch,
.teamPatches,
.teamRecordList,
.teamRevisions_main,
.teamSidebarContainer_announcementSection,
.teamSidebarContainer_main_inner,
.teamTemplateForm,
.teamTemplates {
  background-color: #fff;
  border-radius: 4px;
  border: solid 1px #ddd;
  padding: 36px;
}

/* ---- Code listings ---- */
.code-frame {
  border-radius: 3px;
  margin: 1em 0;
  background-color: #f7f7f7;
  line-height: 0;
  overflow: hidden;
}

.highlight {
  background: #fff;
}

.code-frame pre {
  border-radius: 3px;
  border: 0;
  margin: 0;
  padding: .6em 1.2em;
  line-height: 1.2;
  background-color: #f7f7f7;
  overflow-x: auto;
  word-wrap: normal;
  white-space: pre;
  font-size: .8em;
  font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
}

/* ---- Inline code ---- */
code {
  padding: 2px 4px;
  color: #4a4a4a;
  background-color: #f7f7f7;
  border-radius: 3px;
  font-size: .9em;
}

/* ---- Comments ---- */
.comment_content {
  border-radius: 3px;
  position: relative;
  background-color: #fff;
  border: solid 1px #ccc;
  margin-top: 11px;
  margin-bottom: 20px;
}

.comment_content .markdownContent,
.comment_content .references {
  padding: 16px;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
1. 単体のjson
2. ディレクトリ以下のjson
3. GoogleDocsにアップ
テスト用ディレクトリを作成する(手動)
資料になるドキュメントを探す
Google APIを有効にする設定(APIキー取得とか)
Credentialを利用した読み取り -> OK
ファイルのアップロードのテスト
一括でアップロードできるか、1ページずつちまちまアップできるか考える
4. タグ、関連コンテンツ、画像の調整
いろいろ。。。