Skip to content

Instantly share code, notes, and snippets.

@kefi550
Created January 22, 2023 04:31
Show Gist options
  • Save kefi550/08ad68a4873e2f4dd65a0bdf662e66f2 to your computer and use it in GitHub Desktop.
Save kefi550/08ad68a4873e2f4dd65a0bdf662e66f2 to your computer and use it in GitHub Desktop.
Girls Girls ガルガルのツイートから撮可レギュレーションをパースする
import twitter
import json
import re
from dataclasses import dataclass
# Twitter API credentials. The '*' values look like redacted placeholders;
# real keys presumably have to be filled in before the script can
# authenticate -- TODO confirm.
TWITTER_CONSUMER_KEY = '*'
TWITTER_CONSUMER_SECRET = '*'
TWITTER_ACCESS_TOKEN_KEY = '*'
TWITTER_ACCESS_TOKEN_SECRET = '*'

# Account whose timeline is downloaded and parsed.
SCREEN_NAME = "Girls_Girlsinfo"

# Shared python-twitter client used by get_tweets().
api = twitter.Api(
    consumer_key=TWITTER_CONSUMER_KEY,
    consumer_secret=TWITTER_CONSUMER_SECRET,
    access_token_key=TWITTER_ACCESS_TOKEN_KEY,
    access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
)
@dataclass
class glgl_regulation:
    """Photography-regulation details extracted from a single tweet.

    All fields are plain strings taken from the tweet text by
    parse_glgl_regulation().
    """

    # Text preceding "さんです" on the line that names the group.
    group: str
    # Second non-empty line of the tweet.
    event: str
    # First non-empty line of the tweet (presumably the date/time -- not
    # validated or parsed).
    dt: str
    # Text following "in " on the line that names the venue.
    place: str
    # Regulation wording collected from the lines mentioning "撮影" or "静止画".
    regulation_comment: str
def get_tweets(client=None, screen_name=None):
    """Download a user's timeline page by page, newest tweets first.

    Args:
        client: Object exposing ``GetUserTimeline(screen_name=, max_id=,
            count=)``. Defaults to the module-level ``api`` client.
        screen_name: Account to read. Defaults to the module-level
            ``SCREEN_NAME``.

    Returns:
        A new list with every tweet the API handed back, without duplicates
        across pages. Empty when the account has no retrievable tweets.
    """
    client = api if client is None else client
    screen_name = SCREEN_NAME if screen_name is None else screen_name

    timeline = []
    max_id = None  # None on the first request: start from the newest tweet.
    while True:
        page = client.GetUserTimeline(
            screen_name=screen_name, max_id=max_id, count=200
        )
        # Check for an empty page BEFORE calling min(), which raises
        # ValueError on an empty sequence.
        if not page:
            break
        oldest = min(tweet.id for tweet in page)
        if max_id is not None and oldest > max_id:
            # The server ignored max_id, so paging cannot make progress;
            # stop rather than loop forever.
            break
        timeline.extend(page)
        # max_id is inclusive, so ask for strictly older tweets next time;
        # otherwise the oldest tweet of each page is fetched twice.
        max_id = oldest - 1
    return timeline
def parse_glgl_regulation(text: str) -> glgl_regulation:
    """Extract the regulation fields from the text of one tweet.

    The tweet is split into non-empty lines. The first line is taken as the
    date/time and the second as the event name. The remaining fields are
    located by marker text:

    * group: the part before the last "さんです" on a line,
    * place: the part after the first "in " on a line,
    * regulation: the part before the last "です" on every line mentioning
      "撮影" or "静止画", joined with single spaces.

    Args:
        text: Raw tweet text.

    Returns:
        A ``glgl_regulation``. A field whose marker is missing from the tweet
        is returned as an empty string instead of raising, so one oddly
        formatted tweet does not abort a whole run.
    """
    lines = [line for line in text.split('\n') if line]

    # Positional fields; tolerate tweets with fewer than two non-empty lines.
    dt = lines[0] if lines else ""
    event = lines[1] if len(lines) > 1 else ""

    groups = []
    places = []
    regulation_parts = []
    for line in lines:
        if "さんです" in line:
            groups.append(line.rpartition("さんです")[0])
        if "in " in line:
            places.append(line.partition("in ")[2])
        if "撮影" in line or "静止画" in line:
            head, marker, _ = line.rpartition("です")
            # A regulation line without "です" is kept whole rather than
            # dropped (the regex-based lookup would have failed on it).
            regulation_parts.append(head if marker else line)

    # Diagnostics: exactly one group line and one place line are expected.
    if len(groups) != 1:
        print(groups)
    if len(places) != 1:
        print(places)

    return glgl_regulation(
        groups[0] if groups else "",
        event,
        dt,
        places[0] if places else "",
        " ".join(regulation_parts),
    )
if __name__ == "__main__":
    # Fetch the timeline and keep only tweets that mention photography
    # ("撮影") or still images ("静止画"); each one becomes a parsed record.
    timeline = get_tweets()
    regulations = [
        parse_glgl_regulation(tweet.text)
        for tweet in timeline
        if "撮影" in tweet.text or "静止画" in tweet.text
    ]
    regulations.sort(key=lambda r: r.group)

    # Emit a Markdown table: group | regulation | date/time | place.
    print('| グループ名 | レギュレーション | 日時 | 場所 |')
    print('|---|---|---|---|')
    for r in regulations:
        # The original line had a stray "k" where the comma before r.place
        # belongs, which made the whole file a SyntaxError.
        print('| ', r.group, ' | ', r.regulation_comment, ' | ', r.dt, ' | ', r.place, ' |')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment