Skip to content

Instantly share code, notes, and snippets.

@lackofdream
Last active February 24, 2016 06:11
Show Gist options
  • Save lackofdream/1457484a5a1889ba7ba0 to your computer and use it in GitHub Desktop.
Save lackofdream/1457484a5a1889ba7ba0 to your computer and use it in GitHub Desktop.
学生个人信息系统
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import re
def get_start_day():
    """Return the datetime that week 1 of the timetable is counted from.

    The value is fixed at midnight on 2016-02-29; `get_time` adds its week
    and weekday offsets to it.
    """
    first_day = datetime(year=2016, month=2, day=29)
    return first_day
def date_to_str(dt):
    """Format *dt* as a compact ``YYYYMMDDTHHMMSS`` timestamp string.

    This is the layout written after DTSTART / DTEND in the generated events.
    """
    timestamp_layout = '%Y%m%dT%H%M%S'
    return dt.strftime(timestamp_layout)
# Two-letter weekday codes emitted as the RRULE BYDAY value. get_time indexes
# this list with the day number parsed from a course time; 'SU' sits at both
# index 0 and index 7.
bydays = ['SU', 'MO', 'TU', 'WE', 'TH', 'FR', 'SA', 'SU']
def ics_head() -> str:
    """Return the opening lines of the calendar file.

    The text opens the VCALENDAR object, sets the calendar-level properties
    and declares a fixed ``Asia/Shanghai`` VTIMEZONE (+08:00 standard time
    only), which the generated events reference through ``TZID``.
    """
    # RFC 5545 defines "2.0" as the VERSION value of an iCalendar object; the
    # previous "0.1" is not a valid iCalendar version.
    return """BEGIN:VCALENDAR
PRODID:-//Ning Humbert//Schedule2ICS//ZH
VERSION:2.0
CALSCALE:GREGORIAN
METHOD:PUBLISH
BEGIN:VTIMEZONE
TZID:Asia/Shanghai
BEGIN:STANDARD
TZOFFSETFROM:+0800
TZOFFSETTO:+0800
TZNAME:CST
DTSTART:19700101T000000
END:STANDARD
END:VTIMEZONE
"""
def ics_tail() -> str:
    """Return the line that closes the VCALENDAR object, newline included."""
    closing_line = "END:VCALENDAR\n"
    return closing_line
def gen_ics(body: str, path: str = 'test.ics'):
    """Write a complete calendar file: header, *body*, then footer.

    Args:
        body: Already formatted event text to place between the output of
            `ics_head` and `ics_tail`.
        path: Destination file. Defaults to ``test.ics``, the name that was
            previously hard-coded.
    """
    # The source page may be GBK-encoded (see read_html_to_bs), so the event
    # text can contain non-ASCII characters. Pin UTF-8 instead of relying on
    # the platform default encoding, which may be unable to represent them.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(ics_head() + body + ics_tail())
def read_html_to_bs(path: str):
    """Load the HTML file at *path* and parse it with BeautifulSoup (lxml).

    The raw bytes are decoded with the default codec (UTF-8) first; if that
    raises UnicodeDecodeError they are decoded as GBK instead.
    """
    with open(path, 'rb') as handle:
        raw_bytes = handle.read()

    try:
        markup = raw_bytes.decode()
    except UnicodeDecodeError:
        markup = raw_bytes.decode('gbk')

    return BeautifulSoup(markup, 'lxml')
def find_table(bs):
    """Return the last ``<table>`` in *bs* whose ``bgcolor`` is ``#F2EDF8``.

    Tables are examined from last to first and the first match wins.
    Returns None when no table carries that attribute value.
    """
    wanted_colour = '#F2EDF8'
    matches = (
        tbl for tbl in reversed(bs.findAll('table'))
        if tbl.attrs.get('bgcolor') == wanted_colour
    )
    return next(matches, None)
def find_courses(table):
    """Return the ``<tr>`` rows of *table* that have exactly nine ``<td>`` cells.

    `handle_a_course` requires nine cells per row. Earlier code removed only
    the first row that did not have nine cells, so any further non-matching
    row was passed on and failed there. Every non-matching row is now
    skipped; the result is unchanged when the table has at most one such row.

    Args:
        table: An element exposing ``findAll``, as returned by `find_table`.

    Returns:
        A list of the matching rows, in document order.
    """
    return [row for row in table.findAll('tr') if len(row.findAll('td')) == 9]
def vanish_dash(maybe_dash: str) -> list:
    """Expand ``"a-b"`` into ``[a, ..., b]``, or ``"n"`` into ``[n]``.

    The text is split at its first ``-``. Any part that is not a valid
    integer makes the whole result an empty list.
    """
    low_text, dash, high_text = maybe_dash.partition('-')
    try:
        if not dash:
            return [int(maybe_dash)]
        return list(range(int(low_text), int(high_text) + 1))
    except ValueError:
        return []
def generate_dash(none_dash: list) -> list:
    """Collapse a list of integers into ``(first, last)`` runs of consecutive values.

    A run continues only while each value is exactly one greater than the
    previous one; any other step (including a repeat or a decrease) starts a
    new run.

    The input list is left untouched. Earlier code appended a sentinel of
    1000 to the caller's list in place, which also silently dropped a run
    ending at 999.

    Args:
        none_dash: Integers in the order they should be grouped.

    Returns:
        A list of ``(first, last)`` tuples; empty for empty input.
    """
    if not none_dash:
        return []
    runs = []
    run_start = run_end = none_dash[0]
    for value in none_dash[1:]:
        if value - run_end == 1:
            run_end = value
        else:
            runs.append((run_start, run_end))
            run_start = run_end = value
    # Flush the run that was still open when the input ended.
    runs.append((run_start, run_end))
    return runs
def handle_date(date: str):
    """Turn a week specification into ``(first, last)`` runs of consecutive weeks.

    Every ``N`` or ``N-M`` token found in *date* is expanded with
    `vanish_dash`; the combined list is then grouped by `generate_dash`.
    Text between the tokens is ignored.

    Args:
        date: Free text containing week numbers and/or ``N-M`` ranges.

    Returns:
        The list of ``(first, last)`` tuples produced by `generate_dash`.
    """
    # Raw string: "\d" in an ordinary literal is an invalid escape sequence
    # and triggers a warning on current Python versions.
    tokens = re.findall(r'(\d+\-\d+|\d+)', date)
    weeks = []
    for token in tokens:
        weeks += vanish_dash(token)
    return generate_dash(weeks)
def get_time(start_week: int, course_time: str):
    """Work out the weekday code and first start/end datetimes of a class.

    Args:
        start_week: 1-based week number, counted from `get_start_day`, in
            which the class first takes place.
        course_time: Text of the form ``"<day>-<period>"``. ``<day>`` indexes
            `bydays` and is also the 1-based day offset into the week.
            ``<period>`` selects a time slot: periods 1-5 have their own
            slots, and any other number uses the 20:35-22:20 slot.

    Returns:
        A dict with keys ``'byday'`` (weekday code), ``'start_time'`` and
        ``'end_time'`` (naive datetimes on the computed day).

    Raises:
        ValueError: If *course_time* has no ``-`` or a part is not an integer.
        IndexError: If the day number is outside the range of `bydays`.
    """
    # Period number -> ((start hour, start minute), (end hour, end minute)).
    slots = {
        1: ((8, 0), (9, 45)),
        2: ((10, 5), (11, 50)),
        3: ((14, 0), (15, 45)),
        4: ((16, 5), (17, 50)),
        5: ((18, 40), (20, 25)),
    }
    # Used for every period number not listed above.
    fallback_slot = ((20, 35), (22, 20))

    day_text, dash, period_text = course_time.partition('-')
    if not dash:
        # Explicit check instead of `assert`, which is stripped under -O.
        raise ValueError(
            "course_time must look like '<day>-<period>', got {!r}".format(course_time)
        )

    day_number = int(day_text)
    course_day = bydays[day_number]
    course_seq = int(period_text)

    that_day = get_start_day() + timedelta(weeks=start_week - 1) + timedelta(days=day_number - 1)

    (start_hour, start_minute), (end_hour, end_minute) = slots.get(course_seq, fallback_slot)
    return {
        'byday': course_day,
        'start_time': datetime(
            that_day.year, that_day.month, that_day.day, start_hour, start_minute
        ),
        'end_time': datetime(
            that_day.year, that_day.month, that_day.day, end_hour, end_minute
        ),
    }
def _escape_ics_text(value: str) -> str:
    """Escape *value* for use as an iCalendar TEXT property value."""
    return (
        value.replace('\\', '\\\\')
        .replace(';', '\\;')
        .replace(',', '\\,')
        .replace('\r\n', '\\n')
        .replace('\n', '\\n')
    )


def handle_a_course(_course):
    """Render one timetable row as one or more weekly-recurring VEVENT blocks.

    One event is produced per run of consecutive weeks returned by
    `handle_date`; each recurs weekly for the length of its run.

    Args:
        _course: A ``<tr>`` element with exactly nine ``<td>`` cells. Cells
            0, 2, 3, 6, 7 and 8 are read as name, id, secondary id, location,
            time and week specification respectively.

    Returns:
        The concatenated VEVENT text (empty if the row lists no weeks).

    Raises:
        ValueError: If the row does not have exactly nine cells.
    """
    cells = _course.findAll('td')
    if len(cells) != 9:
        # Explicit check instead of `assert`, which is stripped under -O.
        raise ValueError(
            'expected a course row with 9 <td> cells, got {}'.format(len(cells))
        )

    # [:-1] drops the final character of each cell's text.
    # NOTE(review): presumably a trailing whitespace character in the source
    # page - confirm against a real export.
    course_name = cells[0].get_text()[:-1]
    course_id = cells[2].get_text()[:-1]
    course_nid = cells[3].get_text()[:-1]
    # Cells 4 and 5 are not used.
    course_location = cells[6].get_text()[:-1]
    course_time = cells[7].get_text()[:-1]
    course_date = cells[8].get_text()[:-1]
    course_uid = course_id + course_nid + course_time

    events = []
    for first_week, last_week in handle_date(course_date):
        time_info = get_time(first_week, course_time)
        events.append("""BEGIN:VEVENT
DTSTART;TZID=Asia/Shanghai:{start_time}
DTEND;TZID=Asia/Shanghai:{end_time}
RRULE:FREQ=WEEKLY;COUNT={count};BYDAY={byday}
UID:{course_uid}
DESCRIPTION:
LOCATION:{location}
SEQUENCE:1
STATUS:CONFIRMED
SUMMARY:{course_name}
TRANSP:OPAQUE
END:VEVENT
""".format(
            start_time=date_to_str(time_info['start_time']),
            end_time=date_to_str(time_info['end_time']),
            byday=time_info['byday'],
            # The first week is appended so each run gets its own UID.
            course_uid=course_uid + str(first_week),
            # SUMMARY and LOCATION are TEXT values; ',' ';' and '\' must be
            # escaped or calendar clients may misparse them.
            location=_escape_ics_text(course_location),
            course_name=_escape_ics_text(course_name),
            count=last_week - first_week + 1,
        ))
    return "".join(events)
if __name__ == '__main__':
    # Script entry: parse b.html, convert every course row and write the
    # calendar file.
    table = find_table(read_html_to_bs('b.html'))
    if table is None:
        # find_table returns None when nothing matches; stop with a clear
        # message rather than failing later with an AttributeError.
        raise SystemExit("b.html: no <table> with bgcolor '#F2EDF8' was found")
    gen_ics("".join(handle_a_course(course) for course in find_courses(table)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment