DanielOaks/gensite.py

## gensite.py
#!/usr/bin/env python3
"""gensite.py

Usage:
    gensite.py <group> <source-dir>
    gensite.py -h | --help
    gensite.py --version

Options:
    <source-dir>    Where the directory we'll be grabbing files from exists.
    -h --help       Show this screen.
    --version       Show version.
"""
import email
import html
import os

import dateparser
import dateutil.parser
import pytz
from docopt import docopt

arguments = docopt(__doc__, version='gensite.py 1.0')

# Placeholder timestamp for posts whose Date header is missing or cannot be
# parsed. Being far in the future, it sorts such posts after every dated post
# and never wins the "earliest post" comparison that dates a thread.
_UNKNOWN_DATE = dateutil.parser.parse('3000-12-25').replace(tzinfo=pytz.UTC)

# Literal substitutions applied, in this order, to a Date header before it is
# handed to the date parsers.
_DATE_FIXUPS = (
    ('UNDEFINED', 'UTC'),
    ('pacific', 'PST'),
    ('Central', 'CST'),
    ('PACIFIC', 'PST'),
    ('est', 'EST'),
    ('-600', '-0600'),
)


def _parse_post_time(raw_date):
    """Return a timezone-aware datetime for a post's Date header.

    dateutil is tried first and dateparser second. A naive result is
    treated as UTC. When the header is missing (None) or neither parser
    understands it, _UNKNOWN_DATE is returned.
    """
    if raw_date is None:
        return _UNKNOWN_DATE

    date_string = raw_date.strip()
    for old, new in _DATE_FIXUPS:
        date_string = date_string.replace(old, new)

    try:
        post_time = dateutil.parser.parse(date_string)
    except Exception:
        # Deliberately broad so one odd header cannot abort the whole run,
        # but unlike a bare except it lets KeyboardInterrupt/SystemExit through.
        post_time = dateparser.parse(date_string)
        if post_time is None:
            print('fucking date format is broke:', date_string)
            return _UNKNOWN_DATE

    if post_time.tzinfo is None:
        post_time = post_time.replace(tzinfo=pytz.UTC)
    return post_time


def _thread_title(subject):
    """Return the first line of *subject* without leading 'Re: ' prefixes.

    A missing Subject header (None) yields an empty title.
    """
    title = (subject or '').split('\n')[0]
    while title.startswith('Re: '):
        title = title[4:]
    return title


# [earliest post time, thread directory name] pairs, sorted later for the index
sorted_threads = []
# thread directory name -> {'name', 'datetime', 'posts', 'sorted_posts'}
threads = {}
for (dirpath, dirnames, filenames) in os.walk(arguments['<source-dir>']):
    for dirname in dirnames:
        # Every file found anywhere below the thread directory is one post,
        # keyed by its file name.
        posts = {}
        for (dpath, dnames, fnames) in os.walk(os.path.join(dirpath, dirname)):
            for post in fnames:
                with open(os.path.join(dpath, post)) as post_file:
                    posts[post] = email.message_from_string(post_file.read())

        if not posts:
            # No first post to take a title or date from, so this directory
            # is not recorded as a thread.
            continue

        # [post time, post id] pairs in chronological order; ties fall back
        # to comparing the post ids.
        sorted_posts = sorted(
            [_parse_post_time(info['Date']), post_id]
            for post_id, info in posts.items()
        )
        thread_time, first_post_id = sorted_posts[0]

        threads[dirname] = {
            'name': _thread_title(posts[first_post_id]['Subject']),
            'datetime': thread_time,
            'posts': posts,
            'sorted_posts': sorted_posts,
        }
        sorted_threads.append([thread_time, dirname])

# make index
# One row per thread, oldest thread first. Thread ids and names come from
# directory names and e-mail headers, so they are escaped before being placed
# into the page.
index_rows = []
for time, thread_id in sorted(sorted_threads):
    index_rows.append(
        '<div>{} - <a href="./threads/{}.html">{}</a> - {} replies</div>\n'.format(
            time,
            html.escape(thread_id, quote=True),
            html.escape(threads[thread_id]['name']),
            len(threads[thread_id]['posts']),
        )
    )
thread_index_html = ''.join(index_rows)

# The output directory is derived from the <group> argument; create it if needed
# so a first run does not fail on open().
output_dir = 'gen-{}'.format(arguments['<group>'])
os.makedirs(output_dir, exist_ok=True)

with open(os.path.join(output_dir, 'index.html'), 'w') as index:
    index.write("""
    <html>
        <body>
            <h1>{board}</h1>
            {threads}
        </body>
    </html>
    """.format(board=html.escape(arguments['<group>']), threads=thread_index_html))

#TODO(dan): make paginated thread index

# make threads themselves
# Each thread becomes gen-<group>/threads/<thread id>.html; create the
# directory if needed so a first run does not fail on open().
threads_dir = os.path.join('gen-{}'.format(arguments['<group>']), 'threads')
os.makedirs(threads_dir, exist_ok=True)

for thread_id, thread in threads.items():
    # Posts are dumped as raw message text in chronological order. The text is
    # escaped *before* newlines become <br/>, so markup inside a post is
    # displayed rather than interpreted by the browser.
    posts_html = ''.join(
        '<div style="font-family: monospace">{}</div>'.format(
            html.escape(str(thread['posts'][post_id])).replace('\n', '<br/>')
        ) + '<hr/>\n'
        for _, post_id in thread['sorted_posts']
    )

    with open(os.path.join(threads_dir, '{}.html'.format(thread_id)), 'w') as page:
        page.write("""
    <html>
        <body>
            <h1>{board}</h1>
            <h2>{thread_name}</h2>
            <span class="date">{datetime}</span>
            {posts}
        </body>
    </html>
""".format(
            board=html.escape(arguments['<group>']),
            thread_name=html.escape(thread['name']),
            datetime=thread['datetime'],
            posts=posts_html,
        ))


print('I have', len(threads), 'threads')
	#!/usr/bin/env python3
	"""gensite.py

	Usage:
	gensite.py <group> <source-dir>
	gensite.py -h | --help
	gensite.py --version

	Options:
	<source-dir> Where the directory we'll be grabbing files from exists.
	-h --help Show this screen.
	--version Show version.
	"""
	import os
	import email
	import pytz
	import dateparser
	import dateutil.parser
	from docopt import docopt

	arguments = docopt(__doc__, version='gensite.py 1.0')

	# Placeholder timestamp for posts whose Date header is missing or cannot be
	# parsed. Being far in the future, it sorts such posts after every dated post
	# and never wins the "earliest post" comparison that dates a thread.
	_UNKNOWN_DATE = dateutil.parser.parse('3000-12-25').replace(tzinfo=pytz.UTC)

	# Literal substitutions applied, in this order, to a Date header before it is
	# handed to the date parsers.
	_DATE_FIXUPS = (
	    ('UNDEFINED', 'UTC'),
	    ('pacific', 'PST'),
	    ('Central', 'CST'),
	    ('PACIFIC', 'PST'),
	    ('est', 'EST'),
	    ('-600', '-0600'),
	)


	def _parse_post_time(raw_date):
	    """Return a timezone-aware datetime for a post's Date header.

	    dateutil is tried first and dateparser second. A naive result is
	    treated as UTC. When the header is missing (None) or neither parser
	    understands it, _UNKNOWN_DATE is returned.
	    """
	    if raw_date is None:
	        return _UNKNOWN_DATE

	    date_string = raw_date.strip()
	    for old, new in _DATE_FIXUPS:
	        date_string = date_string.replace(old, new)

	    try:
	        post_time = dateutil.parser.parse(date_string)
	    except Exception:
	        # Deliberately broad so one odd header cannot abort the whole run,
	        # but unlike a bare except it lets KeyboardInterrupt/SystemExit through.
	        post_time = dateparser.parse(date_string)
	        if post_time is None:
	            print('fucking date format is broke:', date_string)
	            return _UNKNOWN_DATE

	    if post_time.tzinfo is None:
	        post_time = post_time.replace(tzinfo=pytz.UTC)
	    return post_time


	def _thread_title(subject):
	    """Return the first line of *subject* without leading 'Re: ' prefixes.

	    A missing Subject header (None) yields an empty title.
	    """
	    title = (subject or '').split('\n')[0]
	    while title.startswith('Re: '):
	        title = title[4:]
	    return title


	# [earliest post time, thread directory name] pairs, sorted later for the index
	sorted_threads = []
	# thread directory name -> {'name', 'datetime', 'posts', 'sorted_posts'}
	threads = {}
	for (dirpath, dirnames, filenames) in os.walk(arguments['<source-dir>']):
	    for dirname in dirnames:
	        # Every file found anywhere below the thread directory is one post,
	        # keyed by its file name.
	        posts = {}
	        for (dpath, dnames, fnames) in os.walk(os.path.join(dirpath, dirname)):
	            for post in fnames:
	                with open(os.path.join(dpath, post)) as post_file:
	                    posts[post] = email.message_from_string(post_file.read())

	        if not posts:
	            # No first post to take a title or date from, so this directory
	            # is not recorded as a thread.
	            continue

	        # [post time, post id] pairs in chronological order; ties fall back
	        # to comparing the post ids.
	        sorted_posts = sorted(
	            [_parse_post_time(info['Date']), post_id]
	            for post_id, info in posts.items()
	        )
	        thread_time, first_post_id = sorted_posts[0]

	        threads[dirname] = {
	            'name': _thread_title(posts[first_post_id]['Subject']),
	            'datetime': thread_time,
	            'posts': posts,
	            'sorted_posts': sorted_posts,
	        }
	        sorted_threads.append([thread_time, dirname])

	# make index
	# One row per thread, oldest thread first. Thread ids and names come from
	# directory names and e-mail headers, so they are escaped before being placed
	# into the page.
	index_rows = []
	for time, thread_id in sorted(sorted_threads):
	    index_rows.append(
	        '<div>{} - <a href="./threads/{}.html">{}</a> - {} replies</div>\n'.format(
	            time,
	            html.escape(thread_id, quote=True),
	            html.escape(threads[thread_id]['name']),
	            len(threads[thread_id]['posts']),
	        )
	    )
	thread_index_html = ''.join(index_rows)

	# The output directory is derived from the <group> argument; create it if needed
	# so a first run does not fail on open().
	output_dir = 'gen-{}'.format(arguments['<group>'])
	os.makedirs(output_dir, exist_ok=True)

	with open(os.path.join(output_dir, 'index.html'), 'w') as index:
	    index.write("""
	<html>
	<body>
	<h1>{board}</h1>
	{threads}
	</body>
	</html>
	""".format(board=html.escape(arguments['<group>']), threads=thread_index_html))

	#TODO(dan): make paginated thread index

	# make threads themselves
	# Each thread becomes gen-<group>/threads/<thread id>.html; create the
	# directory if needed so a first run does not fail on open().
	threads_dir = os.path.join('gen-{}'.format(arguments['<group>']), 'threads')
	os.makedirs(threads_dir, exist_ok=True)

	for thread_id, thread in threads.items():
	    # Posts are dumped as raw message text in chronological order. The text is
	    # escaped *before* newlines become <br/>, so markup inside a post is
	    # displayed rather than interpreted by the browser.
	    posts_html = ''.join(
	        '<div style="font-family: monospace">{}</div>'.format(
	            html.escape(str(thread['posts'][post_id])).replace('\n', '<br/>')
	        ) + '<hr/>\n'
	        for _, post_id in thread['sorted_posts']
	    )

	    with open(os.path.join(threads_dir, '{}.html'.format(thread_id)), 'w') as page:
	        page.write("""
	<html>
	<body>
	<h1>{board}</h1>
	<h2>{thread_name}</h2>
	<span class="date">{datetime}</span>
	{posts}
	</body>
	</html>
	""".format(
	            board=html.escape(arguments['<group>']),
	            thread_name=html.escape(thread['name']),
	            datetime=thread['datetime'],
	            posts=posts_html,
	        ))


	print('I have', len(threads), 'threads')