Skip to content

Instantly share code, notes, and snippets.

@epoz
Created October 1, 2012 19:02

Revisions

  1. epoz revised this gist Oct 1, 2012. 1 changed file with 3 additions and 1 deletion.
    4 changes: 3 additions & 1 deletion gimmesrc.py
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,6 @@
    #!/usr/bin/env python
    # Example: python gimmesrc.py De_Cive > txt

    import sys, urllib, urllib2

    URL = 'http://en.wikisource.org/w/index.php?action=raw&title='
    @@ -32,4 +34,4 @@
    if line.startswith('[['):
    in_contents = False
    if in_contents:
    print line
    print line
  2. epoz created this gist Oct 1, 2012.
    35 changes: 35 additions & 0 deletions gimmesrc.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,35 @@
    #!/usr/bin/env python
    """Dump the prose of every sub-page of a Wikisource work to stdout.

    Usage: python gimmesrc.py TITLE > output.txt

    The raw wikitext of TITLE is fetched, the sub-page links listed under its
    ``===Contents===`` heading are collected, and for each sub-page the lines
    that follow ``<div class="prose">`` are written to standard output.

    All page text is handled as byte strings so that the output is exactly
    the bytes the server sent, on both Python 2 and Python 3.
    """
    import sys

    try:  # Python 2 module layout
        from urllib import quote_plus
        from urllib2 import urlopen
    except ImportError:  # Python 3 module layout
        from urllib.parse import quote_plus
        from urllib.request import urlopen

    URL = 'http://en.wikisource.org/w/index.php?action=raw&title='


    def fetch_raw(page):
        """Return the raw wikitext of *page* as a byte string.

        *page* is appended verbatim to ``URL``, so it must already be safe to
        embed in a query string.
        """
        response = urlopen(URL + page)
        try:
            return response.read()
        finally:
            # Release the connection even if the read fails part-way.
            response.close()


    def subpage_names(index_text):
        """Return the sub-page link targets listed under ``===Contents===``.

        *index_text* is the raw wikitext (bytes) of the work's main page.
        The contents list runs from the heading to the next empty line.  Only
        targets starting with ``/`` (relative sub-pages) are returned, in
        document order, with list/link markup and surrounding blanks removed.
        """
        names = []
        in_contents = False
        for line in index_text.split(b'\n'):
            if line.strip() == b'===Contents===':
                in_contents = True
                continue
            if not in_contents:
                continue
            if line == b'':
                # An empty line terminates the contents list.
                in_contents = False
                continue
            # "*[[/Target|Label]]" -> "/Target".  Closing brackets and blanks
            # are stripped as well so unpiped links ("* [[/Target]]") work.
            target = line.split(b'|')[0].strip(b'*[] \t\r')
            if target.startswith(b'/'):
                names.append(target)
        return names


    def prose_lines(page_text):
        """Return the lines of *page_text* (bytes) that belong to the prose.

        Collection starts after a ``<div class="prose">`` line and stops at
        the first following line that begins with ``[[``.
        """
        kept = []
        in_prose = False
        for line in page_text.split(b'\n'):
            if line.strip() == b'<div class="prose">':
                in_prose = True
                continue
            if line.startswith(b'[['):
                in_prose = False
            if in_prose:
                kept.append(line)
        return kept


    def main(argv):
        """Run the script with command line *argv*; return the exit status.

        Returns 2 (after printing a usage line to stderr) when no title was
        given, 0 otherwise.  Network errors propagate to the caller.
        """
        if len(argv) < 2:
            prog = argv[0] if argv else 'gimmesrc.py'
            sys.stderr.write('usage: %s TITLE\n' % prog)
            return 2
        # Wiki titles use underscores for spaces; a literal space would
        # produce an invalid request URL.
        title = argv[1].replace(' ', '_')
        # Python 3 needs the underlying binary stream to write bytes.
        out = getattr(sys.stdout, 'buffer', sys.stdout)
        for name in subpage_names(fetch_raw(title)):
            out.write(name + b'\n')
            quoted = quote_plus(name.replace(b' ', b'_'))
            for line in prose_lines(fetch_raw(title + quoted)):
                out.write(line + b'\n')
        return 0


    if __name__ == '__main__':
        sys.exit(main(sys.argv))