# /duolingo.recipe

## duolingo.recipe
"""
duolingo.com
"""

import json
import re
import urllib
from difflib import SequenceMatcher

from calibre.web.feeds.news import BasicNewsRecipe


__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'


"""
Download all Duolingo lesson tips and notes for your language
and turn them into a handy reference book!

USAGE:

1) you must have calibre installed. Download it from https://calibre-ebook.com/
Calibre is free, cross-platform e-book creation / conversion / management software.

2) go to https://www.duolingo.com/ , log in,
   and make sure you have switched to the language you want to download

3) from the command line, type

ebook-convert duolingo.recipe <outputfilename>.<ext> --username <myduolingousername> --password <myduolingopassword> -vv --test

where <ext> is the book format you want, that is, epub azw3 mobi pdf
if you omit <outputfilename> it will use the same name as input, i.e., duolingo

Example:

ebook-convert duolingo.recipe duo_french.epub --username bob --password mary123 -vv --test

4) This creates a test book called duo_french.epub with only 2 lessons maximum.
Open it. If everything looks good, run the command again without
the --test this time, i.e.,

ebook-convert duolingo.recipe duo_french.epub --username bob --password mary123 -vv

Notes:
a) The -vv tells it to spit out some possibly useful debug info.
b) Once you have the epub, you can convert it to other formats without running
the script (and downloading everything) again, like this:

ebook-convert duo_french.epub .mobi --username bob --password mary123 -vv
ebook-convert duo_french.epub .azw3 --username bob --password mary123 -vv
ebook-convert duo_french.epub .pdf --username bob --password mary123 -vv

Good luck! Contact heybart on reddit if you have a problem
"""

class DuolingoLessons(BasicNewsRecipe):
    """Recipe that builds a reference book from Duolingo lesson tips and notes.

    parse_index() lists the skills of the language the user is currently
    learning; preprocess_raw_html() turns each skill's JSON into one article.
    """

    # A few customizable options

    # how book title will appear
    # {} will be replaced with language name
    title_with_vocabs = u'{} grammar and vocabulary with Duolingo'
    title_no_vocabs   = u'Learning {} with Duolingo'

    # URL of image to use as the book cover
    # set to a web address or local file
    # example: 'http://i.imgur.com/KDslMRP.jpg' or 'c:/pics/owl.png'
    #   Windows users: use forward slashes / instead of backslashes \
    # set to 'auto' to use the default picture of the duolingo owl

    cover_url   = 'auto'

    # cover_url   = 'https://duolingo-images.s3.amazonaws.com/avatars/15224667/q13kbDuwyI/xlarge' # American English
    # cover_url   = 'http://d2rhekw5qr4gcj.cloudfront.net/img/400sqf/from/uploads/course_photos/8947308000160730232415.png' # spanish
    # cover_url   = 'http://i.imgur.com/ZUIigS0.png' # german
    # cover_url   = 'http://i.imgur.com/KDslMRP.jpg' #french
    # cover_url   = 'http://d2rhekw5qr4gcj.cloudfront.net/img/400sqf/from/uploads/course_photos/2656444000150612030056.png' # norwegian

    # description for your book, shows up in metadata
    description  = 'Duolingo Lesson Tips and Notes'

    # include lesson words? either True or False
    include_vocabs    = False

    # (if include_vocabs = True) include definitions of lessons words?
    include_defs      = True

    # (if include_defs = True) put definitions inline instead of popup footnotes?
    inline_defs       = False

    definition_color  = '#222'

    # symbol to indicate lesson has tips & notes
    dagger = u'\u2020'

    # ------------------------
    # don't mess with rest of this stuff unless you know what you're doing :)
    # see https://manual.calibre-ebook.com/news_recipe.html
    # for documentation on calibre recipe API
    # -------------------------
    __author__  = 'heybart on reddit'
    __version__ = '0.16.3a'

    # there should be no reason to change this
    index_url = 'https://www.duolingo.com'
    login_url = 'https://www.duolingo.com/login'

    max_articles_per_feed = 150
    no_stylesheets     = False
    no_javascript      = True
    needs_subscription = True

    extra_css = (
        '.calibre_navbar {display:none} '
        'a.sup, a sup { text-decoration: none !important; } '
        'sup.invis { color: white !important; } '
        'table { border-bottom: 1px solid #888; } '
        '.vocabs { border-bottom: 1px dotted #888; } '
        'li.vocab { margin-top: 3px; margin-bottom: 3px; } '
        '.vocab_word { font-size: 1em; font-weight: bold } '
        '.vocab_def { font-size: 0.90em; color: ' + definition_color + '; } '
        'th { border-bottom: 1px dotted #aaa; } td, th {padding: 5px; } '
        '.footnotes { page-break-before: always;} '
        'h1 { font-size: 1.3em; border-bottom: 1px solid #aaa;} '
        'h2 { font-size: 1.2em; } h3 { font-size: 1.15em; } '
        'h4 { font-size: 1.10em; } h5,h6 { font-size: 1.05em; } '
    )

    # green duolingo owl, no specific language
    default_cover_url = 'http://65.media.tumblr.com/5fd6b3ccc4e8c978c87f469b236558ad/tumblr_inline_mwkqv1OuOg1ss97ol.png'

    learning_language     = None
    learning_language_id  = None

    # indices to use for <a name=...> to ensure uniqueness
    # (class-level dict: the counters are shared by every instance)
    a_indices = {}

    def get_browser(self):
        """Return a browser after posting the credentials to ``login_url``."""
        # urlencode lives in urllib on Python 2 and in urllib.parse on Python 3
        try:
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode
        br = BasicNewsRecipe.get_browser(self)
        data = {'login': self.username, 'password': self.password}
        br.open(self.login_url, urlencode(data))
        return br

    def get_raw(self, url):
        """Fetch ``url`` with a fresh base-class browser and return the body."""
        br = BasicNewsRecipe.get_browser(self)
        return br.open(url).read()

    def get_json(self, url):
        """Fetch ``url`` and return its body parsed as JSON."""
        return json.loads(self.get_raw(url))

    def lookup_definitions(self, learning_lang, from_lang, words):
        """
        use API call to look up definition of words
        learning_lang   := language to translate to
        from_lang       := language to translate from
        words           := list of lists of words (flattened before the call)
        example:
            learning_lang   = "fr"
            from_lang       = "en"
            words           = [["me","femme"],["pays"]]
            https://d2.duolingo.com/api/1/dictionary/hints/fr/en?tokens=["me","femme","pays"]
        returns
            {"me": ["me", "myself"], "pays": ["country", "countries", "land", "region", "village"], "femme": ["woman", "wife"]}
        """
        # flatten list
        flat_words = [item for sublist in words for item in sublist]
        params = json.dumps(flat_words, separators=(',', ':'))
        url = 'https://d2.duolingo.com/api/1/dictionary/hints/{}/{}?tokens={}' \
            .format(learning_lang, from_lang, urlquote(params))
        return self.get_json(url)

    def make_anchor(self, prefix, name=''):
        """Return a unique anchor id: prefix + running index + sanitized name.

        A separate counter (starting at 1) is kept for every prefix; all
        non-word characters are stripped from ``name``.
        """
        idx = self.a_indices.get(prefix, 1)
        self.a_indices[prefix] = idx + 1
        return u'{}{}{}'.format(prefix, idx, re.sub(r'\W', '', name))

    def _get_vocabs(self, lang_data):
        """Return ``(vocabs_html, endnotes_html)`` built from lang_data.

        Both strings are empty when vocabularies are switched off or the
        skill has no 'lessonWords'. endnotes_html is empty unless popup
        footnotes were actually generated.
        """
        if not self.include_vocabs:
            return ('', '')
        # list of list of words, one list for each lesson
        word_lists = lang_data.get('lessonWords')
        if word_lists is None:
            return ('', '')
        vocab_section = self.make_anchor('voc_')
        defs = {}
        if self.include_defs:
            defs = self.lookup_definitions(
                self.learning_language,
                lang_data['fromLanguage'], word_lists)
        items = []
        endnotes = []
        for words in word_lists:
            if not self.include_defs:
                items.append(u', &nbsp;'.join(words))
                continue
            entries = []
            for word in words:
                # a word missing from the lookup result gets an empty definition
                meaning = u', '.join(defs.get(word, []))
                if self.inline_defs:
                    entries.append((
                        u'&#9830;&nbsp;<span class="vocab_word">{}</span>: '
                        u'<span class="vocab_def"> {}</span>').format(
                        word, meaning))
                else:
                    # kindle formats require a superscripted link from noteref
                    # to the footnote and a link back from footnote to noteref
                    # to make popup footnote work
                    fn = self.make_anchor('fn_', word)       # name for footnote
                    ref = self.make_anchor('ref_', word)     # name for noteref
                    entries.append((
                        u'{2}<a id="{0}" href="#{1}" epub:type="noteref">'
                        u'<sup>*</sup></a>').format(ref, fn, word))
                    endnotes.append((
                        u'<p id="{0}" epub:type="footnote">'
                        u'<a href="#{3}">{1}</a>: {2}</p>')
                        .format(fn, word, meaning, ref))
            items.append(u',&nbsp; '.join(entries))
        vocabs = u'<div class="vocabs" id="{}"><ol>{}</ol></div>'.format(
            vocab_section,
            u''.join(u'<li>{}</li>'.format(item) for item in items))
        footnotes = ''
        if endnotes:
            # only emit the aside when there are footnotes: the .footnotes
            # style forces a page break before it
            footnotes = (
                u'<aside epub:type="footnotes" class="footnotes">{}</aside>'
            ).format(u''.join(endnotes))
        return (vocabs, footnotes)

    def _get_notes(self, lang_data):
        """Return the cleaned tips & notes from lang_data['explanation'].

        Returns '' when the explanation is missing or shorter than 200
        characters.
        """
        notes = lang_data.get('explanation') or ''
        if len(notes) < 200:
            return ''
        # if notes contains a heading similar to title, remove it
        # because we'll add the title ourselves
        hreg = re.compile(r'^\s*<h\d>(.+?)</h\d>')
        m = hreg.match(notes)
        if m and similar(m.group(1), lang_data['name']):
            notes = hreg.sub('', notes, count=1)
        # strip out extraneous "<hr /> blah blah" near bottom
        notes = re.sub(r'<hr />.{,5}a href(.+)?$', '', notes,
                       count=1, flags=re.DOTALL)
        # strip out extraneous "blah blah <hr />" near top
        notes = re.sub(r'^.{,100}<hr />', '', notes,
                       count=1, flags=re.DOTALL)
        return notes

    def preprocess_raw_html(self, raw_html, url):
        """
        extract article title ('name') and the tips and notes ('explanation')
        and optionally vocabulary words ('lessonWords')
        from json and return result as html

        The article is aborted when the json cannot be read or when the
        skill has neither notes nor vocabs.
        """
        try:
            lang_data = json.loads(raw_html).get('skills')[0]
        except (ValueError, TypeError, AttributeError, IndexError, KeyError):
            self.abort_article('Unexpected json data')
            return

        heading = lang_data['name']
        notes = self._get_notes(lang_data)
        (vocabs, endnotes) = self._get_vocabs(lang_data)

        if not (vocabs or notes):
            self.abort_article(heading + ' has no notes or vocabs.')
            return

        class_name = ''
        # if has both notes and vocabs add a link to the heading
        # so you can skip over the vocabs and jump to the notes
        if notes and vocabs:
            anchor = self.make_anchor('notes_', heading)
            heading += u' <a href="#{}">{}</a>'.format(anchor, self.dagger)
            notes = u'<a name="{0}" id="{0}"></a>{1}'.format(anchor, notes)
            # populate_article_metadata() looks for this class
            class_name = 'has_notes'
        heading = u'<h1 class="{}">{}</h1>'.format(class_name, heading)
        return (
            u'<?xml version="1.0" encoding="utf-8"?>'
            u'<html xmlns:epub="http://www.idpf.org/2007/ops">'
            u'<head><title></title></head><body>{}{}{}{}</body></html>') \
            .format(heading, vocabs, notes, endnotes)

    def print_version(self, url):
        """
        change user facing url
        .../skill/<language>/<topic>
        to
        .../2016-04-13/skills?learningLanguage=<learning_lang>&urlName=<topic>
        this gives us the json data we really want
        """
        return re.sub(
            '/skill/[^/]+/',
            '/2016-04-13/skills?learningLanguage=' + self.learning_language_id + '&urlName=',
            url, count=1)

    def populate_article_metadata(self, article, soup, first):
        """add dagger to title if h1 class == has_notes
        """
        h = soup.find('h1')
        if h is None:
            return
        css_class = h.get('class') or ''
        # the class attribute may come back as one string or a list of names
        if not isinstance(css_class, (list, tuple)):
            css_class = css_class.split()
        if 'has_notes' in css_class:
            article.title = article.title + ' ' + self.dagger

    def get_learning_language_id(self):
        """
        look up the learning language id needed for the skill API call
        from the javascript struct duo.available_languages
        downloaded from home page. (Don't know of a more elegant way of getting this)
        usually it is the same as learning lang abbreviation but not always
        e.g., for Norwegian, language abbrev = nb, learning language id = no-BO

        Falls back to the learning language abbreviation when the struct is
        missing, unparsable, or has no entry for the language.
        """
        if self.learning_language_id is None:
            raw_str = self.get_raw(self.index_url)
            if isinstance(raw_str, bytes):
                # the pattern below is text, so the page must be text too
                raw_str = raw_str.decode('utf-8', 'replace')
            m = re.match(
                r'.+duo\.available_languages\s*=\s*(\[(.+?)\])',
                raw_str, re.DOTALL)
            langs = []
            if m is not None:
                try:
                    langs = json.loads(m.group(1))
                except ValueError:
                    self.log('Could not parse duo.available_languages')
            for lang in langs:
                if lang['key'] == self.learning_language:
                    self.learning_language_id = lang['learning_language_id']
                    self.log('learning_language_id: ', self.learning_language_id)
                    break
        # find nothing? well, let's hope for the best!
        if self.learning_language_id is None:
            self.learning_language_id = self.learning_language

    def parse_index(self):
        """
        get user data from which we get list of skills (i.e., lessons)
        as well as learning language, then build article list

        returns a single feed: [(book title, [{'title': ..., 'url': ...}, ...])]
        """
        user_data = self.get_json('https://www.duolingo.com/users/' + self.username)

        self.learning_language = user_data.get('learning_language')
        if not self.learning_language:
            self.abort_recipe_processing('Failed to get learning_language')
        self.log('learning_language detected: ', self.learning_language)

        self.get_learning_language_id()

        lang_data = user_data['language_data'][self.learning_language]
        lang_str  = lang_data['language_string']
        skills    = lang_data['skills']

        if self.include_vocabs:
            self.title = self.title_with_vocabs.format(lang_str)
        else:
            self.title = self.title_no_vocabs.format(lang_str)
        if self.cover_url == 'auto':
            self.cover_url = self.default_cover_url

        articles = []
        # skills needs to be sorted by y coord (position in tree), then x coord
        for skill in sorted(skills, key=lambda x: (x['coords_y'], x['coords_x'])):
            # user facing url; print_version() rewrites it to the json API url
            url = '{}/skill/{}/{}'.format(
                self.index_url,
                self.learning_language, urlquote(skill['url_title']))
            articles.append({'title': skill['title'], 'url': url})
        return [(self.title, articles)]

def similar(a, b):
    """Return True when strings a and b are similar, ignoring case.

    Similar means the SequenceMatcher ratio of the lower-cased strings is
    at least 0.75, with spaces, digits and common punctuation marked as junk.
    """
    junk_chars = ' -:_12345678890:/()[]?!'

    def is_junk(char):
        return char in junk_chars

    matcher = SequenceMatcher(is_junk, a.lower(), b.lower())
    return matcher.ratio() >= 0.75

def urlquote(params):
    """safely quote url params with UTF-8 encoding

    Text is encoded as UTF-8 before quoting; byte strings are quoted as they
    are. Spaces become '+' (quote_plus semantics).
    """
    # quote_plus lives in urllib on Python 2 and in urllib.parse on Python 3
    try:
        from urllib.parse import quote_plus
    except ImportError:
        from urllib import quote_plus
    # only encode text: calling .encode() on a Python 2 byte string would
    # first decode it as ASCII and fail on non-ASCII bytes
    if not isinstance(params, bytes):
        params = params.encode('UTF-8')
    return quote_plus(params)
	"""
	duolingo.com
	"""

	import json
	import re
	import urllib
	from difflib import SequenceMatcher

	from calibre.web.feeds.news import BasicNewsRecipe


	__license__ = 'GPL v3'
	__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
	__docformat__ = 'restructuredtext en'


	"""
	Download all Duolingo lesson tips and notes for your language
	and turn them into a handy reference book!

	USAGE:

	1) you must have calibre installed. Download it from https://calibre-ebook.com/
	Calibre is free, cross-platform e-book creation / conversion / management software.

	2) go to https://www.duolingo.com/ , log in,
	and make sure you have switched to the language you want to download

	3) from the command line, type

	ebook-convert duolingo.recipe <outputfilename>.<ext> --username <myduolingousername> --password <myduolingopassword> -vv --test

	where <ext> is the book format you want, that is, epub azw3 mobi pdf
	if you omit <outputfilename> it will use the same name as input, i.e., duolingo

	Example:

	ebook-convert duolingo.recipe duo_french.epub --username bob --password mary123 -vv --test

	4) This creates a test book called duo_french.epub with only 2 lessons maximum.
	Open it. If everything looks good, run the command again without
	the --test this time, i.e.,

	ebook-convert duolingo.recipe duo_french.epub --username bob --password mary123 -vv

	Notes:
	a) The -vv tells it to spit out some possibly useful debug info.
	b) Once you have the epub, you can convert it to other formats without running
	the script (and downloading everything) again, like this:

	ebook-convert duo_french.epub .mobi --username bob --password mary123 -vv
	ebook-convert duo_french.epub .azw3 --username bob --password mary123 -vv
	ebook-convert duo_french.epub .pdf --username bob --password mary123 -vv

	Good luck! Contact heybart on reddit if you have a problem
	"""

	class DuolingoLessons(BasicNewsRecipe):
	    """Recipe that builds a reference book from Duolingo lesson tips and notes.

	    parse_index() lists the skills of the language the user is currently
	    learning; preprocess_raw_html() turns each skill's JSON into one article.
	    """

	    # A few customizable options

	    # how book title will appear
	    # {} will be replaced with language name
	    title_with_vocabs = u'{} grammar and vocabulary with Duolingo'
	    title_no_vocabs = u'Learning {} with Duolingo'

	    # URL of image to use as the book cover
	    # set to a web address or local file
	    # example: 'http://i.imgur.com/KDslMRP.jpg' or 'c:/pics/owl.png'
	    # Windows users: use forward slashes / instead of backslashes \
	    # set to 'auto' to use the default picture of the duolingo owl

	    cover_url = 'auto'

	    # cover_url = 'https://duolingo-images.s3.amazonaws.com/avatars/15224667/q13kbDuwyI/xlarge' # American English
	    # cover_url = 'http://d2rhekw5qr4gcj.cloudfront.net/img/400sqf/from/uploads/course_photos/8947308000160730232415.png' # spanish
	    # cover_url = 'http://i.imgur.com/ZUIigS0.png' # german
	    # cover_url = 'http://i.imgur.com/KDslMRP.jpg' #french
	    # cover_url = 'http://d2rhekw5qr4gcj.cloudfront.net/img/400sqf/from/uploads/course_photos/2656444000150612030056.png' # norwegian

	    # description for your book, shows up in metadata
	    description = 'Duolingo Lesson Tips and Notes'

	    # include lesson words? either True or False
	    include_vocabs = False

	    # (if include_vocabs = True) include definitions of lessons words?
	    include_defs = True

	    # (if include_defs = True) put definitions inline instead of popup footnotes?
	    inline_defs = False

	    definition_color = '#222'

	    # symbol to indicate lesson has tips & notes
	    dagger = u'\u2020'

	    # ------------------------
	    # don't mess with rest of this stuff unless you know what you're doing :)
	    # see https://manual.calibre-ebook.com/news_recipe.html
	    # for documentation on calibre recipe API
	    # -------------------------
	    __author__ = 'heybart on reddit'
	    __version__ = '0.16.3a'

	    # there should be no reason to change this
	    index_url = 'https://www.duolingo.com'
	    login_url = 'https://www.duolingo.com/login'

	    max_articles_per_feed = 150
	    no_stylesheets = False
	    no_javascript = True
	    needs_subscription = True

	    extra_css = (
	        '.calibre_navbar {display:none} '
	        'a.sup, a sup { text-decoration: none !important; } '
	        'sup.invis { color: white !important; } '
	        'table { border-bottom: 1px solid #888; } '
	        '.vocabs { border-bottom: 1px dotted #888; } '
	        'li.vocab { margin-top: 3px; margin-bottom: 3px; } '
	        '.vocab_word { font-size: 1em; font-weight: bold } '
	        '.vocab_def { font-size: 0.90em; color: ' + definition_color + '; } '
	        'th { border-bottom: 1px dotted #aaa; } td, th {padding: 5px; } '
	        '.footnotes { page-break-before: always;} '
	        'h1 { font-size: 1.3em; border-bottom: 1px solid #aaa;} '
	        'h2 { font-size: 1.2em; } h3 { font-size: 1.15em; } '
	        'h4 { font-size: 1.10em; } h5,h6 { font-size: 1.05em; } '
	    )

	    # green duolingo owl, no specific language
	    default_cover_url = 'http://65.media.tumblr.com/5fd6b3ccc4e8c978c87f469b236558ad/tumblr_inline_mwkqv1OuOg1ss97ol.png'

	    learning_language = None
	    learning_language_id = None

	    # indices to use for <a name=...> to ensure uniqueness
	    # (class-level dict: the counters are shared by every instance)
	    a_indices = {}

	    def get_browser(self):
	        """Return a browser after posting the credentials to ``login_url``."""
	        # urlencode lives in urllib on Python 2 and in urllib.parse on Python 3
	        try:
	            from urllib.parse import urlencode
	        except ImportError:
	            from urllib import urlencode
	        br = BasicNewsRecipe.get_browser(self)
	        data = {'login': self.username, 'password': self.password}
	        br.open(self.login_url, urlencode(data))
	        return br

	    def get_raw(self, url):
	        """Fetch ``url`` with a fresh base-class browser and return the body."""
	        br = BasicNewsRecipe.get_browser(self)
	        return br.open(url).read()

	    def get_json(self, url):
	        """Fetch ``url`` and return its body parsed as JSON."""
	        return json.loads(self.get_raw(url))

	    def lookup_definitions(self, learning_lang, from_lang, words):
	        """
	        use API call to look up definition of words
	        learning_lang   := language to translate to
	        from_lang       := language to translate from
	        words           := list of lists of words (flattened before the call)
	        example:
	            learning_lang   = "fr"
	            from_lang       = "en"
	            words           = [["me","femme"],["pays"]]
	            https://d2.duolingo.com/api/1/dictionary/hints/fr/en?tokens=["me","femme","pays"]
	        returns
	            {"me": ["me", "myself"], "pays": ["country", "countries", "land", "region", "village"], "femme": ["woman", "wife"]}
	        """
	        # flatten list
	        flat_words = [item for sublist in words for item in sublist]
	        params = json.dumps(flat_words, separators=(',', ':'))
	        url = 'https://d2.duolingo.com/api/1/dictionary/hints/{}/{}?tokens={}' \
	            .format(learning_lang, from_lang, urlquote(params))
	        return self.get_json(url)

	    def make_anchor(self, prefix, name=''):
	        """Return a unique anchor id: prefix + running index + sanitized name.

	        A separate counter (starting at 1) is kept for every prefix; all
	        non-word characters are stripped from ``name``.
	        """
	        idx = self.a_indices.get(prefix, 1)
	        self.a_indices[prefix] = idx + 1
	        return u'{}{}{}'.format(prefix, idx, re.sub(r'\W', '', name))

	    def _get_vocabs(self, lang_data):
	        """Return ``(vocabs_html, endnotes_html)`` built from lang_data.

	        Both strings are empty when vocabularies are switched off or the
	        skill has no 'lessonWords'. endnotes_html is empty unless popup
	        footnotes were actually generated.
	        """
	        if not self.include_vocabs:
	            return ('', '')
	        # list of list of words, one list for each lesson
	        word_lists = lang_data.get('lessonWords')
	        if word_lists is None:
	            return ('', '')
	        vocab_section = self.make_anchor('voc_')
	        defs = {}
	        if self.include_defs:
	            defs = self.lookup_definitions(
	                self.learning_language,
	                lang_data['fromLanguage'], word_lists)
	        items = []
	        endnotes = []
	        for words in word_lists:
	            if not self.include_defs:
	                items.append(u', &nbsp;'.join(words))
	                continue
	            entries = []
	            for word in words:
	                # a word missing from the lookup result gets an empty definition
	                meaning = u', '.join(defs.get(word, []))
	                if self.inline_defs:
	                    entries.append((
	                        u'&#9830;&nbsp;<span class="vocab_word">{}</span>: '
	                        u'<span class="vocab_def"> {}</span>').format(
	                        word, meaning))
	                else:
	                    # kindle formats require a superscripted link from noteref
	                    # to the footnote and a link back from footnote to noteref
	                    # to make popup footnote work
	                    fn = self.make_anchor('fn_', word)       # name for footnote
	                    ref = self.make_anchor('ref_', word)     # name for noteref
	                    entries.append((
	                        u'{2}<a id="{0}" href="#{1}" epub:type="noteref">'
	                        u'<sup>*</sup></a>').format(ref, fn, word))
	                    endnotes.append((
	                        u'<p id="{0}" epub:type="footnote">'
	                        u'<a href="#{3}">{1}</a>: {2}</p>')
	                        .format(fn, word, meaning, ref))
	            items.append(u',&nbsp; '.join(entries))
	        vocabs = u'<div class="vocabs" id="{}"><ol>{}</ol></div>'.format(
	            vocab_section,
	            u''.join(u'<li>{}</li>'.format(item) for item in items))
	        footnotes = ''
	        if endnotes:
	            # only emit the aside when there are footnotes: the .footnotes
	            # style forces a page break before it
	            footnotes = (
	                u'<aside epub:type="footnotes" class="footnotes">{}</aside>'
	            ).format(u''.join(endnotes))
	        return (vocabs, footnotes)

	    def _get_notes(self, lang_data):
	        """Return the cleaned tips & notes from lang_data['explanation'].

	        Returns '' when the explanation is missing or shorter than 200
	        characters.
	        """
	        notes = lang_data.get('explanation') or ''
	        if len(notes) < 200:
	            return ''
	        # if notes contains a heading similar to title, remove it
	        # because we'll add the title ourselves
	        hreg = re.compile(r'^\s*<h\d>(.+?)</h\d>')
	        m = hreg.match(notes)
	        if m and similar(m.group(1), lang_data['name']):
	            notes = hreg.sub('', notes, count=1)
	        # strip out extraneous "<hr /> blah blah" near bottom
	        notes = re.sub(r'<hr />.{,5}a href(.+)?$', '', notes,
	                       count=1, flags=re.DOTALL)
	        # strip out extraneous "blah blah <hr />" near top
	        notes = re.sub(r'^.{,100}<hr />', '', notes,
	                       count=1, flags=re.DOTALL)
	        return notes

	    def preprocess_raw_html(self, raw_html, url):
	        """
	        extract article title ('name') and the tips and notes ('explanation')
	        and optionally vocabulary words ('lessonWords')
	        from json and return result as html

	        The article is aborted when the json cannot be read or when the
	        skill has neither notes nor vocabs.
	        """
	        try:
	            lang_data = json.loads(raw_html).get('skills')[0]
	        except (ValueError, TypeError, AttributeError, IndexError, KeyError):
	            self.abort_article('Unexpected json data')
	            return

	        heading = lang_data['name']
	        notes = self._get_notes(lang_data)
	        (vocabs, endnotes) = self._get_vocabs(lang_data)

	        if not (vocabs or notes):
	            self.abort_article(heading + ' has no notes or vocabs.')
	            return

	        class_name = ''
	        # if has both notes and vocabs add a link to the heading
	        # so you can skip over the vocabs and jump to the notes
	        if notes and vocabs:
	            anchor = self.make_anchor('notes_', heading)
	            heading += u' <a href="#{}">{}</a>'.format(anchor, self.dagger)
	            notes = u'<a name="{0}" id="{0}"></a>{1}'.format(anchor, notes)
	            # populate_article_metadata() looks for this class
	            class_name = 'has_notes'
	        heading = u'<h1 class="{}">{}</h1>'.format(class_name, heading)
	        return (
	            u'<?xml version="1.0" encoding="utf-8"?>'
	            u'<html xmlns:epub="http://www.idpf.org/2007/ops">'
	            u'<head><title></title></head><body>{}{}{}{}</body></html>') \
	            .format(heading, vocabs, notes, endnotes)

	    def print_version(self, url):
	        """
	        change user facing url
	        .../skill/<language>/<topic>
	        to
	        .../2016-04-13/skills?learningLanguage=<learning_lang>&urlName=<topic>
	        this gives us the json data we really want
	        """
	        return re.sub(
	            '/skill/[^/]+/',
	            '/2016-04-13/skills?learningLanguage=' + self.learning_language_id + '&urlName=',
	            url, count=1)

	    def populate_article_metadata(self, article, soup, first):
	        """add dagger to title if h1 class == has_notes
	        """
	        h = soup.find('h1')
	        if h is None:
	            return
	        css_class = h.get('class') or ''
	        # the class attribute may come back as one string or a list of names
	        if not isinstance(css_class, (list, tuple)):
	            css_class = css_class.split()
	        if 'has_notes' in css_class:
	            article.title = article.title + ' ' + self.dagger

	    def get_learning_language_id(self):
	        """
	        look up the learning language id needed for the skill API call
	        from the javascript struct duo.available_languages
	        downloaded from home page. (Don't know of a more elegant way of getting this)
	        usually it is the same as learning lang abbreviation but not always
	        e.g., for Norwegian, language abbrev = nb, learning language id = no-BO

	        Falls back to the learning language abbreviation when the struct is
	        missing, unparsable, or has no entry for the language.
	        """
	        if self.learning_language_id is None:
	            raw_str = self.get_raw(self.index_url)
	            if isinstance(raw_str, bytes):
	                # the pattern below is text, so the page must be text too
	                raw_str = raw_str.decode('utf-8', 'replace')
	            m = re.match(
	                r'.+duo\.available_languages\s*=\s*(\[(.+?)\])',
	                raw_str, re.DOTALL)
	            langs = []
	            if m is not None:
	                try:
	                    langs = json.loads(m.group(1))
	                except ValueError:
	                    self.log('Could not parse duo.available_languages')
	            for lang in langs:
	                if lang['key'] == self.learning_language:
	                    self.learning_language_id = lang['learning_language_id']
	                    self.log('learning_language_id: ', self.learning_language_id)
	                    break
	        # find nothing? well, let's hope for the best!
	        if self.learning_language_id is None:
	            self.learning_language_id = self.learning_language

	    def parse_index(self):
	        """
	        get user data from which we get list of skills (i.e., lessons)
	        as well as learning language, then build article list

	        returns a single feed: [(book title, [{'title': ..., 'url': ...}, ...])]
	        """
	        user_data = self.get_json('https://www.duolingo.com/users/' + self.username)

	        self.learning_language = user_data.get('learning_language')
	        if not self.learning_language:
	            self.abort_recipe_processing('Failed to get learning_language')
	        self.log('learning_language detected: ', self.learning_language)

	        self.get_learning_language_id()

	        lang_data = user_data['language_data'][self.learning_language]
	        lang_str = lang_data['language_string']
	        skills = lang_data['skills']

	        if self.include_vocabs:
	            self.title = self.title_with_vocabs.format(lang_str)
	        else:
	            self.title = self.title_no_vocabs.format(lang_str)
	        if self.cover_url == 'auto':
	            self.cover_url = self.default_cover_url

	        articles = []
	        # skills needs to be sorted by y coord (position in tree), then x coord
	        for skill in sorted(skills, key=lambda x: (x['coords_y'], x['coords_x'])):
	            # user facing url; print_version() rewrites it to the json API url
	            url = '{}/skill/{}/{}'.format(
	                self.index_url,
	                self.learning_language, urlquote(skill['url_title']))
	            articles.append({'title': skill['title'], 'url': url})
	        return [(self.title, articles)]

	def similar(a, b):
	    """Return True when strings a and b are similar, ignoring case.

	    Similar means the SequenceMatcher ratio of the lower-cased strings is
	    at least 0.75, with spaces, digits and common punctuation marked as junk.
	    """
	    return SequenceMatcher(
	        lambda x: x in ' -:_12345678890:/()[]?!', a.lower(), b.lower()
	    ).ratio() >= 0.75

	def urlquote(params):
	    """safely quote url params with UTF-8 encoding

	    Text is encoded as UTF-8 before quoting; byte strings are quoted as they
	    are. Spaces become '+' (quote_plus semantics).
	    """
	    # quote_plus lives in urllib on Python 2 and in urllib.parse on Python 3
	    try:
	        from urllib.parse import quote_plus
	    except ImportError:
	        from urllib import quote_plus
	    # only encode text: calling .encode() on a Python 2 byte string would
	    # first decode it as ASCII and fail on non-ASCII bytes
	    if not isinstance(params, bytes):
	        params = params.encode('UTF-8')
	    return quote_plus(params)