Last active
August 29, 2015 14:27
-
-
Save pluser/3e6a503929ec8cf71c9e to your computer and use it in GitHub Desktop.
Improvements to the org-mode plugin for the static blog generator Nikola.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import nikola.utils | |
# Logger shared by the plugin; named 'plugin.orgmode' and bound to Nikola's
# STDERR handler.
logger = nikola.utils.get_logger(
    'plugin.orgmode',
    nikola.utils.STDERR_HANDLER,
)
def __init__(self):
    """Select the batch-mode compiler and pre-compile the metadata regexps.

    ``self.compile_html`` is pointed at ``self.compile_html_batch_mode``.
    Every pattern string in ``self.attrmarkers`` is replaced in place by its
    compiled form, and every entry of ``self.maskmarkers`` gains a
    ``'compiled'`` key holding the compiled ``begin ... end`` pattern.

    The marker tables are mutated in place, so the compiled patterns persist
    on whatever object owns them. Entries that are already compiled are left
    untouched: calling ``re.compile`` on a compiled pattern together with
    flags raises ``ValueError``, which would otherwise make a second
    instantiation fail.

    NOTE(review): no base-class ``__init__`` is called here; confirm against
    the enclosing class whether one is required.
    """
    self.compile_html = self.compile_html_batch_mode

    ### compile regexp ###
    attr_flags = re.MULTILINE | re.IGNORECASE
    for attrmarker in self.attrmarkers:
        regexps = attrmarker['regexps']
        for index, regexp in enumerate(regexps):
            # Compiled patterns expose ``finditer``; plain strings do not.
            if not hasattr(regexp, 'finditer'):
                regexps[index] = re.compile(regexp, attr_flags)

    mask_flags = re.DOTALL | re.IGNORECASE | re.MULTILINE
    for maskmarker in self.maskmarkers:
        if 'compiled' not in maskmarker:
            maskmarker['compiled'] = re.compile(
                maskmarker['begin'] + r'.*?' + maskmarker['end'],
                mask_flags,
            )

    self.logger.debug('Regexps are compiled.')
# Table of recognised metadata keywords. Each entry maps a metadata
# 'keyword' to the list of patterns tried for it, in order:
#   1. the reST-style comment form:   .. <name>: <value>
#   2. the org-mode keyword form:     #+<NAME>: <value>
# The value is captured in the named group 'value'.
# NOTE: '\s+' after the colon also matches newlines, so a keyword line with
# an empty value captures the following line as its value.
attrmarkers = tuple(
    {
        'keyword': keyword,
        'regexps': [
            r'^\.\.\s+' + rst_name + r':\s+(?P<value>.*)$',
            r'^\#\+' + org_name + r':\s+(?P<value>.*)$',
        ],
    }
    for keyword, rst_name, org_name in (
        ('annotations', r'annotations?', r'ANNOTATIONS?'),
        ('author', r'author', r'AUTHOR'),
        ('category', r'categor(?:y|ies)', r'CATEGOR(?:Y|IES)'),
        ('date', r'date', r'DATE'),
        ('description', r'description', r'DESCRIPTION'),
        ('enclosure', r'enclosure', r'ENCLOSURE'),
        ('filters', r'filters?', r'FILTERS?'),
        ('hidetitle', r'hidetitle', r'HIDETITLE'),
        # #+LINK is omitted; Emacs uses this attribute for another purpose.
        # Use #+NIKOLA_LINK instead.
        ('link', r'link', r'N[-_]?LINK'),
        ('noannotations', r'noannotations?', r'NOANNOTATIONS?'),
        ('nocomments', r'nocomments?', r'NOCOMMENTS?'),
        ('password', r'password', r'PASSWORD'),
        ('previewimage', r'previewimage', r'PREVIEWIMAGE'),
        ('slug', r'slug', r'SLUG'),
        # #+TAGS is omitted; Emacs uses this attribute in a more advanced way.
        # Use #+NIKOLA_TAGS instead.
        ('tags', r'tags?', r'N[-_]?TAGS?'),
        ('template', r'template', r'TEMPLATE'),
        ('title', r'title', r'TITLE'),
        ('type', r'type', r'TYPE'),
    )
)
# Delimiters of sections in which metadata lines are ignored. Each entry
# holds the pattern opening the section ('begin') and the one closing it
# ('end'), i.e. #+BEGIN_<NAME> ... #+END_<NAME>.
maskmarkers = tuple(
    {
        'begin': r'^\#\+BEGIN_' + block_name,
        'end': r'^\#\+END_' + block_name,
    }
    for block_name in ('EXAMPLE', 'NIKOLA_IGNORE', 'SRC')
)
def read_metadata(self, post, file_metadata_regexp=None, unslugify_titles=False, lang=None):
    """Parse the metadata embedded in an org-mode source file.

    The file at ``post.source_path`` is read as UTF-8 and scanned for
    metadata lines. Lines inside the sections listed in ``self.maskmarkers``
    (e.g. ``#+BEGIN_NIKOLA_IGNORE ... #+END_NIKOLA_IGNORE``) are ignored.

    You can write metadata in your .org document with the following syntax.
    A lower number means a higher priority.

    1. #+NIKOLA_TITLE: Awesome title
       #+NIKOLA_DATE: 2015-01-01 09:00:00 UTC+09:00
       #+NIKOLA_SLUG: this-is-an-awesome-page

    2. #+BEGIN_COMMENT
       .. TITLE: Awesome title
       .. DATE: 2015-01-01 09:00:00 UTC+09:00
       .. SLUG: this-is-an-awesome-page
       #+END_COMMENT

    3. #+TITLE: Awesome title
       #+DATE: 2015-01-01 09:00:00 UTC+09:00
       #+SLUG: this-is-an-awesome-page
       #+N_TAGS: this, is, special, case
       #+N_LINK: http://this.is.special.case/too

    When a keyword appears several times in the same form, the first
    unmasked occurrence wins.

    :param post: object whose ``source_path`` attribute names the file to read.
    :param file_metadata_regexp: unused by this implementation.
    :param unslugify_titles: unused by this implementation.
    :param lang: unused by this implementation.
    :return: dict mapping lower-case metadata keywords to their string values;
        empty when the file contains no metadata.
    """
    # Local import: the visible module header does not import codecs.
    import codecs

    with codecs.open(post.source_path, 'r', "utf8") as fd:
        content = fd.read()

    ### convert maskmarkers to maskranges ###
    mask_flags = re.DOTALL | re.IGNORECASE | re.MULTILINE
    maskranges = set()
    for maskmarker in self.maskmarkers:
        compiled = maskmarker.get('compiled')
        if compiled is not None:
            mask_matches = compiled.finditer(content)
        else:
            # Fallback for marker tables that were never pre-compiled.
            mask_matches = re.finditer(
                maskmarker['begin'] + r'.*?' + maskmarker['end'],
                content,
                mask_flags,
            )
            self.logger.info('Regexp was not compiled.')
        for mask_match in mask_matches:
            maskranges.add(mask_match.span())
            self.logger.debug('Masked section was found in: {}.'.format(post.source_path))

    def is_unmasked(span):
        """Return True when the half-open ``span`` overlaps no masked range."""
        return not any(
            span[0] < mask_end and mask_start < span[1]
            for mask_start, mask_end in maskranges
        )

    def unmasked_matches(match_iter):
        """Yield the matches lying outside every masked section, in order."""
        for candidate in match_iter:
            if is_unmasked(candidate.span()):
                yield candidate
            else:
                self.logger.debug('Masked metadata was found in: {}.'.format(post.source_path))

    attr_flags = re.IGNORECASE | re.MULTILINE
    metadata = dict()

    # Priorities 2 and 3: per-keyword patterns, tried in their listed order;
    # the first pattern yielding an unmasked match decides the value.
    for elm in self.attrmarkers:
        for regexp in elm['regexps']:
            if hasattr(regexp, 'finditer'):  # If regexp is compiled
                match_iter = regexp.finditer(content)
            else:
                match_iter = re.finditer(regexp, content, attr_flags)
                self.logger.info('Regexp was not compiled.')
            match = next(unmasked_matches(match_iter), None)
            if match:
                metadata[elm['keyword']] = match.group('value')
                break

    # Priority 1: generic '#+NIKOLA_<KEYWORD>: value' lines override anything
    # found above. Every such line is honoured (not only the first one), and
    # a file without any of them is simply left as is instead of failing.
    nikola_keywords = set()
    nikola_matches = re.finditer(
        r'^\#\+NIKOLA[-_](?P<keyword>\w+?):\s+(?P<value>.*)$',
        content,
        attr_flags,
    )
    for match in unmasked_matches(nikola_matches):
        keyword = match.group('keyword').lower()
        if keyword not in nikola_keywords:  # first occurrence wins
            nikola_keywords.add(keyword)
            metadata[keyword] = match.group('value')

    return metadata
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment