Skip to content

Instantly share code, notes, and snippets.

@pluser
Last active August 29, 2015 14:27
Show Gist options
  • Save pluser/3e6a503929ec8cf71c9e to your computer and use it in GitHub Desktop.
Save pluser/3e6a503929ec8cf71c9e to your computer and use it in GitHub Desktop.
Improvements to the org-mode plugin for the static blog generator Nikola.
import codecs
import re

import nikola.utils
# Logger for this plugin, obtained from Nikola's logging helper under the
# name 'plugin.orgmode' with the STDERR handler attached.
# NOTE(review): the methods below use it as ``self.logger``, so this
# assignment presumably belongs in the plugin class body -- confirm when
# merging this fragment.
logger = nikola.utils.get_logger('plugin.orgmode', nikola.utils.STDERR_HANDLER)
def __init__(self):
    """Select the batch-mode compiler and precompile the marker regexps.

    ``self.compile_html`` is bound to ``self.compile_html_batch_mode``.
    Every pattern string in ``self.attrmarkers`` is replaced in place by its
    compiled form, and every entry of ``self.maskmarkers`` gains a
    ``'compiled'`` pattern spanning the whole ``begin ... end`` section.

    The marker tables are shared between instances, so entries that are
    already compiled (by an earlier instantiation) are left untouched:
    ``re.compile()`` raises ``ValueError`` when it is handed a compiled
    pattern together with flags.

    NOTE(review): the base class is not visible from this fragment; confirm
    whether its ``__init__`` needs to be called as well.
    """
    self.compile_html = self.compile_html_batch_mode

    ### compile regexp ###
    compiled_type = type(re.compile(''))
    for attrmarker in self.attrmarkers:
        regexps = attrmarker['regexps']
        for index, regexp in enumerate(regexps):
            if isinstance(regexp, compiled_type):
                # Already compiled; recompiling with flags would raise.
                continue
            regexps[index] = re.compile(regexp, re.MULTILINE | re.IGNORECASE)
    for maskmarker in self.maskmarkers:
        # Always built from the raw 'begin'/'end' strings, so this is safe
        # to repeat on every instantiation.
        maskmarker['compiled'] = re.compile(
            maskmarker['begin'] + r'.*?' + maskmarker['end'],
            re.DOTALL | re.IGNORECASE | re.MULTILINE,
        )
    self.logger.debug('Regexps are compiled.')
# Metadata keywords recognised by the plugin.  Every entry carries two line
# patterns that capture the attribute text in the named group ``value``: the
# ``.. keyword: value`` form and the org-mode ``#+KEYWORD: value`` form.
# The 'regexps' values are lists (not tuples) because __init__ swaps the
# pattern strings for compiled patterns in place.
attrmarkers = tuple(
    {
        'keyword': keyword,
        'regexps': [
            r'^\.\.\s+' + dotted_name + r':\s+(?P<value>.*)$',
            r'^\#\+' + org_name + r':\s+(?P<value>.*)$',
        ],
    }
    for keyword, dotted_name, org_name in (
        ('annotations', r'annotations?', r'ANNOTATIONS?'),
        ('author', r'author', r'AUTHOR'),
        ('category', r'categor(?:y|ies)', r'CATEGOR(?:Y|IES)'),
        ('date', r'date', r'DATE'),
        ('description', r'description', r'DESCRIPTION'),
        ('enclosure', r'enclosure', r'ENCLOSURE'),
        ('filters', r'filters?', r'FILTERS?'),
        ('hidetitle', r'hidetitle', r'HIDETITLE'),
        # #+LINK is omitted; Emacs uses this attribute for another purpose.
        # Use #+NIKOLA_LINK instead.
        ('link', r'link', r'N[-_]?LINK'),
        ('noannotations', r'noannotations?', r'NOANNOTATIONS?'),
        ('nocomments', r'nocomments?', r'NOCOMMENTS?'),
        ('password', r'password', r'PASSWORD'),
        ('previewimage', r'previewimage', r'PREVIEWIMAGE'),
        ('slug', r'slug', r'SLUG'),
        # #+TAGS is omitted; Emacs uses this attribute in a more advanced
        # way. Use #+NIKOLA_TAGS instead.
        ('tags', r'tags?', r'N[-_]?TAGS?'),
        ('template', r'template', r'TEMPLATE'),
        ('title', r'title', r'TITLE'),
        ('type', r'type', r'TYPE'),
    )
)
# Org-mode blocks whose contents must not be scanned for metadata.  Each
# entry holds the pattern for the opening line ('begin') and the closing
# line ('end') of one block type.
maskmarkers = tuple(
    {'begin': r'^\#\+BEGIN_' + block_name, 'end': r'^\#\+END_' + block_name}
    for block_name in ('EXAMPLE', 'NIKOLA_IGNORE', 'SRC')
)
def read_metadata(self, post, file_metadata_regexp=None, unslugify_titles=False, lang=None):
    """Parse the metadata embedded in an org-mode source file.

    Metadata located inside a masked section (the block types listed in
    ``self.maskmarkers``, e.g. ``#+BEGIN_NIKOLA_IGNORE ... #+END_NIKOLA_IGNORE``)
    is ignored.

    You can write metadata in your .org document with the following syntaxes.
    A lower number means a higher priority.

    1. #+NIKOLA_TITLE: Awesome title
       #+NIKOLA_DATE: 2015-01-01 09:00:00 UTC+09:00
       #+NIKOLA_SLUG: this-is-an-awesome-page

    2. #+BEGIN_COMMENT
       .. TITLE: Awesome title
       .. DATE: 2015-01-01 09:00:00 UTC+09:00
       .. SLUG: this-is-an-awesome-page
       #+END_COMMENT

    3. #+TITLE: Awesome title
       #+DATE: 2015-01-01 09:00:00 UTC+09:00
       #+SLUG: this-is-an-awesome-page
       #+N_TAGS: this, is, special, case
       #+N_LINK: http://this.is.special.case/too

    For syntaxes 2 and 3 the first unmasked occurrence of a keyword is used.
    Every unmasked ``#+NIKOLA_<KEYWORD>:`` line is collected (first
    occurrence per keyword) and overrides the other syntaxes.  A file
    without any ``#+NIKOLA_...`` line is valid.

    :param post: object whose ``source_path`` attribute names the file to
        read; the file is decoded as UTF-8.
    :param file_metadata_regexp: not used by this implementation.
    :param unslugify_titles: not used by this implementation.
    :param lang: not used by this implementation.
    :return: dict mapping lower-case keyword to the raw value text.
    """
    with codecs.open(post.source_path, 'r', "utf8") as fd:
        content = fd.read()

    line_flags = re.IGNORECASE | re.MULTILINE
    compiled_type = type(re.compile(''))

    ### convert maskmarkers to maskranges ###
    maskranges = set()
    for maskmarker in self.maskmarkers:
        if 'compiled' in maskmarker:
            match_iter = maskmarker['compiled'].finditer(content)
        else:
            match_iter = re.finditer(
                maskmarker['begin'] + r'.*?' + maskmarker['end'],
                content,
                re.DOTALL | line_flags,
            )
            self.logger.info('Regexp was not compiled.')
        for match in match_iter:
            maskranges.add(match.span())
            self.logger.debug('Masked section was found in: {}.'.format(post.source_path))

    def is_unmasked(span):
        """Return True unless either end of ``span`` lies inside a masked range."""
        return not any(
            begin < span[0] < end or begin < span[1] < end
            for begin, end in maskranges
        )

    def unmasked_matches(match_iter):
        """Lazily yield the matches that are not inside a masked section."""
        for match in match_iter:
            if is_unmasked(match.span()):
                yield match
            else:
                self.logger.debug('Masked metadata was found in: {}.'.format(post.source_path))

    metadata = dict()
    for elm in self.attrmarkers:
        for regexp in elm['regexps']:
            if isinstance(regexp, compiled_type):  # If regexp is compiled
                match_iter = regexp.finditer(content)
            else:
                match_iter = re.finditer(regexp, content, line_flags)
                self.logger.info('Regexp was not compiled.')
            match = next(unmasked_matches(match_iter), None)
            if match is not None:
                metadata[elm['keyword']] = match.group('value')
                # Earlier patterns of a keyword take precedence over later ones.
                break

    # Generic '#+NIKOLA_<KEYWORD>: value' lines have the highest priority, so
    # they are applied last.  There may be none, one, or several of them.
    nikola_iter = re.finditer(
        r'^\#\+NIKOLA[-_](?P<keyword>\w+?):\s+(?P<value>.*)$',
        content,
        line_flags,
    )
    nikola_attrs = dict()
    for match in unmasked_matches(nikola_iter):
        # setdefault: keep the first occurrence, like the keyword loop above.
        nikola_attrs.setdefault(match.group('keyword').lower(), match.group('value'))
    metadata.update(nikola_attrs)

    return metadata
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment