pluser/orgmode.py

## orgmode.py
	import re
	import nikola.utils
	# Logger named 'plugin.orgmode', obtained from nikola.utils.get_logger with
	# nikola.utils.STDERR_HANDLER; the methods below reach it as self.logger.
	logger = nikola.utils.get_logger('plugin.orgmode', nikola.utils.STDERR_HANDLER)

	def __init__(self):
		"""Bind ``compile_html`` to the batch-mode compiler and pre-compile the marker regexps.

		Compiled patterns are written back into the marker tables: each string
		in an ``attrmarkers`` entry's ``'regexps'`` list is replaced by its
		compiled pattern, and every ``maskmarkers`` entry gains a ``'compiled'``
		key.  Because the tables are mutated in place, entries that are already
		compiled are skipped; ``re.compile()`` raises ``ValueError`` when it is
		given a compiled pattern together with flags, so recompiling would
		break any instantiation after the first one.
		"""
		self.compile_html = self.compile_html_batch_mode

		### compile regexp ###
		compiled_type = type(re.compile(''))
		attr_flags = re.MULTILINE|re.IGNORECASE
		mask_flags = re.DOTALL|re.IGNORECASE|re.MULTILINE
		for attrmarker in self.attrmarkers:
			for index, regexp in enumerate(attrmarker['regexps']):
				if isinstance(regexp, compiled_type):
					# Already compiled by an earlier instantiation.
					continue
				attrmarker['regexps'][index] = re.compile(regexp, attr_flags)
		for maskmarker in self.maskmarkers:
			if 'compiled' not in maskmarker:
				# A masked section spans from its begin marker to the nearest end marker.
				maskmarker['compiled'] = re.compile(maskmarker['begin']+r'.*?'+maskmarker['end'], mask_flags)
		self.logger.debug('Regexps are compiled.')

	# Metadata keywords and the regexps that recognise them.  Each entry holds two
	# patterns, tried in this order: the ``.. keyword: value`` form, then the
	# ``#+KEYWORD: value`` form.  Both capture the text after the colon as ``value``.
	attrmarkers = tuple(
		{
			'keyword': keyword,
			'regexps': [
				r'^\.\.\s+' + dotted_name + r':\s+(?P<value>.*)$',
				r'^\#\+' + org_name + r':\s+(?P<value>.*)$',
			],
		}
		for keyword, dotted_name, org_name in (
			('annotations', r'annotations?', r'ANNOTATIONS?'),
			('author', r'author', r'AUTHOR'),
			('category', r'categor(?:y|ies)', r'CATEGOR(?:Y|IES)'),
			('date', r'date', r'DATE'),
			('description', r'description', r'DESCRIPTION'),
			('enclosure', r'enclosure', r'ENCLOSURE'),
			('filters', r'filters?', r'FILTERS?'),
			('hidetitle', r'hidetitle', r'HIDETITLE'),
			# Plain #+LINK is omitted; Emacs uses that keyword for another purpose. Use #+NIKOLA_LINK instead.
			('link', r'link', r'N[-_]?LINK'),
			('noannotations', r'noannotations?', r'NOANNOTATIONS?'),
			('nocomments', r'nocomments?', r'NOCOMMENTS?'),
			('password', r'password', r'PASSWORD'),
			('previewimage', r'previewimage', r'PREVIEWIMAGE'),
			('slug', r'slug', r'SLUG'),
			# Plain #+TAGS is omitted; Emacs uses that keyword in a more advanced way. Use #+NIKOLA_TAGS instead.
			('tags', r'tags?', r'N[-_]?TAGS?'),
			('template', r'template', r'TEMPLATE'),
			('title', r'title', r'TITLE'),
			('type', r'type', r'TYPE'),
		)
	)
	# Begin/end patterns of the sections that read_metadata masks out: metadata
	# matches falling inside such a section are ignored.
	maskmarkers = tuple(
		{'begin': r'^\#\+BEGIN_' + section, 'end': r'^\#\+END_' + section}
		for section in ('EXAMPLE', 'NIKOLA_IGNORE', 'SRC')
	)

	def read_metadata(self, post, file_metadata_regexp=None, unslugify_titles=False, lang=None):
		"""Parse the metadata written in the source file of ``post``.

		Parsing is disabled inside the sections delimited by ``self.maskmarkers``,
		e.g. the special section '#+BEGIN_NIKOLA_IGNORE ... #+END_NIKOLA_IGNORE'.
		You can write metadata in your .org document with the following syntaxes.
		A lower number means a higher priority.

		1. #+NIKOLA_TITLE: Awesome title
		   #+NIKOLA_DATE: 2015-01-01 09:00:00 UTC+09:00
		   #+NIKOLA_SLUG: this-is-an-awesome-page

		2. #+BEGIN_COMMENT
		   .. TITLE: Awesome title
		   .. DATE: 2015-01-01 09:00:00 UTC+09:00
		   .. SLUG: this-is-an-awesome-page
		   #+END_COMMENT

		3. #+TITLE: Awesome title
		   #+DATE: 2015-01-01 09:00:00 UTC+09:00
		   #+SLUG: this-is-an-awesome-page
		   #+N_TAGS: this, is, special, case
		   #+N_LINK: http://this.is.special.case/too

		Every unmasked ``#+NIKOLA_<KEYWORD>:`` line is applied (the keyword is
		lower-cased; when a keyword is repeated its first occurrence wins), and
		a file without any such line is handled without error.

		``post`` only needs a ``source_path`` attribute naming a UTF-8 file.
		``file_metadata_regexp``, ``unslugify_titles`` and ``lang`` are accepted
		but not used here.

		Returns a dict mapping metadata keywords to their string values.
		"""
		# Function-scope import: codecs is not among the imports visible at the top of this file.
		import codecs

		with codecs.open(post.source_path, 'r', "utf8") as fd:
			content = fd.read()

		### convert maskmarkers to maskranges ###
		maskranges = set()
		for maskmarker in self.maskmarkers:
			if 'compiled' in maskmarker:
				match_iter = maskmarker['compiled'].finditer(content)
			else:
				match_iter = re.finditer(maskmarker['begin']+r'.*?'+maskmarker['end'], content, re.DOTALL|re.IGNORECASE|re.MULTILINE)
				self.logger.info('Regexp was not compiled.')
			for match in match_iter:
				maskranges.add(match.span())
				self.logger.debug('Masked section was found in: {}.'.format(post.source_path))

		def check_mask_range(span):
			"""Return True when neither end of ``span`` lies strictly inside a masked range."""
			return not any(
				begin < span[0] < end or begin < span[1] < end
				for begin, end in maskranges
			)

		def iter_unmasked(match_iter):
			"""Yield the matches lying outside every masked range; log the masked ones."""
			for match in match_iter:
				if check_mask_range(match.span()):
					yield match
				else:
					self.logger.debug('Masked metadata was found in: {}.'.format(post.source_path))

		metadata = dict()
		compiled_type = type(re.compile(''))
		for elm in self.attrmarkers:
			# The regexps are listed by priority; stop at the first one with an unmasked match.
			for regexp in elm['regexps']:
				if isinstance(regexp, compiled_type): # If regexp is compiled
					match_iter = regexp.finditer(content)
				else:
					match_iter = re.finditer(regexp, content, re.IGNORECASE|re.MULTILINE)
					self.logger.info('Regexp was not compiled.')
				match = next(iter_unmasked(match_iter), None)
				if match:
					metadata[elm['keyword']] = match.group('value')
					break

		# '#+NIKOLA_<KEYWORD>:' lines have the highest priority, so they override
		# whatever the generic markers produced.  There may be none or several.
		nikola_keywords = set()
		match_iter = re.finditer(r'^\#\+NIKOLA[-_](?P<keyword>\w+?):\s+(?P<value>.*)$', content, re.IGNORECASE|re.MULTILINE)
		for match in iter_unmasked(match_iter):
			keyword = match.group('keyword').lower()
			if keyword in nikola_keywords:
				continue
			nikola_keywords.add(keyword)
			metadata[keyword] = match.group('value')

		return metadata
	import re
	import nikola.utils
	# Logger named 'plugin.orgmode', obtained from nikola.utils.get_logger with
	# nikola.utils.STDERR_HANDLER; the methods below reach it as self.logger.
	logger = nikola.utils.get_logger('plugin.orgmode', nikola.utils.STDERR_HANDLER)

	def __init__(self):
		"""Bind ``compile_html`` to the batch-mode compiler and pre-compile the marker regexps.

		Compiled patterns are written back into the marker tables: each string
		in an ``attrmarkers`` entry's ``'regexps'`` list is replaced by its
		compiled pattern, and every ``maskmarkers`` entry gains a ``'compiled'``
		key.  Because the tables are mutated in place, entries that are already
		compiled are skipped; ``re.compile()`` raises ``ValueError`` when it is
		given a compiled pattern together with flags, so recompiling would
		break any instantiation after the first one.
		"""
		self.compile_html = self.compile_html_batch_mode

		### compile regexp ###
		compiled_type = type(re.compile(''))
		attr_flags = re.MULTILINE|re.IGNORECASE
		mask_flags = re.DOTALL|re.IGNORECASE|re.MULTILINE
		for attrmarker in self.attrmarkers:
			for index, regexp in enumerate(attrmarker['regexps']):
				if isinstance(regexp, compiled_type):
					# Already compiled by an earlier instantiation.
					continue
				attrmarker['regexps'][index] = re.compile(regexp, attr_flags)
		for maskmarker in self.maskmarkers:
			if 'compiled' not in maskmarker:
				# A masked section spans from its begin marker to the nearest end marker.
				maskmarker['compiled'] = re.compile(maskmarker['begin']+r'.*?'+maskmarker['end'], mask_flags)
		self.logger.debug('Regexps are compiled.')

	# Metadata keywords and the regexps that recognise them.  Each entry holds two
	# patterns, tried in this order: the ``.. keyword: value`` form, then the
	# ``#+KEYWORD: value`` form.  Both capture the whole text after the colon
	# (``.*``, up to the end of the line) as ``value``.
	attrmarkers = tuple(
		{
			'keyword': keyword,
			'regexps': [
				r'^\.\.\s+' + dotted_name + r':\s+(?P<value>.*)$',
				r'^\#\+' + org_name + r':\s+(?P<value>.*)$',
			],
		}
		for keyword, dotted_name, org_name in (
			('annotations', r'annotations?', r'ANNOTATIONS?'),
			('author', r'author', r'AUTHOR'),
			('category', r'categor(?:y|ies)', r'CATEGOR(?:Y|IES)'),
			('date', r'date', r'DATE'),
			('description', r'description', r'DESCRIPTION'),
			('enclosure', r'enclosure', r'ENCLOSURE'),
			('filters', r'filters?', r'FILTERS?'),
			('hidetitle', r'hidetitle', r'HIDETITLE'),
			# Plain #+LINK is omitted; Emacs uses that keyword for another purpose. Use #+NIKOLA_LINK instead.
			('link', r'link', r'N[-_]?LINK'),
			('noannotations', r'noannotations?', r'NOANNOTATIONS?'),
			('nocomments', r'nocomments?', r'NOCOMMENTS?'),
			('password', r'password', r'PASSWORD'),
			('previewimage', r'previewimage', r'PREVIEWIMAGE'),
			('slug', r'slug', r'SLUG'),
			# Plain #+TAGS is omitted; Emacs uses that keyword in a more advanced way. Use #+NIKOLA_TAGS instead.
			('tags', r'tags?', r'N[-_]?TAGS?'),
			('template', r'template', r'TEMPLATE'),
			('title', r'title', r'TITLE'),
			('type', r'type', r'TYPE'),
		)
	)
	# Begin/end patterns of the sections that read_metadata masks out: metadata
	# matches falling inside such a section are ignored.
	maskmarkers = tuple(
		dict(begin=r'^\#\+BEGIN_' + section, end=r'^\#\+END_' + section)
		for section in ('EXAMPLE', 'NIKOLA_IGNORE', 'SRC')
	)

	def read_metadata(self, post, file_metadata_regexp=None, unslugify_titles=False, lang=None):
		"""Parse the metadata written in the source file of ``post``.

		Parsing is disabled inside the sections delimited by ``self.maskmarkers``,
		e.g. the special section '#+BEGIN_NIKOLA_IGNORE ... #+END_NIKOLA_IGNORE'.
		You can write metadata in your .org document with the following syntaxes.
		A lower number means a higher priority.

		1. #+NIKOLA_TITLE: Awesome title
		   #+NIKOLA_DATE: 2015-01-01 09:00:00 UTC+09:00
		   #+NIKOLA_SLUG: this-is-an-awesome-page

		2. #+BEGIN_COMMENT
		   .. TITLE: Awesome title
		   .. DATE: 2015-01-01 09:00:00 UTC+09:00
		   .. SLUG: this-is-an-awesome-page
		   #+END_COMMENT

		3. #+TITLE: Awesome title
		   #+DATE: 2015-01-01 09:00:00 UTC+09:00
		   #+SLUG: this-is-an-awesome-page
		   #+N_TAGS: this, is, special, case
		   #+N_LINK: http://this.is.special.case/too

		Every unmasked ``#+NIKOLA_<KEYWORD>:`` line is applied (the keyword is
		lower-cased; when a keyword is repeated its first occurrence wins), and
		a file without any such line is handled without error.

		``post`` only needs a ``source_path`` attribute naming a UTF-8 file.
		``file_metadata_regexp``, ``unslugify_titles`` and ``lang`` are accepted
		but not used here.

		Returns a dict mapping metadata keywords to their string values.
		"""
		# Function-scope import: codecs is not among the imports visible at the top of this file.
		import codecs

		with codecs.open(post.source_path, 'r', "utf8") as fd:
			content = fd.read()

		### convert maskmarkers to maskranges ###
		maskranges = set()
		for maskmarker in self.maskmarkers:
			if 'compiled' in maskmarker:
				match_iter = maskmarker['compiled'].finditer(content)
			else:
				match_iter = re.finditer(maskmarker['begin']+r'.*?'+maskmarker['end'], content, re.DOTALL|re.IGNORECASE|re.MULTILINE)
				self.logger.info('Regexp was not compiled.')
			for match in match_iter:
				maskranges.add(match.span())
				self.logger.debug('Masked section was found in: {}.'.format(post.source_path))

		def check_mask_range(span):
			"""Return True when neither end of ``span`` lies strictly inside a masked range."""
			return not any(
				begin < span[0] < end or begin < span[1] < end
				for begin, end in maskranges
			)

		def iter_unmasked(match_iter):
			"""Yield the matches lying outside every masked range; log the masked ones."""
			for match in match_iter:
				if check_mask_range(match.span()):
					yield match
				else:
					self.logger.debug('Masked metadata was found in: {}.'.format(post.source_path))

		metadata = dict()
		compiled_type = type(re.compile(''))
		for elm in self.attrmarkers:
			# The regexps are listed by priority; stop at the first one with an unmasked match.
			for regexp in elm['regexps']:
				if isinstance(regexp, compiled_type): # If regexp is compiled
					match_iter = regexp.finditer(content)
				else:
					match_iter = re.finditer(regexp, content, re.IGNORECASE|re.MULTILINE)
					self.logger.info('Regexp was not compiled.')
				match = next(iter_unmasked(match_iter), None)
				if match:
					metadata[elm['keyword']] = match.group('value')
					break

		# '#+NIKOLA_<KEYWORD>:' lines have the highest priority, so they override
		# whatever the generic markers produced.  There may be none or several.
		nikola_keywords = set()
		match_iter = re.finditer(r'^\#\+NIKOLA[-_](?P<keyword>\w+?):\s+(?P<value>.*)$', content, re.IGNORECASE|re.MULTILINE)
		for match in iter_unmasked(match_iter):
			keyword = match.group('keyword').lower()
			if keyword in nikola_keywords:
				continue
			nikola_keywords.add(keyword)
			metadata[keyword] = match.group('value')

		return metadata