litil/gist:a0f248dbe8334f1f4874986868fc28f4

## gistfile1.txt
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import re
import unicodedata

# This method adds all given synonyms into the correct dictionary entry.
def extendDictEntry(dict, key, xmlSynonyms):
    """Add the text of every element in ``xmlSynonyms`` to ``dict[key]``.

    Each element's text is UTF-8 encoded and appended to the list stored
    under ``key`` unless it is already there, so the list keeps first-seen
    order and contains no duplicates. Elements without text are skipped.

    Args:
        dict: Mapping whose values are lists of encoded synonyms. The entry
            for ``key`` must already exist.
        key: Key of the entry to extend.
        xmlSynonyms: Iterable of XML elements exposing a ``text`` attribute.

    Returns:
        The same ``dict`` object, mutated in place.

    Raises:
        KeyError: If ``key`` is missing from ``dict`` when a synonym has to
            be recorded.
    """
    for child in xmlSynonyms:
        # An empty element (e.g. <LITERAL/>) has text None; calling
        # .encode() on it would raise AttributeError.
        if child.text is None:
            continue

        childText = child.text.encode('utf-8')
        if childText not in dict[key]:
            dict[key].append(childText)
    return dict

# This method builds the synonyms dictionary from the WoNeF file.
def buildSynonymDictionary(path='wonef-fscore-0.1.xml'):
    """Build the synonym dictionary from a WoNeF XML file.

    Every child of the XML root is scanned for ``SYNONYM`` elements. Each
    literal found inside a ``SYNONYM`` element becomes a key; its value is a
    list that starts with the literal itself and is then extended with the
    texts of all literals of that ``SYNONYM`` element. A literal that occurs
    in several ``SYNONYM`` elements accumulates the synonyms of all of them.

    Args:
        path: Location of the XML file to parse. Defaults to the file name
            this script has always read.

    Returns:
        A dict mapping each UTF-8 encoded literal to the list of its UTF-8
        encoded synonyms.
    """
    root = ET.parse(path).getroot()
    synonyms = {}

    for synset in root:
        for child in synset:
            if child.tag != "SYNONYM":
                continue

            for literal in child:
                # An empty element has text None and cannot serve as a key.
                if literal.text is None:
                    continue

                currLiteralText = literal.text.encode('utf-8')
                # Seed a new entry with the literal itself so it is always
                # listed first; existing entries are left untouched.
                synonyms.setdefault(currLiteralText, [currLiteralText])
                extendDictEntry(synonyms, currLiteralText, child)

    return synonyms

def removeAccents(str):
    """Return ``str`` with all combining accent marks removed.

    The input is decomposed with Unicode NFD normalization and every
    character of category ``Mn`` (nonspacing mark) is dropped, which turns
    e.g. an accented "e" into a plain "e".

    Args:
        str: UTF-8 encoded byte string, or a text (unicode) string.

    Returns:
        The stripped value in the same form it was given: UTF-8 encoded
        bytes for a byte-string argument, text for a text argument.

    Raises:
        UnicodeDecodeError: If a byte-string argument is not valid UTF-8.
    """
    # The parameter shadows the builtin ``str``, so the type check is done
    # against ``bytes`` instead.
    is_bytes = isinstance(str, bytes)
    text = str.decode('utf-8') if is_bytes else str

    decomposed = unicodedata.normalize('NFD', text)
    stripped = u''.join(
        c for c in decomposed if unicodedata.category(c) != 'Mn'
    )
    return stripped.encode('utf-8') if is_bytes else stripped

# This method writes the synonym file in the Solr format
def writeSolrSynonymFile():
    """Write the synonym dictionary to ``solr_synonym.txt``.

    The file starts with a comment header followed by a blank line. Each
    dictionary entry then produces one line of the form
    ``key => synonym1, synonym2, ...`` with accents removed from both sides.

    An entry that raises ``UnicodeEncodeError`` while its line is being
    built is reported on standard output and skipped; the remaining entries
    are still written.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import block.
    import io

    synonyms = buildSynonymDictionary()

    # io.open yields a text-mode UTF-8 file on both Python 2 and Python 3,
    # and the ``with`` block closes it even if an entry raises.
    with io.open("solr_synonym.txt", "w", encoding="utf-8") as out:
        out.write(u"# Solr Synonmys File \n\n")

        for key, values in synonyms.items():
            try:
                # Keys and values are UTF-8 encoded byte strings, so the
                # line is assembled from bytes and decoded once for writing.
                line = (
                    removeAccents(key)
                    + b" => "
                    + removeAccents(b", ".join(values))
                    + b"\n"
                )
            except UnicodeEncodeError:
                # repr() keeps the diagnostic itself from failing on
                # non-ASCII or byte-string content.
                print("UnicodeEncodeError: " + repr(key) + " - " + repr(values))
            else:
                out.write(line.decode("utf-8"))


# Script entry point: runs unconditionally (also on import) and generates
# solr_synonym.txt.
writeSolrSynonymFile()
	# -*- coding: utf-8 -*-
	import xml.etree.ElementTree as ET
	import re
	import unicodedata

	# This method adds all given synonyms into the correct dictionary entry.
	def extendDictEntry(dict, key, xmlSynonyms):
	    """Add the text of every element in ``xmlSynonyms`` to ``dict[key]``.

	    Each element's text is UTF-8 encoded and appended to the list stored
	    under ``key`` unless it is already there, so the list keeps first-seen
	    order and contains no duplicates. Elements without text are skipped.

	    Args:
	        dict: Mapping whose values are lists of encoded synonyms. The
	            entry for ``key`` must already exist.
	        key: Key of the entry to extend.
	        xmlSynonyms: Iterable of XML elements exposing a ``text``
	            attribute.

	    Returns:
	        The same ``dict`` object, mutated in place.

	    Raises:
	        KeyError: If ``key`` is missing from ``dict`` when a synonym has
	            to be recorded.
	    """
	    for child in xmlSynonyms:
	        # An empty element (e.g. <LITERAL/>) has text None; calling
	        # .encode() on it would raise AttributeError.
	        if child.text is None:
	            continue

	        childText = child.text.encode('utf-8')
	        if childText not in dict[key]:
	            dict[key].append(childText)
	    return dict

	# This method builds the synonyms dictionary from the WoNeF file.
	def buildSynonymDictionary(path='wonef-fscore-0.1.xml'):
	    """Build the synonym dictionary from a WoNeF XML file.

	    Every child of the XML root is scanned for ``SYNONYM`` elements. Each
	    literal found inside a ``SYNONYM`` element becomes a key; its value is
	    a list that starts with the literal itself and is then extended with
	    the texts of all literals of that ``SYNONYM`` element. A literal that
	    occurs in several ``SYNONYM`` elements accumulates the synonyms of all
	    of them.

	    Args:
	        path: Location of the XML file to parse. Defaults to the file
	            name this script has always read.

	    Returns:
	        A dict mapping each UTF-8 encoded literal to the list of its
	        UTF-8 encoded synonyms.
	    """
	    root = ET.parse(path).getroot()
	    synonyms = {}

	    for synset in root:
	        for child in synset:
	            if child.tag != "SYNONYM":
	                continue

	            for literal in child:
	                # An empty element has text None and cannot serve as a
	                # key.
	                if literal.text is None:
	                    continue

	                currLiteralText = literal.text.encode('utf-8')
	                # Seed a new entry with the literal itself so it is
	                # always listed first; existing entries are untouched.
	                synonyms.setdefault(currLiteralText, [currLiteralText])
	                extendDictEntry(synonyms, currLiteralText, child)

	    return synonyms

	def removeAccents(str):
	    """Return ``str`` with all combining accent marks removed.

	    The input is decomposed with Unicode NFD normalization and every
	    character of category ``Mn`` (nonspacing mark) is dropped, which turns
	    e.g. an accented "e" into a plain "e".

	    Args:
	        str: UTF-8 encoded byte string, or a text (unicode) string.

	    Returns:
	        The stripped value in the same form it was given: UTF-8 encoded
	        bytes for a byte-string argument, text for a text argument.

	    Raises:
	        UnicodeDecodeError: If a byte-string argument is not valid UTF-8.
	    """
	    # The parameter shadows the builtin ``str``, so the type check is done
	    # against ``bytes`` instead.
	    is_bytes = isinstance(str, bytes)
	    text = str.decode('utf-8') if is_bytes else str

	    decomposed = unicodedata.normalize('NFD', text)
	    stripped = u''.join(
	        c for c in decomposed if unicodedata.category(c) != 'Mn'
	    )
	    return stripped.encode('utf-8') if is_bytes else stripped

	# This method writes the synonym file in the Solr format
	def writeSolrSynonymFile():
	    """Write the synonym dictionary to ``solr_synonym.txt``.

	    The file starts with a comment header followed by a blank line. Each
	    dictionary entry then produces one line of the form
	    ``key => synonym1, synonym2, ...`` with accents removed from both
	    sides.

	    An entry that raises ``UnicodeEncodeError`` while its line is being
	    built is reported on standard output and skipped; the remaining
	    entries are still written.
	    """
	    # Imported locally so this function does not depend on the file's
	    # top-level import block.
	    import io

	    synonyms = buildSynonymDictionary()

	    # io.open yields a text-mode UTF-8 file on both Python 2 and Python 3,
	    # and the ``with`` block closes it even if an entry raises.
	    with io.open("solr_synonym.txt", "w", encoding="utf-8") as out:
	        out.write(u"# Solr Synonmys File \n\n")

	        for key, values in synonyms.items():
	            try:
	                # Keys and values are UTF-8 encoded byte strings, so the
	                # line is assembled from bytes and decoded once.
	                line = (
	                    removeAccents(key)
	                    + b" => "
	                    + removeAccents(b", ".join(values))
	                    + b"\n"
	                )
	            except UnicodeEncodeError:
	                # repr() keeps the diagnostic itself from failing on
	                # non-ASCII or byte-string content.
	                print("UnicodeEncodeError: " + repr(key) + " - " + repr(values))
	            else:
	                out.write(line.decode("utf-8"))


	# Script entry point: runs unconditionally (also on import) and generates
	# solr_synonym.txt.
	writeSolrSynonymFile()