Skip to content

Instantly share code, notes, and snippets.

@j-min j-min/tokenize_dparser.py
Last active Dec 3, 2016

Embed
What would you like to do?
Get tokenized list from dparser
import json
import requests
def tokenize_dparser(text, timeout=10):
    """Return the surface forms of the tokens dparser produces for *text*.

    Sends *text* to the dparser HTTP endpoint and collects the ``form`` field
    of every entry in the JSON list it answers with.

    Example response for the input ``안녕하세요``::

        [{'deprel': 'ROOT', 'form': '안녕', 'pos': 'NNG', 'id': '1', 'head': '0'},
         {'deprel': 'VP', 'form': '하', 'pos': 'XSV', 'id': '2', 'head': '1'},
         {'deprel': 'VP', 'form': '세요', 'pos': 'EP+EF', 'id': '3', 'head': '2'}]

    for which this function returns ``['안녕', '하', '세요']``.

    Args:
        text: The string to tokenize.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        A list holding the ``form`` value of each entry, in response order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            service answers with an HTTP error status.
    """
    dparser_endpoint = 'http://parser.datanada.com/parse'
    # Hand the text over via ``params`` so it gets percent-encoded; gluing it
    # onto the URL by hand breaks on characters such as '&', '#' or '+'.
    response = requests.get(
        dparser_endpoint,
        params={'version': 1, 'string': text},
        timeout=timeout,  # without a timeout a stalled server blocks forever
    )
    # Fail with a clear HTTP error instead of a JSON decode error on an
    # error page.
    response.raise_for_status()
    data = json.loads(response.content.decode('utf-8'))  # list of dictionaries
    return [token['form'] for token in data]
# Script entry point: read one line from stdin, tokenize it and show the result.
if __name__ == '__main__':
    input_string = input()
    # Print the tokens; otherwise running the script produces no visible output.
    print(tokenize_dparser(input_string))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.