Skip to content

Instantly share code, notes, and snippets.

@d2207197
Last active August 29, 2015 14:18
Show Gist options
  • Save d2207197/7e2f03d9c4e6fc78ca58 to your computer and use it in GitHub Desktop.
Save d2207197/7e2f03d9c4e6fc78ca58 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from __future__ import unicode_literals\n",
"from pprint import pformat"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Example sentence, already segmented: tokens are separated by single spaces.\n",
"sentence = '當時 只有 一 個 念頭 : 人類 的 末日 已經 來臨 !'\n",
"words = sentence.split()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1]"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of characters in each token, in sentence order.\n",
"# list(...) keeps this a real list even where map() is lazy (Python 3), so the\n",
"# cell displays the values and later cells can iterate over it more than once.\n",
"word_lens = list(map(len, words))\n",
"word_lens"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[0, 2, 4, 5, 6, 8, 9, 11, 12, 14, 16, 18, 19]"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Running total of the token lengths, starting from 0: entry i is the character\n",
"# offset at which token i starts (spaces between tokens are not counted) and the\n",
"# last entry is the total number of characters.\n",
"# Built with append so the work is linear and no builtin reduce() is required.\n",
"word_lens_sums = [0]\n",
"for word_len in word_lens:\n",
"    word_lens_sums.append(word_lens_sums[-1] + word_len)\n",
"word_lens_sums"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{u'一': (4,),\n",
" u'人類': (9, 10),\n",
" u'來臨': (16, 17),\n",
" u'個': (5,),\n",
" u'只有': (2, 3),\n",
" u'已經': (14, 15),\n",
" u'念頭': (6, 7),\n",
" u'末日': (12, 13),\n",
" u'當時': (0, 1),\n",
" u'的': (11,),\n",
" u'!': (18,),\n",
" u':': (8,)}\n"
]
}
],
"source": [
"# Character positions covered by each word (spaces between tokens are not counted).\n",
"# NOTE: the mapping is keyed by the word itself, so a word that occurs more than\n",
"# once in `words` keeps only the positions of its last occurrence.\n",
"char_positions = {word: tuple(range(start, end))\n",
"                  for word, start, end in zip(words, word_lens_sums, word_lens_sums[1:])}\n",
"formatted = pformat(char_positions)\n",
"# On Python 2 pformat returns a byte string with non-ASCII characters shown as\n",
"# escape sequences, so decode it for readable output; on Python 3 the result is\n",
"# already text and has no decode() method.\n",
"if isinstance(formatted, bytes):\n",
"    formatted = formatted.decode('unicode-escape')\n",
"print(formatted)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://gist.github.com/7e2f03d9c4e6fc78ca58\r\n"
]
}
],
"source": [
"# Publishing step, not part of the analysis: runs the external `gist` command on this notebook file.\n",
"# NOTE(review): `-u` appears to update the existing gist at this URL - confirm against the installed\n",
"# `gist` CLI, and be aware that Run All will execute this shell command again.\n",
"!gist -u https://gist.github.com/7e2f03d9c4e6fc78ca58 word_pos_to_char_pos.ipynb"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment