Skip to content

Instantly share code, notes, and snippets.

encoder.version torch.Size([1])
encoder.embed_tokens.weight torch.Size([41104, 512])
encoder.embed_positions._float_tensor torch.Size([1])
encoder.layers.0.self_attn.in_proj_weight torch.Size([1536, 512])
encoder.layers.0.self_attn.in_proj_bias torch.Size([1536])
encoder.layers.0.self_attn.out_proj.weight torch.Size([512, 512])
encoder.layers.0.self_attn.out_proj.bias torch.Size([512])
encoder.layers.0.bert_attn.k_proj_weight torch.Size([512, 768])
encoder.layers.0.bert_attn.v_proj_weight torch.Size([512, 768])
encoder.layers.0.bert_attn.q_proj_weight torch.Size([512, 512])
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
sys.path.append("bert")
import collections
import csv
import os, sys, glob, re
import jieba
from kashgari.tasks.seq_labeling import BLSTMCRFModel
from kashgari.corpus import ChinaPeoplesDailyNerCorpus
from kashgari.embeddings import BERTEmbedding
# Location handed to BERTEmbedding. The directory name suggests the
# multilingual cased BERT-Base checkpoint; the absolute path is
# machine-specific -- NOTE(review): confirm it exists on the target host.
BERT_MODEL_PATH = '/home/eee/sentence-alignment-classification-model/model/multi_cased_L-12_H-768_A-12'

# Second positional argument of BERTEmbedding; presumably the maximum
# sequence length -- TODO confirm against the installed kashgari version.
BERT_SECOND_ARG = 100

embedding = BERTEmbedding(BERT_MODEL_PATH, BERT_SECOND_ARG)

# Load the three corpus splits in the original order: train, validate, test.
# Each call is unpacked into an (x, y) pair.
_load_split = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data
train_x, train_y = _load_split('train')
validate_x, validate_y = _load_split('validate')
test_x, test_y = _load_split('test')
@echan00
echan00 / gist:acde1d7e460cd9e467dfd612ea14ab66
Last active December 16, 2018 07:10
Example Chinese text file containing many garbled characters
有关文件可于香港上海汇丰银行有限公司办事处查阅,地址为香港皇后大道中 1 号汇丰总行大厦。
俏⑥р⎭┏䊆䢶㺂ᴿ䲆‫ޢ‬ਮ 㠪俏⑥р⎭┏䊆䢶㺂ᴿ䲆‫ޢ‬ਮδ俏⑥䁱߀ᡆ㄁ҁᴿ䲆‫ޢ‬ਮε㛗ᶧҁ⦞㄁Ṯᮮᑡ๧઀ᴮ ᵢṮᮮᑡδԛс㉗でɇᡇ‫ه‬Ɉεᐨሟ䀾ࡍ䔿᯲ㅢ 79 㠩 195 丷俏⑥р⎭┏䊆䢶㺂ᴿ䲆‫ޢ‬ਮδԛс㉗でɇ䋪䢶㺂Ɉε਀ެ䱺ኢ‫ޢ‬ਮⲺ㏒ਾ䋗ए๧㺞θ↚㏒ਾ䋗ए๧㺞ऻᤢ᯲ 2015 ᒪ 12 ᴾ 31 ᰛⲺ㏒ਾ䋽⭘䋖۫㺞㠽ᡠ㠩䂨ᰛ↘ᒪᓜⲺ㏒ਾ᭬ⴀ㺞Ƚ㏒ਾ‫ޞ‬䶘᭬ⴀ㺞Ƚ㏒ਾ㛗ᶧ⅀ⴀ䇀ऋ㺞਀㏒ਾ⨴䠇⍷䠅㺞θԛ਀ѱ㾷ᴹ䀾᭵ㆌᾸ㾷਀ެԌ䱺䁱䀙䠁䋽ᯏȾ㪙Ӂቧ㏒ਾ䋗ए๧㺞举ᢵᬊⲺ䋢Ա䋪䢶㺂㪙Ӂ举䋖䋢ṯᬐ俏⑥ᴹ䀾ᑡ‫ޢ‬ᴹ么ᐹⲺɅ俏⑥䋗ए๧઀ⓌࡽɆ਀俏⑥Ʌ‫ޢ‬ਮồׁɆ㐞㼳ⵕሜ㙂ѣ㛥Ⲻ㏒ਾ䋗ए๧㺞θќቃެ䃃⛰⛰ֵ㏒ਾ䋗ए๧㺞Ⲻᬢ‫ۏ‬уᆎ൞⭧᯲ⅰ䂆ᡌ䥥䃚㙂ቄ㠪Ⲻ䠃ཝ䥥䃚䲩䘦ᡶᗻ䴶Ⲻ‫ޝ‬䜞᧝࡬䋖䋢ȾṮᮮᑡⲺ䋢Աᡇ‫Ⲻه‬䋢Աᱥṯᬐᡇ‫Ⲻه‬ሟ䀾ቧ䂨ㅿ㏒ਾ䋗ए๧㺞Ⲳ㺞ᝅ㿁θќ᤿➝俏⑥Ʌ‫ޢ‬ਮồׁɆㅢ 405 ồ‫ੇۻ‬䯙сδ֒⛰᮪儊ε๧઀θ䲚↚ҁཌᵢ๧઀࡛❗ެԌⴤⲺȾᡇ‫ه‬уᴹቧᵢ๧઀Ⲻ‫ޝ‬ᇯੇԱ֋ެԌӰ༡䋖рᡌᢵᬊԱ֋䋢ԱȾᡇ‫ه‬ᐨṯᬐ俏⑥ᴹ䀾ᑡ‫ޢ‬ᴹ么ᐹⲺɅ俏⑥ሟ䀾ⓌࡽɆ䙨㺂ሟ䀾Ⱦ䂨ㅿⓌࡽ㾷≸ᡇ‫ه‬䚫ᆾ䚉ᗭ㿅ㇺθќ㿅ࢹ਀อ㺂ሟ䀾ԛቃ㏒ਾ䋗ए๧㺞ᱥੜуᆎ൞Ա֋䠃ཝ䥥䃚䲩䘦⦨਌ਾ⨼ؓ䅿Ⱦሟ䀾⎿਀อ㺂ぁᓅԛ⦨਌ᴿ䰒㏒ਾ䋗ए๧㺞ᡶ䔿䠇亃਀ᣡ䵨䋽ᯏⲺሟ䀾ះ䅿Ⱦᡶ䚮᫽Ⲻぁᓅ਌⊰᯲ṮᮮᑡⲺ࡚ᯭθऻᤢ䂋զ⭧᯲ⅰ䂆ᡌ䥥䃚㙂ቄ㠪㏒ਾ䋗ए๧㺞ᆎ൞䠃ཝ䥥䃚䲩䘦Ⲻ仞䳠Ⱦ൞䂋զ䂨ㅿ仞䳠ᱸθṮᮮᑡ㘹ឤ㠽䂨‫ޢ‬ਮᬢ‫ⵕۏ‬ሜ㙂ѣ㛥Ⲻ㏒ਾ䋗ए๧㺞⴮䰒Ⲻ‫ޝ‬䜞᧝࡬θԛ䁣䀾䚟⮬Ⲻሟ䀾ぁᓅθռⴤⲺќ䶔ቃ‫ޢ‬ਮ‫ޝ‬䜞᧝࡬Ⲻᴿ᭾ᙝⲲ㺞ᝅ㿁Ⱦሟ䀾Ӝऻᤢ䂋‫ܯ‬㪙Ӂᡶ⭞ᴹ䀾᭵ㆌⲺᚦ⮬ᙝ਀ᡶ֒ᴹ䀾զ䀾Ⲻਾ⨼ᙝθԛ਀䂋‫ܯ‬㏒ਾ䋗ए๧㺞Ⲻ᮪儊ࡍ๧ᯯᕅȾᡇ‫ؗ⴮ه‬θᡇ‫ه‬ᡶ⦨ᗍⲺሟ䀾ះ䅿㜳‫ૂ࠼ݻ‬䚟⮬൦⛰ᡇ‫Ⲻه‬ሟ䀾ᝅ㿁ᨆ‫ב‬ะ⽄Ⱦᝅ㿁ᡇ‫ه‬䃃⛰θ䂨ㅿ㏒ਾ䋗ए๧㺞ᐨṯᬐɅ俏⑥䋗ए๧઀ⓌࡽɆⵕሜ㙂ѣ㛥൦ਃ᱖䋪䢶㺂਀ެ䱺ኢ‫ޢ‬ਮ᯲ 2015 ᒪ 12 ᴾ 31 ᰛⲺ䋗᭵⣶⋷਀ᖲㅿᡠ㠩䂨ᰛ↘ᒪᓜⲺ䋗ए㺞⨴਀⨴䠇⍷䠅θќᐨ䚫➝俏⑥Ʌ‫ޢ‬ਮồׁɆ࿛⛰ᬢ‫ۏ‬Ⱦ 㖻ޫ૮≮䚉ᴹ䀾ᑡӁएᡶอᾣᴹ䀾ᑡ俏⑥ 2016 ᒪ 2 ᴾ 22 ᰛ 78
俏⑥р⎭┏䊆