Skip to content

Instantly share code, notes, and snippets.

@marrrcin
marrrcin / processify.py
Created February 20, 2018 07:57 — forked from schlamar/processify.py
processify
import os
import sys
import traceback
from functools import wraps
from multiprocessing import Process, Queue
def processify(func):
'''Decorator to run a function as a process.
Be sure that every argument and the return value
@marrrcin
marrrcin / imports.py
Last active January 24, 2019 12:53
Common Imports
import pandas as pd
import numpy as np
from pathlib import Path
import os
from glob import glob
import re
@marrrcin
marrrcin / polish_sentence_nltk_tokenizer.py
Last active March 16, 2020 15:38 — forked from ksopyla/polish_sentence_nltk_tokenizer.py
A curated list of Polish abbreviations for NLTK sentence tokenizer based on Wikipedia text
import nltk
# interactive download
# nltk.download()
nltk.download('punkt')
extra_abbreviations = ['ps', 'inc', 'Corp', 'Ltd', 'Co', 'pkt', 'Dz.Ap', 'Jr', 'jr', 'sp', 'Sp', 'poj', 'pseud', 'krypt', 'sygn', 'Dz.U', 'ws', 'itd', 'np', 'sanskryt', 'nr', 'gł', 'Takht', 'tzw', 't.zw', 'ewan', 'tyt', 'oryg', 't.j', 'vs', 'l.mn', 'l.poj' ]
position_abbrev = ['Ks', 'Abp', 'abp','bp','dr', 'kard', 'mgr', 'prof', 'zwycz', 'hab', 'arch', 'arch.kraj', 'B.Sc', 'Ph.D', 'lek', 'med', 'n.med', 'bł', 'św', 'hr', 'dziek' ]
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.