-
-
Save apohllo/80e4e279269e50b237f249f18221d7ee to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 83, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"import nltk\n", | |
"from nltk import word_tokenize\n", | |
"from pyMorfologik import Morfologik\n", | |
"from pyMorfologik.parsing import ListParser\n", | |
"import re\n", | |
"from collections import Counter" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"___________________________ 6 0\n", | |
"walkosze 6 100000\n", | |
"coaldale 9 200000\n", | |
"utrzmania 12 300000\n", | |
"laloubere 18 400000\n", | |
"sypiani 24 500000\n", | |
"morchella 36 600000\n", | |
"zwierzchni 60 700000\n", | |
"barkowo-obojczykowego 135 800000\n", | |
"otwieraną 564 900000\n", | |
"silnikowego 5918 1000000\n" | |
] | |
} | |
], | |
"source": [ | |
# Load the corpus word-frequency dictionary.
# Each line of word-counts.txt is "<count> <word>".
gpath_2 = '/net/scratch/people/plgapohl/pl-v2w/Polish/'
dictionary = {}
with open(gpath_2 + 'word-counts.txt', 'r') as f:
    # enumerate replaces the manually maintained counter `i` of the original
    for i, line in enumerate(f):
        count, word = line.split()
        count = int(count)
        dictionary[word] = count
        if i % 100000 == 0:
            # progress trace: one sample word per 100k lines
            print(word, count, i)
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"a 0\n", | |
"fosylizacyjnością 1000000\n", | |
"mężczyźniska 2000000\n", | |
"nienidziańskość 3000000\n", | |
"odwadniając 4000000\n", | |
"przemakałobyś 5000000\n", | |
"Szczurku 6000000\n", | |
"wytrzebiałbyś 7000000\n" | |
] | |
} | |
], | |
"source": [ | |
# Load the set of valid Polish word forms (one inflected form per line).
valid_words = set()
with open(gpath_2 + 'polimorf.txt', 'r') as f:
    # enumerate replaces the manual counter; rstrip once instead of twice
    for i, line in enumerate(f):
        word = line.rstrip()
        valid_words.add(word)
        if i % 1000000 == 0:
            # progress trace: one sample word per 1M lines
            print(word, i)
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 131, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
# Build the set of known Polish first names, stored in both the original
# (capitalised) and lower-cased form so lookups work either way.
first_names = set()
for file in ['imiona_meskie_utf8.txt', 'imiona_zenskie_utf8.txt']:
    with open(gpath_2 + file, 'r') as f:
        for line in f:
            name = line.rstrip()  # strip once instead of twice per line
            first_names.add(name)
            first_names.add(name.lower())
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 132, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
# Build the set of known Polish surnames, stored in both the original
# (capitalised) and lower-cased form so lookups work either way.
surnames = set()
for file in ['nazwiska.txt']:
    with open(gpath_2 + file, 'r') as f:
        for line in f:
            surname = line.rstrip()  # strip once instead of twice per line
            surnames.add(surname)
            surnames.add(surname.lower())
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 70, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
# Norvig-style spelling corrector adapted to Polish (extended alphabet,
# external frequency dictionary and validity set).
FREQUENCY = dictionary
VALID = valid_words


def freq(word):
    """Corpus frequency of `word`; 0 for unseen words."""
    # dict.get avoids the try/except round-trip of the original
    return FREQUENCY.get(word, 0)


def correction(word):
    """Most probable spelling correction for `word`."""
    return max(candidates(word), key=freq)


def candidates(word):
    """Possible spelling corrections for `word`, nearest edit distance first."""
    return known([word]) or known(edits1(word)) or known(edits2(word)) or [word]


def known(words):
    """The subset of `words` that appear in the VALID word-form set."""
    return {w for w in words if w in VALID}


def edits1(word):
    """All edits that are one edit away from `word` (Polish alphabet)."""
    letters = 'abcdefghijklmnopqrstuvwxyzążśźęćńół'
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
    deletes = [L + R[1:] for L, R in splits if R]
    transposes = [L + R[1] + R[0] + R[2:] for L, R in splits if len(R) > 1]
    replaces = [L + c + R[1:] for L, R in splits if R for c in letters]
    inserts = [L + c + R for L, R in splits for c in letters]
    return set(deletes + transposes + replaces + inserts)


def edits2(word):
    """All edits that are two edits away from `word` (lazy generator)."""
    return (e2 for e1 in edits1(word) for e2 in edits1(e1))
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'sie'" | |
] | |
}, | |
"execution_count": 64, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"correction('sie')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"False" | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"\".a\".isalpha()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
def decapitalize(word):
    """Return `word` with its first character lower-cased (empty-string safe)."""
    if not word:
        return word
    return word[0].lower() + word[1:]
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
def most_popular_tuple(list_of_stems, frequency):
    """Return the stem from `list_of_stems` with the highest corpus frequency.

    Stems absent from `frequency` count as unknown; when no stem has a
    positive count the first stem is returned as a fallback.  Returns None
    only for an empty input list.

    (Despite the historical name, this returns a single key, not a tuple.)
    """
    best_key, best_count = None, 0
    for word in list_of_stems:
        count = frequency.get(word)  # .get instead of try/except KeyError
        if count is None:
            # unknown stem: only use it when we have nothing better yet
            if best_key is None:
                best_key = word
        elif count > best_count or best_key is None:
            # `best_key is None` fixes the original bug where a stem present
            # with count 0 never became the result (the function returned
            # None when every known stem had a zero count)
            best_key, best_count = word, count
    return best_key
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
def find_stems(word, stemmer, parser):
    """Return all candidate stems of `word` reported by the Morfologik stemmer.

    Title-cased words are additionally stemmed in decapitalized form, so
    sentence-initial capitalisation does not hide the common-noun stem.
    """
    result = stemmer.stem([word], parser)
    if word.istitle():
        result += stemmer.stem([decapitalize(word)], parser)
    stems = []
    # `entry` replaces the original loop name `tuple`, which shadowed the builtin
    for entry in result:
        stems += list(entry[1].keys())
    return stems
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 75, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
def select_stem(word, stemmer, parser, frequency):
    """Pick the most popular stem for `word`.

    When direct stemming yields nothing and the token is alphabetic, the
    word is spell-corrected first and stemming is retried.  Falls back to
    the word itself when no stem can be found.  Returns [word, stem].
    """
    stems = find_stems(word, stemmer, parser)
    if not stems and word.isalpha():
        stems = find_stems(correction(word), stemmer, parser)
    if not stems:
        return [word, word]
    return [word, most_popular_tuple(stems, frequency)]
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 152, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
# Raw strings: '\d' inside a plain string literal is an invalid escape
# sequence (DeprecationWarning since Python 3.6, an error in newer versions).
DIGIT_RE = re.compile(r'^\d+$')
# Matches a letter adjacent to a non-letter (digit, punctuation, underscore),
# i.e. tokens that look like identifiers rather than plain words.
NONLETTER_RE = re.compile(r'[\W\d_][\w]|[\w][\W\d_]')


def convert(line, stemmer, parser, frequency):
    """Tokenize `line` and return a list of [token, base-form, annotation].

    Pure-digit tokens and mixed letter/non-letter tokens keep themselves as
    the base form and get an XML-ish annotation; other tokens are lemmatized
    via select_stem(), with capitalised known first names / surnames tagged.
    """
    result = []
    for word in word_tokenize(line):
        if DIGIT_RE.match(word):
            result.append([word, word, f"<number length='{len(word)}'/>"])
        elif NONLETTER_RE.search(word):
            # NOTE(review): the tag was previously misspelled "identifer";
            # fixed here — update any downstream consumer of that tag.
            result.append([word, word, f"<identifier length='{len(word)}'/>"])
        else:
            word, base = select_stem(word, stemmer, parser, frequency)
            if word in first_names and word[0].isupper():
                result.append([word, base, f"<first-name length='{len(word)}'/>"])
            elif word in surnames and word[0].isupper():
                result.append([word, base, f"<last-name length='{len(word)}'/>"])
            else:
                result.append([word, base, ""])
    return result
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 153, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[['1000', '1000', \"<number length='4'/>\"],\n", | |
" ['Alicja', 'Alicja', \"<first-name length='6'/>\"],\n", | |
" ['Kowalska', 'kowalski', \"<last-name length='8'/>\"],\n", | |
" ['ma', 'mieć', ''],\n", | |
" ['kota', 'kota', ''],\n", | |
" ['na', 'na', ''],\n", | |
" ['11aa', '11aa', \"<identifer length='4'/>\"],\n", | |
" ['12323', '12323', \"<number length='5'/>\"],\n", | |
" ['polsko-polski', 'polsko-polski', \"<identifer length='13'/>\"]]" | |
] | |
}, | |
"execution_count": 153, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"convert(\"1000 Alicja Kowalska ma kota na 11aa 12323 polsko-polski\", stemmer, parser, dictionary)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 146, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<_sre.SRE_Match object; span=(1, 3), match='a-'>" | |
] | |
}, | |
"execution_count": 146, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"NONLETTER_RE.search(\"aa-a\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 115, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"NONLETTER_RE.match('11aa')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"stemmer = Morfologik()\n", | |
"parser = ListParser()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 154, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"---------- 1 ----------\n", | |
"[['Dzień', 'dzień', ''], ['dobry', 'dobry', ''], [',', ',', ''], ['Marta', 'Marta', \"<first-name length='5'/>\"], ['Osuchowska', 'osuchowski', \"<last-name length='10'/>\"], ['.', '.', ''], ['W', 'w', ''], ['czym', 'co', ''], ['mogę', 'móc', ''], ['pomóc', 'pomóc', ''], ['?', '?', '']]\n", | |
"[['Witam', 'witać', ''], [',', ',', ''], ['chialem', 'chmal', ''], ['sie', 'si', ''], ['dowiedziec', 'dowiedzieć', ''], ['jakie', 'jaki', ''], ['posiadacie', 'posiadać', ''], ['Państwo', 'państwo', ''], ['Karty', 'karty', ''], ['Kredytowe', 'kredytowy', '']]\n", | |
"[['Czy', 'czy', ''], ['jest', 'być', ''], ['Pan', 'pan', ''], ['klientem', 'klient', ''], ['banku', 'bank', ''], ['?', '?', '']]\n", | |
"[['Jeszcze', 'jeszcze', ''], ['nie', 'nie', ''], [',', ',', ''], ['ale', 'ale', ''], ['moze', 'może', ''], ['zostane', 'zostać', '']]\n", | |
"[['Czego', 'co', ''], ['Pan', 'pan', ''], ['oczekuję', 'oczekiwać', ''], ['od', 'od', ''], ['karty', 'karty', ''], ['kredytowej', 'kredytowy', ''], ['?', '?', ''], ['W', 'w', ''], ['naszej', 'nasz', ''], ['ofercie', 'oferta', ''], ['jest', 'być', ''], ['ich', 'on', ''], ['kilka', 'kilka', '']]\n", | |
"[['interesuje', 'interesować', ''], ['mnie', 'ja', ''], ['karta', 'karta', ''], ['kredytowa', 'kredytowy', ''], [',', ',', ''], ['ktora', 'który', ''], ['bede', 'bebe', ''], ['mogl', 'móc', ''], ['uzywac', 'używać', ''], ['do', 'do', ''], ['podrozy', 'podroby', ''], ['sluzbowych', 'służbowy', '']]\n", | |
"[['pytanie', 'pytanie', ''], ['czy', 'czy', ''], ['trzeba', 'trzeba', ''], ['miec', 'mieć', ''], ['rachunek', 'rachunek', ''], ['firmowy', 'firmowy', ''], ['wtedy', 'wtedy', ''], ['u', 'u', ''], ['Państwa', 'państwo', ''], ['?', '?', '']]\n", | |
"[['Czy', 'czy', ''], ['pana', 'pan', ''], ['interesuje', 'interesować', ''], ['karta', 'karta', ''], ['firmowa', 'firmowy', ''], ['czy', 'czy', ''], ['osobista', 'osobisty', ''], ['kredytowa', 'kredytowy', ''], ['?', '?', '']]\n", | |
"[['No', 'no', ''], ['wlasnie', 'wlać', ''], ['tego', 'to', ''], ['do', 'do', ''], ['konca', 'koniec', ''], ['nie', 'nie', ''], ['wiem', 'wiedzieć', ''], [',', ',', ''], ['ale', 'ale', ''], ['chyba', 'chyba', ''], ['infywidualna', 'indywidualny', '']]\n", | |
"[['bo', 'bo', ''], ['rozumiem', 'rozumieć', ''], [',', ',', ''], ['ze', 'z', ''], ['nie', 'nie', ''], ['potrzebuje', 'potrzebować', ''], ['posiadac', 'posiadać', ''], ['rachunku', 'rachunek', ''], ['firmowego', 'firmowy', ''], ['wtedy', 'wtedy', '']]\n", | |
"[['Nie', 'nie', ''], ['jest', 'być', ''], ['konieczne', 'konieczny', ''], ['posiadanie', 'posiadać', ''], ['rachunku', 'rachunek', ''], ['w', 'w', ''], ['mBanku', 'mBank', ''], [',', ',', ''], ['aby', 'aby', ''], ['o', 'o', ''], ['nią', 'on', ''], ['wnioskować', 'wnioskować', ''], ['w', 'w', ''], ['żadnym', 'żaden', ''], ['z', 'z', ''], ['tych', 'ten', ''], ['przypadków', 'przypadek', ''], ['.', '.', '']]\n", | |
"[['Pytam', 'pytać', ''], [',', ',', ''], ['ponieważ', 'ponieważ', ''], ['mogę', 'móc', ''], ['Panu', 'pan', ''], ['zaproponować', 'zaproponować', ''], ['ciekawą', 'ciekawy', ''], ['ofertę', 'oferta', ''], [',', ',', ''], ['dla', 'dla', ''], ['kart', 'karty', ''], ['indywidualnych', 'indywidualny', ''], ['kredytowych', 'kredytowy', ''], ['.', '.', ''], ['Jaki', 'jak', ''], ['limit', 'limit', ''], ['Pana', 'pan', ''], ['interesuję', 'interesować', ''], ['?', '?', '']]\n", | |
"[['dobrze', 'dobrze', ''], [',', ',', ''], ['no', 'no', ''], ['mysle', 'mydło', ''], ['ze', 'z', ''], ['20', '20', \"<number length='2'/>\"], ['000', '000', \"<number length='3'/>\"], ['PLN', 'LN', '']]\n", | |
"[['Bylby', 'ryba', ''], ['ok', 'około', '']]\n", | |
"[['Zatem', 'zatem', ''], ['proponuję', 'proponować', ''], ['Panu', 'pan', ''], ['kartę', 'karta', ''], ['MasterCard', 'MasterCard', ''], ['Me', 'mój', ''], ['.', '.', ''], ['Jest', 'być', ''], ['to', 'to', ''], ['nazwa', 'nazwa', ''], ['karty', 'karty', ''], ['kredytowej', 'kredytowy', ''], ['.', '.', ''], ['Za', 'za', ''], ['pomocą', 'pomoc', ''], ['ponizszego', 'poniższy', ''], ['linku', 'link', ''], [',', ',', ''], ['przystępując', 'przystępować', ''], ['do', 'do', ''], ['promocji', 'promocja', ''], ['nie', 'nie', ''], ['zapłąci', 'zapłacić', ''], ['Pan', 'pan', ''], ['za', 'za', ''], ['jej', 'jej', ''], ['przyznanie', 'przyznać', ''], ['oraz', 'oraz', ''], ['wydanie', 'wydanie', ''], ['.', '.', ''], ['Dodatkowo', 'dodatkowo', ''], ['moze', 'może', ''], ['Pan', 'pan', ''], ['wybrać', 'wybrać', ''], ['własną', 'własny', ''], ['grafikę', 'grafika', ''], ['.', '.', '']]\n", | |
"[['Przesyłam', 'przesyłać', ''], ['link', 'link', ''], ['do', 'do', ''], ['wniosku', 'wniosek', ''], ['Można', 'można', ''], ['go', 'go', ''], ['zapisać', 'zapisać', ''], [',', ',', ''], ['będzie', 'być', ''], ['on', 'on', ''], ['również', 'również', ''], ['aktywny', 'aktywny', ''], ['po', 'po', ''], ['zakończeniu', 'zakończenie', ''], ['rozmowy', 'rozmowy', ''], ['–', '–', ''], ['link', 'link', ''], ['Karta', 'karta', ''], ['kredytowa', 'kredytowy', '']]\n", | |
"[['Czyli', 'czyli', ''], ['nie', 'nie', ''], ['bedac', 'badanie', ''], ['nominalnie', 'nominalnie', ''], ['klientem', 'klient', ''], ['banku', 'bank', ''], ['moge', 'może', ''], ['miec', 'mieć', ''], ['karte', 'wart', ''], ['?', '?', '']]\n", | |
"[['Czy', 'czy', ''], ['zakladamy', 'zakładać', ''], ['mi', 'mi', ''], ['konto', 'konto', ''], ['i', 'i', ''], ['dostęp', 'dostęp', ''], ['do', 'do', ''], ['Bankowości', 'bankowość', ''], ['Internetowej', 'internetowy', ''], ['?', '?', '']]\n", | |
"[['Tak', 'tak', ''], [',', ',', ''], ['oczywiście', 'oczywiście', ''], ['.', '.', ''], ['Będzie', 'być', ''], ['Pan', 'pan', ''], ['posiadał', 'posiadać', ''], ['kartę', 'karta', ''], ['i', 'i', ''], ['dostęp', 'dostęp', ''], ['do', 'do', ''], ['niej', 'on', ''], ['przez', 'przez', ''], ['internet', 'internet', ''], ['.', '.', '']]\n", | |
"[['Czyli', 'czyli', ''], ['musze', 'musza', ''], ['wypełnić', 'wypełnić', ''], ['ten', 'ten', ''], ['formularz', 'formularz', ''], ['i', 'i', ''], ['to', 'to', ''], ['wszystko', 'wszystko', ''], ['?', '?', '']]\n", | |
"[['Tak', 'tak', ''], [',', ',', ''], ['zgadza', 'zgadzać', ''], ['się', 'się', ''], ['.', '.', '']]\n", | |
"[['Proszę', 'prosić', ''], ['go', 'go', ''], ['wypełnić', 'wypełnić', ''], ['teraz-sprawdzę', 'teraz-sprawdzę', \"<identifer length='14'/>\"], ['jego', 'on', ''], ['poprawność', 'poprawność', ''], ['dla', 'dla', ''], ['Pana', 'pan', ''], ['.', '.', '']]\n", | |
"[['Raczej', 'raczej', ''], ['zrobie', 'zrób', ''], ['to', 'to', ''], ['w', 'w', ''], ['domu', 'dom', ''], ['wieczorem', 'wieczór', ''], [',', ',', ''], ['ale', 'ale', ''], ['wiem', 'wiedzieć', ''], ['jak', 'jak', ''], ['to', 'to', ''], ['dziala', 'działać', ''], [',', ',', ''], ['czyli', 'czyli', ''], ['zadzwonie', 'zadzwonić', ''], ['na', 'na', ''], ['eksperta', 'ekspert', ''], ['(', '(', ''], ['chatujac', 'chatować', ''], [')', ')', ''], ['i', 'i', ''], ['przesle', 'przesłać', ''], ['ten', 'ten', ''], ['formularz', 'formularz', '']]\n", | |
"[['bardzo', 'bardzo', ''], ['dziękuje', 'dziękować', ''], ['za', 'za', ''], ['pomoc', 'pomoc', '']]\n", | |
"[['aha', 'aha', ''], [',', ',', ''], ['a', 'a', ''], ['czy', 'czy', ''], ['ta', 'ten', ''], ['karta', 'karta', ''], ['ma', 'mieć', ''], ['jakis', 'jaki', ''], ['termin', 'termin', ''], ['splaty', 'spłata', ''], ['?', '?', '']]\n", | |
"[['Czas', 'czas', ''], ['bezodsetkowy', 'bezodsetkowy', ''], ['to', 'to', ''], ['54', '54', \"<number length='2'/>\"], ['dni', 'dzień', ''], ['.', '.', ''], ['Proszę', 'prosić', ''], ['zapisać', 'zapisać', ''], ['link', 'link', ''], ['i', 'i', ''], ['z', 'z', ''], ['niego', 'on', ''], ['skorzystać', 'skorzystać', ''], [',', ',', ''], ['aby', 'aby', ''], ['miał', 'miał', ''], ['Pan', 'pan', ''], ['zagwarantowane', 'zagwarantować', ''], ['opisane', 'opisać', ''], ['promocyjne', 'promocyjny', ''], ['warunki', 'warunki', ''], ['.', '.', '']]\n", | |
"[['Dziekuje', 'dziękować', ''], ['pięknie', 'pięknie', ''], ['za', 'za', ''], ['rozmowe', 'rozmowy', '']]\n", | |
"[['Ciesze', 'ciesać', ''], ['się', 'się', ''], [',', ',', ''], ['ze', 'z', ''], ['mogłam', 'móc', ''], ['pomóc', 'pomóc', '']]\n", | |
"[['Do', 'do', ''], ['uslyszenia', 'usłyszeć', '']]\n" | |
] | |
} | |
], | |
"source": [ | |
# Run the converter over the conversation dump(s): one JSON record per line,
# lemmatizing the 'content' field of each record.
for i in range(1):
    print(f'---------- {i+1} ----------')
    with open(f'/net/people/plgapohl/notebooks/data/ailleron-{i+1}.json') as f:
        for line in f:
            record = json.loads(line)
            tuples = convert(record['content'], stemmer, parser, dictionary)
            print(tuples)
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'ła'" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"\"Łaaa\"[:2].lower()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment