Skip to content

Instantly share code, notes, and snippets.

@ischurov
Created February 9, 2022 13:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ischurov/3b31c3a0bfb44648a5c01354e73cd818 to your computer and use it in GitHub Desktop.
Save ischurov/3b31c3a0bfb44648a5c01354e73cd818 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "homeless-wisconsin",
"metadata": {},
"source": [
"## Наука о данных\n",
"### Совместный бакалавриат ВШЭ-РЭШ, 2021-2022 учебный год\n",
"_Илья Щуров_\n",
"\n",
"[Страница курса](http://math-info.hse.ru/s21/j)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "collected-september",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[[100, 2, 3], [10, 20, 30]]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"table = [[1, 2, 3],\n",
" [10, 20, 30]]\n",
"backup_table = table.copy()\n",
"table[0][0] = 100\n",
"backup_table"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "listed-termination",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting testfile.txt\n"
]
}
],
"source": [
"%%file testfile.txt\n",
"Hello, world!\n",
"This is a test."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "pleasant-recruitment",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class '_io.TextIOWrapper'>\n",
"Hello, world!\n",
"\n",
"This is a test.\n",
"\n"
]
}
],
"source": [
"f = open(\"testfile.txt\")\n",
"print(type(f))\n",
"for line in f:\n",
" print(line)\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "sublime-anxiety",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Hello, world!\\n', 'This is a test.\\n']\n"
]
}
],
"source": [
"f = open(\"testfile.txt\")\n",
"print(list(f))\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "united-dodge",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class '_io.TextIOWrapper'>\n",
"Hello, world!\n",
"This is a test.\n"
]
}
],
"source": [
"f = open(\"testfile.txt\")\n",
"for line in f:\n",
" print(line, end=\"\")\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "international-scale",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hello, world!\n",
"This is a test.\n"
]
}
],
"source": [
"f = open(\"testfile.txt\")\n",
"for line in f:\n",
" print(line.rstrip())\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "indian-compound",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "I/O operation on closed file.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/h2/9nyrt4p55kq6pdvqg02_zmj40000gn/T/ipykernel_32950/3271805783.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mline\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: I/O operation on closed file."
]
}
],
"source": [
"for line in f:\n",
" print(line)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "surprising-label",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hello, world!\n",
"This is a test.\n",
"Inside with\n",
"Is file closed? False\n",
"Outside of with\n",
"Is file closed? True\n"
]
}
],
"source": [
"with open(\"testfile.txt\") as f:\n",
"    for line in f:\n",
"        print(line.rstrip())\n",
"    print(\"Inside with\")\n",
"    print(\"Is file closed?\", f.closed)\n",
"print(\"Outside of with\")\n",
"print(\"Is file closed?\", f.closed)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "coastal-economy",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hello, world!\n",
"This is a test.\n",
"Once again\n"
]
}
],
"source": [
"with open(\"testfile.txt\") as f:\n",
"    for line in f:\n",
"        print(line.rstrip())\n",
"    print(\"Once again\")\n",
"    for line in f:\n",
"        print(line.rstrip())"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "congressional-circular",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hello, world!\n",
"This is a test.\n",
"Once again\n",
"Hello, world!\n",
"This is a test.\n"
]
}
],
"source": [
"with open(\"testfile.txt\") as f:\n",
"    for line in f:\n",
"        print(line.rstrip())\n",
"    print(\"Once again\")\n",
"    f.seek(0)\n",
"    for line in f:\n",
"        print(line.rstrip())"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "equipped-affair",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hello, world!\n",
"This is a test.\n",
"Once again\n",
"ello, world!\n",
"This is a test.\n"
]
}
],
"source": [
"with open(\"testfile.txt\") as f:\n",
"    for line in f:\n",
"        print(line.rstrip())\n",
"    print(\"Once again\")\n",
"    f.seek(1)\n",
"    for line in f:\n",
"        print(line.rstrip())"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "relative-memorabilia",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing cyrillic.txt\n"
]
}
],
"source": [
"%%file cyrillic.txt\n",
"Доброе утро, страна!\n",
"Говорит Москва!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "radical-lying",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"Здравствуй, страна!\n",
"Once again\n",
"Здравствуй, страна!\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "rational-olive",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Доброе утро, страна!\n",
"Говорит Москва!\n",
"Once again\n",
"оброе утро, страна!\n",
"Говорит Москва!\n"
]
}
],
"source": [
"with open(\"cyrillic.txt\", encoding='utf-8') as f:\n",
"    for line in f:\n",
"        print(line.rstrip())\n",
"    print(\"Once again\")\n",
"    f.seek(2)\n",
"    for line in f:\n",
"        print(line.rstrip())"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "falling-regulation",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"b'\\xd0\\x94\\xd0\\xbe\\xd0\\xb1\\xd1\\x80\\xd0\\xbe\\xd0\\xb5 \\xd1\\x83\\xd1\\x82\\xd1\\x80\\xd0\\xbe'"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"Доброе утро\".encode('utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "strong-telling",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"b'\\xc4\\xee\\xe1\\xf0\\xee\\xe5 \\xf3\\xf2\\xf0\\xee'"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"Доброе утро\".encode('CP1251')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "lasting-prediction",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Р”РѕР±СЂРѕРµ СѓС‚СЂРѕ'"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"Доброе утро\".encode('utf-8').decode('cp1251')"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "polished-powder",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Äîáðîå óòðî'"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"Доброе утро\".encode('cp1251').decode('latin-1')"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "frank-sheriff",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Доброе утро'"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'Äîáðîå óòðî'.encode('latin-1').decode('cp1251')"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "hawaiian-conference",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"b'Hello, world!'"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b'Hello, world!'"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "every-maple",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Этот файл создан искусственным интеллектом\n",
"\n",
"Его удаление приведёт к краху цивилизации\n",
"\n"
]
}
],
"source": [
"with open(\"newfile.txt\", \"w\") as f:\n",
"    print(\"Этот файл создан искусственным интеллектом\", file=f)\n",
"    print(\"Его удаление приведёт к краху цивилизации\", file=f)\n",
"\n",
"with open(\"newfile.txt\", encoding='utf-8') as f:\n",
"    for line in f:\n",
"        print(line)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "necessary-tamil",
"metadata": {},
"outputs": [],
"source": [
"f = open(\"anotherfile.txt\", \"w\")\n",
"print(\"Hello, world!\", file=f)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "considerable-costume",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f.closed"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "identified-ferry",
"metadata": {},
"outputs": [],
"source": [
"f.flush()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "liberal-amount",
"metadata": {},
"outputs": [],
"source": [
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "occasional-seeking",
"metadata": {},
"outputs": [],
"source": [
"with open(\"anotherfile.txt\", \"w\") as f:\n",
" print(\"Hello, world!\", file=f)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "modified-speaker",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Этот файл создан искусственным интеллектом\n",
"\n",
"Его удаление приведёт к краху цивилизации\n",
"\n",
"α² + β² = γ²\n",
"\n"
]
}
],
"source": [
"with open(\"newfile.txt\", \"w\", encoding='UTF-8') as f:\n",
"    print(\"Этот файл создан искусственным интеллектом\", file=f)\n",
"    print(\"Его удаление приведёт к краху цивилизации\", file=f)\n",
"    print(\"α² + β² = γ²\", file=f)\n",
"\n",
"with open(\"newfile.txt\", encoding='UTF-8') as f:\n",
"    for line in f:\n",
"        print(line)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "focal-hybrid",
"metadata": {},
"outputs": [],
"source": [
"with open(\"newfile.txt\", \"w\", encoding='UTF-8') as f:\n",
" print(\"New content\", file=f)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "prepared-variance",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"New content\n",
"\n"
]
}
],
"source": [
"with open(\"newfile.txt\") as f:\n",
"    for line in f:\n",
"        print(line)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "martial-evans",
"metadata": {},
"outputs": [],
"source": [
"with open(\"newfile.txt\", \"a\") as f:\n",
" print(\"More content\", file=f)"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "express-pakistan",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"New content\n",
"More content\n"
]
}
],
"source": [
"with open(\"newfile.txt\") as f:\n",
"    for line in f:\n",
"        print(line.rstrip())"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "legal-cutting",
"metadata": {},
"outputs": [],
"source": [
"with open(\"newfile.txt\") as f:\n",
" lines = f.readlines()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "central-planning",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['New content\\n', 'More content\\n']"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lines"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "vanilla-invalid",
"metadata": {},
"outputs": [],
"source": [
"with open(\"newfile.txt\") as f:\n",
" contents = f.read()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "posted-woman",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'New content\\nMore content\\n'"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"contents"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "tutorial-remark",
"metadata": {},
"outputs": [],
"source": [
"import os"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "still-swing",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/user/prj/svn.math-hse.info/repo/2021-22/nes-datascience'"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.getcwd()"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "demonstrated-collar",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "unknown-institution",
"metadata": {},
"outputs": [],
"source": [
"new_folder = Path(\"new_folder\")\n",
"new_folder.mkdir(exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "contained-ranking",
"metadata": {},
"outputs": [],
"source": [
"with open(new_folder / \"newfile.txt\", \"w\") as f:\n",
" print(\"New file\", file=f)\n",
" print(\"In new folder\", file=f)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "lesbian-channels",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('new_folder/newfile.txt')"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_folder / \"newfile.txt\""
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "jewish-twist",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('new_folder/../newfile.txt')"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_folder / \"..\" / \"newfile.txt\""
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "played-invite",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"New content\n",
"More content\n"
]
}
],
"source": [
"with open(new_folder / \"..\" / \"newfile.txt\") as f:\n",
"    for line in f:\n",
"        print(line.rstrip())"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "polyphonic-sustainability",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'New content\\nMore content\\n'"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Path(\"newfile.txt\").read_text()"
]
},
{
"cell_type": "code",
"execution_count": 96,
"id": "hybrid-rates",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"new_folder\n",
"rsconnect\n",
"first-lesson.Rmd\n",
"assignments\n",
".DS_Store\n",
"var1_mehweb.csv\n",
"Untitled.ipynb\n",
"second-lesson.html\n",
"anotherfile.txt\n",
"cyrillic.txt\n",
"first-lesson.html\n",
"second-class.nb.html\n",
"Lesson02.ipynb\n",
"first-class.nb.html\n",
"newfile.txt\n",
"Lesson04.ipynb\n",
"Lesson06.ipynb\n",
"second-lesson.Rmd\n",
".ipynb_checkpoints\n",
"Lesson01.ipynb\n",
"Lesson03.ipynb\n",
"testfile.txt\n",
"ps01.ipynb\n",
"second-lesson.nb.html\n",
"Lesson07.ipynb\n",
"Lesson05.ipynb\n"
]
}
],
"source": [
"for file in Path(\"\").iterdir():\n",
" print(file)"
]
},
{
"cell_type": "code",
"execution_count": 101,
"id": "smaller-wales",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Lesson02.ipynb\n",
"Lesson04.ipynb\n",
"Lesson06.ipynb\n",
"Lesson01.ipynb\n",
"Lesson03.ipynb\n",
"Lesson07.ipynb\n",
"Lesson05.ipynb\n"
]
}
],
"source": [
"for file in Path(\"\").glob(\"Lesson??.ipynb\"):\n",
" print(file)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"id": "mediterranean-friendship",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[PosixPath('/../home'),\n",
" PosixPath('/../usr'),\n",
" PosixPath('/../.DS_Store'),\n",
" PosixPath('/../.PKInstallSandboxManager-SystemSoftware'),\n",
" PosixPath('/../bin'),\n",
" PosixPath('/../sbin'),\n",
" PosixPath('/../.file'),\n",
" PosixPath('/../etc'),\n",
" PosixPath('/../var'),\n",
" PosixPath('/../Library'),\n",
" PosixPath('/../System'),\n",
" PosixPath('/../.VolumeIcon.icns'),\n",
" PosixPath('/../.fseventsd'),\n",
" PosixPath('/../private'),\n",
" PosixPath('/../.vol'),\n",
" PosixPath('/../Users'),\n",
" PosixPath('/../Applications'),\n",
" PosixPath('/../opt'),\n",
" PosixPath('/../dev'),\n",
" PosixPath('/../Volumes'),\n",
" PosixPath('/../tmp'),\n",
" PosixPath('/../cores')]"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(Path(\"/../\").iterdir())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10",
"language": "python",
"name": "py3.10"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment