Last active
May 1, 2024 22:10
-
-
Save mols3131d/0d682ac8d8579ac974ee9041152f9459 to your computer and use it in GitHub Desktop.
openpyxl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# openpyxl\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## [Tutorial](https://openpyxl.readthedocs.io/en/stable/tutorial.html#tutorial)\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### [Create a workbook](https://openpyxl.readthedocs.io/en/stable/tutorial.html#create-a-workbook)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<openpyxl.workbook.workbook.Workbook at 0x1e1f579b8d0>" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from openpyxl import Workbook\n", | |
"\n", | |
"wb = Workbook()\n", | |
"wb" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Worksheet\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<Worksheet \"Sheet\">" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ws = wb.active\n", | |
"ws" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<Worksheet \"Mysheet\">" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ws = wb.create_sheet(\"Mysheet\", 0)\n", | |
"ws" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### title\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<Worksheet \"New Title\">" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ws.title = \"New Title\"\n", | |
"ws" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<Worksheet \"New Title\">" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ws = wb[\"New Title\"]\n", | |
"ws" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[<Worksheet \"New Title\">, <Worksheet \"Sheet\">]" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wb.worksheets" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### [Playing with data](https://openpyxl.readthedocs.io/en/stable/tutorial.html#playing-with-data)\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### [Accessing one cell](https://openpyxl.readthedocs.io/en/stable/tutorial.html#accessing-one-cell)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"c = ws[\"A4\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ws[\"A4\"] = 4" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"d = ws.cell(row=4, column=2, value=10)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### [Accessing many cells](https://openpyxl.readthedocs.io/en/stable/tutorial.html#accessing-many-cells)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"((<Cell 'New Title'.A1>, <Cell 'New Title'.B1>, <Cell 'New Title'.C1>),\n", | |
" (<Cell 'New Title'.A2>, <Cell 'New Title'.B2>, <Cell 'New Title'.C2>))" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cell_range = ws[\"A1\":\"C2\"]\n", | |
"cell_range" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"colC = ws[\"C\"]\n", | |
"col_range = ws[\"C:D\"]\n", | |
"row10 = ws[10]\n", | |
"row_range = ws[5:10]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<Cell 'New Title'.A1>\n", | |
"<Cell 'New Title'.B1>\n", | |
"<Cell 'New Title'.C1>\n", | |
"<Cell 'New Title'.A2>\n", | |
"<Cell 'New Title'.B2>\n", | |
"<Cell 'New Title'.C2>\n" | |
] | |
} | |
], | |
"source": [ | |
"for row in ws.iter_rows(min_row=1, max_col=3, max_row=2):\n", | |
" for cell in row:\n", | |
" print(cell)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<Cell 'New Title'.A1>\n", | |
"<Cell 'New Title'.A2>\n", | |
"<Cell 'New Title'.B1>\n", | |
"<Cell 'New Title'.B2>\n", | |
"<Cell 'New Title'.C1>\n", | |
"<Cell 'New Title'.C2>\n" | |
] | |
} | |
], | |
"source": [ | |
"for col in ws.iter_cols(min_row=1, max_col=3, max_row=2):\n", | |
" for cell in col:\n", | |
" print(cell)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"((<Cell 'New Title'.A1>,\n", | |
" <Cell 'New Title'.B1>,\n", | |
" <Cell 'New Title'.C1>,\n", | |
" <Cell 'New Title'.D1>),\n", | |
" (<Cell 'New Title'.A2>,\n", | |
" <Cell 'New Title'.B2>,\n", | |
" <Cell 'New Title'.C2>,\n", | |
" <Cell 'New Title'.D2>),\n", | |
" (<Cell 'New Title'.A3>,\n", | |
" <Cell 'New Title'.B3>,\n", | |
" <Cell 'New Title'.C3>,\n", | |
" <Cell 'New Title'.D3>),\n", | |
" (<Cell 'New Title'.A4>,\n", | |
" <Cell 'New Title'.B4>,\n", | |
" <Cell 'New Title'.C4>,\n", | |
" <Cell 'New Title'.D4>),\n", | |
" (<Cell 'New Title'.A5>,\n", | |
" <Cell 'New Title'.B5>,\n", | |
" <Cell 'New Title'.C5>,\n", | |
" <Cell 'New Title'.D5>),\n", | |
" (<Cell 'New Title'.A6>,\n", | |
" <Cell 'New Title'.B6>,\n", | |
" <Cell 'New Title'.C6>,\n", | |
" <Cell 'New Title'.D6>),\n", | |
" (<Cell 'New Title'.A7>,\n", | |
" <Cell 'New Title'.B7>,\n", | |
" <Cell 'New Title'.C7>,\n", | |
" <Cell 'New Title'.D7>),\n", | |
" (<Cell 'New Title'.A8>,\n", | |
" <Cell 'New Title'.B8>,\n", | |
" <Cell 'New Title'.C8>,\n", | |
" <Cell 'New Title'.D8>),\n", | |
" (<Cell 'New Title'.A9>,\n", | |
" <Cell 'New Title'.B9>,\n", | |
" <Cell 'New Title'.C9>,\n", | |
" <Cell 'New Title'.D9>),\n", | |
" (<Cell 'New Title'.A10>,\n", | |
" <Cell 'New Title'.B10>,\n", | |
" <Cell 'New Title'.C10>,\n", | |
" <Cell 'New Title'.D10>))" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tuple(ws.rows)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"((<Cell 'New Title'.A1>,\n", | |
" <Cell 'New Title'.A2>,\n", | |
" <Cell 'New Title'.A3>,\n", | |
" <Cell 'New Title'.A4>,\n", | |
" <Cell 'New Title'.A5>,\n", | |
" <Cell 'New Title'.A6>,\n", | |
" <Cell 'New Title'.A7>,\n", | |
" <Cell 'New Title'.A8>,\n", | |
" <Cell 'New Title'.A9>,\n", | |
" <Cell 'New Title'.A10>),\n", | |
" (<Cell 'New Title'.B1>,\n", | |
" <Cell 'New Title'.B2>,\n", | |
" <Cell 'New Title'.B3>,\n", | |
" <Cell 'New Title'.B4>,\n", | |
" <Cell 'New Title'.B5>,\n", | |
" <Cell 'New Title'.B6>,\n", | |
" <Cell 'New Title'.B7>,\n", | |
" <Cell 'New Title'.B8>,\n", | |
" <Cell 'New Title'.B9>,\n", | |
" <Cell 'New Title'.B10>),\n", | |
" (<Cell 'New Title'.C1>,\n", | |
" <Cell 'New Title'.C2>,\n", | |
" <Cell 'New Title'.C3>,\n", | |
" <Cell 'New Title'.C4>,\n", | |
" <Cell 'New Title'.C5>,\n", | |
" <Cell 'New Title'.C6>,\n", | |
" <Cell 'New Title'.C7>,\n", | |
" <Cell 'New Title'.C8>,\n", | |
" <Cell 'New Title'.C9>,\n", | |
" <Cell 'New Title'.C10>),\n", | |
" (<Cell 'New Title'.D1>,\n", | |
" <Cell 'New Title'.D2>,\n", | |
" <Cell 'New Title'.D3>,\n", | |
" <Cell 'New Title'.D4>,\n", | |
" <Cell 'New Title'.D5>,\n", | |
" <Cell 'New Title'.D6>,\n", | |
" <Cell 'New Title'.D7>,\n", | |
" <Cell 'New Title'.D8>,\n", | |
" <Cell 'New Title'.D9>,\n", | |
" <Cell 'New Title'.D10>))" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tuple(ws.columns)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Values only\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n", | |
"None\n" | |
] | |
} | |
], | |
"source": [ | |
"for row in ws.values:\n", | |
" for value in row:\n", | |
" print(value)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(None, None, None)\n", | |
"(None, None, None)\n" | |
] | |
} | |
], | |
"source": [ | |
"for row in ws.iter_rows(min_row=1, max_col=3, max_row=2, values_only=True):\n", | |
" print(row)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### [Data storage](https://openpyxl.readthedocs.io/en/stable/tutorial.html#data-storage)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"hello, world\n" | |
] | |
} | |
], | |
"source": [ | |
"c = ws[\"C1\"]\n", | |
"c.value = \"hello, world\"\n", | |
"print(c.value)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"3.14\n" | |
] | |
} | |
], | |
"source": [ | |
"d = ws[\"D1\"]\n", | |
"d.value = 3.14\n", | |
"print(d.value)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Saving to a file\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"wb.save(\"openpyxl_practice.xlsx\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Saving as a stream\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from tempfile import NamedTemporaryFile\n", | |
"from openpyxl import Workbook\n", | |
"\n", | |
"wb = Workbook()\n", | |
"with NamedTemporaryFile() as tmp:\n", | |
" wb.save(tmp.name)\n", | |
" tmp.seek(0)\n", | |
" stream = tmp.read()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### [Loading from a file](https://openpyxl.readthedocs.io/en/stable/tutorial.html#loading-from-a-file)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from openpyxl import load_workbook\n", | |
"\n", | |
"wb = load_workbook(filename=\"empty_book.xlsx\")\n", | |
"sheet_ranges = wb[\"range names\"]\n", | |
"print(sheet_ranges[\"D18\"].value)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Working with Images\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Inserting an image\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from openpyxl import Workbook\n", | |
"from openpyxl.drawing.image import Image\n", | |
"\n", | |
"wb = Workbook()\n", | |
"ws = wb.active\n", | |
"ws[\"A1\"] = \"You should see three logos below\"\n", | |
"\n", | |
"# create an image\n", | |
"img = Image(\"logo.png\")\n", | |
"\n", | |
"# add to worksheet and anchor next to cells\n", | |
"ws.add_image(img, \"A1\")\n", | |
"wb.save(\"logo.xlsx\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Snippet\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### worksheet_append_dict\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def worksheet_append_dict(worksheet, data: dict):\n", | |
" headers = data.keys()\n", | |
" worksheet.append(headers)\n", | |
"\n", | |
" for i in range(len(data[headers[0]])):\n", | |
" row_data = [data[header][i] for header in headers]\n", | |
" worksheet.append(row_data)\n", | |
"\n", | |
" return worksheet" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### auto_fit_column_size\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from PIL import ImageFont\n", | |
"\n", | |
"\n", | |
"def _get_text_width_pixels(text, font_size=11, font_name=\"arial.ttf\") -> float:\n", | |
" \"\"\"\n", | |
" Get the width of the text in pixels for a specific font and size.\n", | |
"\n", | |
" Args:\n", | |
" - text (str): The text whose width needs to be calculated.\n", | |
" - font_size (int, optional): The font size. Defaults to 11.\n", | |
" - font_name (str, optional): The name of the font file. Defaults to \"arial.ttf\".\n", | |
"\n", | |
" Returns:\n", | |
" - float: The width of the text in pixels.\n", | |
" \"\"\"\n", | |
"\n", | |
" font = ImageFont.truetype(font_name, font_size)\n", | |
" width = font.getlength(text)\n", | |
"\n", | |
" return width\n", | |
"\n", | |
"\n", | |
"\n", | |
"def auto_fit_column_size(\n", | |
" worksheet, columns=None, margin=2, font_size=11, font_name=\"arial.ttf\"\n", | |
"):\n", | |
" \"\"\"\n", | |
" Automatically adjusts the width of columns in the worksheet to fit the content.\n", | |
"\n", | |
" Args:\n", | |
" - worksheet: The worksheet to adjust column sizes.\n", | |
" - columns (list, optional): A list of column indices to adjust. If None, adjusts all columns. Defaults to None.\n", | |
" - margin (int, optional): The margin to add to the maximum text width. Defaults to 2.\n", | |
" - font_size (int, optional): The font size to calculate text width. Defaults to 11.\n", | |
" - font_name (str, optional): The name of the font file. Defaults to \"arial.ttf\".\n", | |
"\n", | |
" Returns:\n", | |
" - _WorkbookSheet: The modified worksheet with adjusted column sizes.\n", | |
"\n", | |
" Reference:\n", | |
" - https://gaussian37.github.io/python-etc-openpyxl/\n", | |
" \"\"\"\n", | |
"\n", | |
" for i, column_cells in enumerate(worksheet.columns):\n", | |
" is_ok = False\n", | |
"\n", | |
" if columns == None:\n", | |
" is_ok = True\n", | |
"\n", | |
" elif isinstance(columns, list) and i in columns:\n", | |
" is_ok = True\n", | |
"\n", | |
" if is_ok:\n", | |
" max_text_width = max(\n", | |
" _get_text_width_pixels(str(cell.value), font_size, font_name)\n", | |
" for cell in column_cells\n", | |
" )\n", | |
" max_text_width = (max_text_width * 2 / 10) + margin\n", | |
"\n", | |
" # 컬럼 넓이를 텍스트의 가로 길이에 margin을 더한 값으로 설정\n", | |
" worksheet.column_dimensions[column_cells[0].column_letter].width = (\n", | |
" max_text_width\n", | |
" )\n", | |
"\n", | |
" return worksheet" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "crawler", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment