Skip to content

Instantly share code, notes, and snippets.

@mols3131d
Last active May 1, 2024 22:10
Show Gist options
  • Save mols3131d/0d682ac8d8579ac974ee9041152f9459 to your computer and use it in GitHub Desktop.
Save mols3131d/0d682ac8d8579ac974ee9041152f9459 to your computer and use it in GitHub Desktop.
openpyxl
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# openpyxl\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## [Tutorial](https://openpyxl.readthedocs.io/en/stable/tutorial.html#tutorial)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [Create a workbook](https://openpyxl.readthedocs.io/en/stable/tutorial.html#create-a-workbook)\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<openpyxl.workbook.workbook.Workbook at 0x1e1f579b8d0>"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from openpyxl import Workbook\n",
"\n",
"wb = Workbook()\n",
"wb"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Worksheet\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Worksheet \"Sheet\">"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ws = wb.active\n",
"ws"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Worksheet \"Mysheet\">"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ws = wb.create_sheet(\"Mysheet\", 0)\n",
"ws"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### title\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Worksheet \"New Title\">"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ws.title = \"New Title\"\n",
"ws"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Worksheet \"New Title\">"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ws = wb[\"New Title\"]\n",
"ws"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<Worksheet \"New Title\">, <Worksheet \"Sheet\">]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wb.worksheets"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [Playing with data](https://openpyxl.readthedocs.io/en/stable/tutorial.html#playing-with-data)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### [Accessing one cell](https://openpyxl.readthedocs.io/en/stable/tutorial.html#accessing-one-cell)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"c = ws[\"A4\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws[\"A4\"] = 4"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"d = ws.cell(row=4, column=2, value=10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### [Accessing many cells](https://openpyxl.readthedocs.io/en/stable/tutorial.html#accessing-many-cells)\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((<Cell 'New Title'.A1>, <Cell 'New Title'.B1>, <Cell 'New Title'.C1>),\n",
" (<Cell 'New Title'.A2>, <Cell 'New Title'.B2>, <Cell 'New Title'.C2>))"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cell_range = ws[\"A1\":\"C2\"]\n",
"cell_range"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"colC = ws[\"C\"]\n",
"col_range = ws[\"C:D\"]\n",
"row10 = ws[10]\n",
"row_range = ws[5:10]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Cell 'New Title'.A1>\n",
"<Cell 'New Title'.B1>\n",
"<Cell 'New Title'.C1>\n",
"<Cell 'New Title'.A2>\n",
"<Cell 'New Title'.B2>\n",
"<Cell 'New Title'.C2>\n"
]
}
],
"source": [
"for row in ws.iter_rows(min_row=1, max_col=3, max_row=2):\n",
" for cell in row:\n",
" print(cell)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Cell 'New Title'.A1>\n",
"<Cell 'New Title'.A2>\n",
"<Cell 'New Title'.B1>\n",
"<Cell 'New Title'.B2>\n",
"<Cell 'New Title'.C1>\n",
"<Cell 'New Title'.C2>\n"
]
}
],
"source": [
"for col in ws.iter_cols(min_row=1, max_col=3, max_row=2):\n",
" for cell in col:\n",
" print(cell)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((<Cell 'New Title'.A1>,\n",
" <Cell 'New Title'.B1>,\n",
" <Cell 'New Title'.C1>,\n",
" <Cell 'New Title'.D1>),\n",
" (<Cell 'New Title'.A2>,\n",
" <Cell 'New Title'.B2>,\n",
" <Cell 'New Title'.C2>,\n",
" <Cell 'New Title'.D2>),\n",
" (<Cell 'New Title'.A3>,\n",
" <Cell 'New Title'.B3>,\n",
" <Cell 'New Title'.C3>,\n",
" <Cell 'New Title'.D3>),\n",
" (<Cell 'New Title'.A4>,\n",
" <Cell 'New Title'.B4>,\n",
" <Cell 'New Title'.C4>,\n",
" <Cell 'New Title'.D4>),\n",
" (<Cell 'New Title'.A5>,\n",
" <Cell 'New Title'.B5>,\n",
" <Cell 'New Title'.C5>,\n",
" <Cell 'New Title'.D5>),\n",
" (<Cell 'New Title'.A6>,\n",
" <Cell 'New Title'.B6>,\n",
" <Cell 'New Title'.C6>,\n",
" <Cell 'New Title'.D6>),\n",
" (<Cell 'New Title'.A7>,\n",
" <Cell 'New Title'.B7>,\n",
" <Cell 'New Title'.C7>,\n",
" <Cell 'New Title'.D7>),\n",
" (<Cell 'New Title'.A8>,\n",
" <Cell 'New Title'.B8>,\n",
" <Cell 'New Title'.C8>,\n",
" <Cell 'New Title'.D8>),\n",
" (<Cell 'New Title'.A9>,\n",
" <Cell 'New Title'.B9>,\n",
" <Cell 'New Title'.C9>,\n",
" <Cell 'New Title'.D9>),\n",
" (<Cell 'New Title'.A10>,\n",
" <Cell 'New Title'.B10>,\n",
" <Cell 'New Title'.C10>,\n",
" <Cell 'New Title'.D10>))"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tuple(ws.rows)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((<Cell 'New Title'.A1>,\n",
" <Cell 'New Title'.A2>,\n",
" <Cell 'New Title'.A3>,\n",
" <Cell 'New Title'.A4>,\n",
" <Cell 'New Title'.A5>,\n",
" <Cell 'New Title'.A6>,\n",
" <Cell 'New Title'.A7>,\n",
" <Cell 'New Title'.A8>,\n",
" <Cell 'New Title'.A9>,\n",
" <Cell 'New Title'.A10>),\n",
" (<Cell 'New Title'.B1>,\n",
" <Cell 'New Title'.B2>,\n",
" <Cell 'New Title'.B3>,\n",
" <Cell 'New Title'.B4>,\n",
" <Cell 'New Title'.B5>,\n",
" <Cell 'New Title'.B6>,\n",
" <Cell 'New Title'.B7>,\n",
" <Cell 'New Title'.B8>,\n",
" <Cell 'New Title'.B9>,\n",
" <Cell 'New Title'.B10>),\n",
" (<Cell 'New Title'.C1>,\n",
" <Cell 'New Title'.C2>,\n",
" <Cell 'New Title'.C3>,\n",
" <Cell 'New Title'.C4>,\n",
" <Cell 'New Title'.C5>,\n",
" <Cell 'New Title'.C6>,\n",
" <Cell 'New Title'.C7>,\n",
" <Cell 'New Title'.C8>,\n",
" <Cell 'New Title'.C9>,\n",
" <Cell 'New Title'.C10>),\n",
" (<Cell 'New Title'.D1>,\n",
" <Cell 'New Title'.D2>,\n",
" <Cell 'New Title'.D3>,\n",
" <Cell 'New Title'.D4>,\n",
" <Cell 'New Title'.D5>,\n",
" <Cell 'New Title'.D6>,\n",
" <Cell 'New Title'.D7>,\n",
" <Cell 'New Title'.D8>,\n",
" <Cell 'New Title'.D9>,\n",
" <Cell 'New Title'.D10>))"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tuple(ws.columns)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Values only\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n",
"None\n"
]
}
],
"source": [
"for row in ws.values:\n",
" for value in row:\n",
" print(value)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(None, None, None)\n",
"(None, None, None)\n"
]
}
],
"source": [
"for row in ws.iter_rows(min_row=1, max_col=3, max_row=2, values_only=True):\n",
" print(row)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [Data storage](https://openpyxl.readthedocs.io/en/stable/tutorial.html#data-storage)\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hello, world\n"
]
}
],
"source": [
"c = ws[\"C1\"]\n",
"c.value = \"hello, world\"\n",
"print(c.value)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3.14\n"
]
}
],
"source": [
"d = ws[\"D1\"]\n",
"d.value = 3.14\n",
"print(d.value)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Saving to a file\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"wb.save(\"openpyxl_practice.xlsx\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Saving as a stream\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from tempfile import NamedTemporaryFile\n",
"from openpyxl import Workbook\n",
"\n",
"wb = Workbook()\n",
"with NamedTemporaryFile() as tmp:\n",
" wb.save(tmp.name)\n",
" tmp.seek(0)\n",
" stream = tmp.read()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [Loading from a file](https://openpyxl.readthedocs.io/en/stable/tutorial.html#loading-from-a-file)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from openpyxl import load_workbook\n",
"\n",
"wb = load_workbook(filename=\"empty_book.xlsx\")\n",
"sheet_ranges = wb[\"range names\"]\n",
"print(sheet_ranges[\"D18\"].value)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Working with Images\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Inserting an image\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from openpyxl import Workbook\n",
"from openpyxl.drawing.image import Image\n",
"\n",
"wb = Workbook()\n",
"ws = wb.active\n",
"ws[\"A1\"] = \"You should see three logos below\"\n",
"\n",
"# create an image\n",
"img = Image(\"logo.png\")\n",
"\n",
"# add to worksheet and anchor next to cells\n",
"ws.add_image(img, \"A1\")\n",
"wb.save(\"logo.xlsx\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Snippet\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### worksheet_append_dict\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def worksheet_append_dict(worksheet, data: dict):\n",
" headers = data.keys()\n",
" worksheet.append(headers)\n",
"\n",
" for i in range(len(data[headers[0]])):\n",
" row_data = [data[header][i] for header in headers]\n",
" worksheet.append(row_data)\n",
"\n",
" return worksheet"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### auto_fit_column_size\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from PIL import ImageFont\n",
"\n",
"\n",
"def _get_text_width_pixels(text, font_size=11, font_name=\"arial.ttf\") -> float:\n",
" \"\"\"\n",
" Get the width of the text in pixels for a specific font and size.\n",
"\n",
" Args:\n",
" - text (str): The text whose width needs to be calculated.\n",
" - font_size (int, optional): The font size. Defaults to 11.\n",
" - font_name (str, optional): The name of the font file. Defaults to \"arial.ttf\".\n",
"\n",
" Returns:\n",
" - float: The width of the text in pixels.\n",
" \"\"\"\n",
"\n",
" font = ImageFont.truetype(font_name, font_size)\n",
" width = font.getlength(text)\n",
"\n",
" return width\n",
"\n",
"\n",
"\n",
"def auto_fit_column_size(\n",
" worksheet, columns=None, margin=2, font_size=11, font_name=\"arial.ttf\"\n",
"):\n",
" \"\"\"\n",
" Automatically adjusts the width of columns in the worksheet to fit the content.\n",
"\n",
" Args:\n",
" - worksheet: The worksheet to adjust column sizes.\n",
" - columns (list, optional): A list of column indices to adjust. If None, adjusts all columns. Defaults to None.\n",
" - margin (int, optional): The margin to add to the maximum text width. Defaults to 2.\n",
" - font_size (int, optional): The font size to calculate text width. Defaults to 11.\n",
" - font_name (str, optional): The name of the font file. Defaults to \"arial.ttf\".\n",
"\n",
" Returns:\n",
" - _WorkbookSheet: The modified worksheet with adjusted column sizes.\n",
"\n",
" Reference:\n",
" - https://gaussian37.github.io/python-etc-openpyxl/\n",
" \"\"\"\n",
"\n",
" for i, column_cells in enumerate(worksheet.columns):\n",
" is_ok = False\n",
"\n",
" if columns == None:\n",
" is_ok = True\n",
"\n",
" elif isinstance(columns, list) and i in columns:\n",
" is_ok = True\n",
"\n",
" if is_ok:\n",
" max_text_width = max(\n",
" _get_text_width_pixels(str(cell.value), font_size, font_name)\n",
" for cell in column_cells\n",
" )\n",
" max_text_width = (max_text_width * 2 / 10) + margin\n",
"\n",
" # 컬럼 넓이를 텍스트의 가로 길이에 margin을 더한 값으로 설정\n",
" worksheet.column_dimensions[column_cells[0].column_letter].width = (\n",
" max_text_width\n",
" )\n",
"\n",
" return worksheet"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "crawler",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment