Last active
August 16, 2023 08:33
-
-
Save onlyforbopi/81af3995362f3743066f418825291785 to your computer and use it in GitHub Desktop.
Python.Excel.ExcelHandling #python #Python #Excel #excel #ExcelHandling #openpyxl #pandas #PythonModules
#modules Features: Openpyxl
pandas 1. Get sheets of workbook : workbooksheets.py (Openpyxl)
2. Iterate over wo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ########## | |
# CONVERTING BETWEEN COLUMN LETTERS (IE A2) AND NUMBERS IE (1, 2) | |
# Two methods : | |
# get_column_letter | |
# column_index_from_string | |
def xlsizerow(file_in, sheet_name): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
return sheet.max_row | |
def xlsizecolumn(file_in, sheet_name): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
return sheet.max_row | |
def xlsizerc(file_in, sheet_name): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
return (sheet.max_row, sheet.max_column) | |
def xlsizea1(file_in, sheet_name): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
sheet_max_column = get_column_letter(sheet.max_column) | |
sheet_max_row = sheet.max_row | |
return (sheet_max_row, sheet_max_column) | |
# This way we can easily convert between high column numbers ie AAC and actual numbers. | |
import openpyxl | |
from openpyxl.cell import get_column_letter, column_index_from_string | |
print(get_column_letter(1)) | |
print(get_column_letter(27)) | |
print(get_column_letter(277)) | |
# With a similar way we can get the letter of max column | |
wb = openpyxl.load_workbook('example.xlsx') | |
sheet = wb.get_sheet_by_name('Sheet1') | |
print(get_column_letter(sheet.max_column)) | |
# The reverse process ie from numbers to 'A1' is done like: | |
print(column_index_from_string('A')) | |
print(column_index_from_string('AA')) | |
# ############### | |
# SLICE WORKSHEET OBJECTS | |
import openpyxl | |
wb = openpyxl.load_workbook('example.xlsx') | |
sheet = wb.get_sheet_by_name('Sheet1') | |
# We slice the sheet by drawing a rectangle from A1-C3 | |
# We peform the slice by declaring we want the cell objects in a specific area | |
# into a "Generator" object, we can use tuple() if we want to visualize it. | |
# The tuple containts 3 tuples, one for each row, each element of the tuple is a cell. | |
print(tuple(sheet['A1':'C3'])) | |
def xlslicer(file_in, sheet_name, start_coords, end_coords): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
return tuple(sheet[start_coords:end_coords]) | |
# We iterate over the rows of the slice | |
for rowobj in sheet['A1':'C3']: | |
for cellobj in rowobj: | |
print(cellobj.coordinate, cellobj.value) | |
print("Row end\n") | |
# Iterate using rows/columns attribute (iterate over rows) | |
print(sheet.columns[1]) | |
for cellobj in sheet.columns[1]: | |
print(cellobj.value) | |
print(sheet.rows[1]) | |
for colobj in sheet.rows[1]: | |
print(colobj.value) | |
# Just the ".rows" will put the entire spreadsheet in a tuple | |
# This is an example of how you can iterate over that tuple. | |
# Rows attribute - tuples | |
print("@@") | |
mytup = sheet.rows | |
print(mytup) | |
for in_row in mytup: | |
for in_cell in in_row: | |
print(in_cell.value,) | |
def xlrowcheckint(file_in, sheet_name, column_input): | |
have_it = [] | |
not_have_it = [] | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
for rowobj in sheet.columns[column_input]: | |
value = str(rowobj.value) | |
if not value.isdigit(): | |
print("Found a non digit") | |
not_have_it.append(rowobj.row) | |
else: | |
print("Digit") | |
have_it.append(rowobj.row) | |
return (have_it, not_have_it) | |
def xlrowcheckalpha(file_in, sheet_name, column_input): | |
have_it = [] | |
not_have_it = [] | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
for rowobj in sheet.columns[column_input]: | |
value = str(rowobj.value) | |
if not value.isalphabetic(): | |
print("Found a non digit") | |
not_have_it.append(rowobj.row) | |
else: | |
print("Digit") | |
have_it.append(rowobj.row) | |
return (have_it, not_have_it) | |
# ################################# | |
# Small recap | |
# | |
# So far we know: | |
# | |
# 1. how to import module | |
# 2. call the openpyxl.load_workbook() function | |
# 3. workbook | |
# 4. get_active_sheet() / get_sheet_by_name() | |
# 5. Worksheet object | |
# 6. cell() | |
# 7. Get a cell object | |
# 8. Read the Cell object's value attribute | |
# ####################################### | |
# READING DATA FROM A SPREADSHEET | |
# # #### | |
# # Reading data and assigning variables | |
# import sys | |
# import os | |
# import openpyxl | |
# import pprint | |
# file_in = sys.argv[1] | |
# wb = openpyxl.load_workbook(file_in) | |
# # | |
# sheet = wb.get_sheet_by_name('Population by Census Tract') | |
# # | |
# countyData = {} | |
# for row in range(2, sheet.max_row + 1, 1): | |
# state = sheet['B' + str(row)].value | |
# county = sheet['C' + str(row)].value | |
# pop = sheet['D' + str(row)].value | |
# print(state, county, pop) | |
# #### | |
# CREATING AND SAVING EXCEL DOCUMENTS | |
# the openpyxl.Workbook() functions creates a new blank workbook object. | |
import openpyxl | |
wb = openpyxl.Workbook() | |
wb.get_sheet_names() | |
sheet = wb.get_active_sheet() | |
sheet.title | |
sheet.title = 'Spam Bacon Eggs Sheet' | |
wb.get_sheet_names() | |
# Anytime the workbook is modified, the changes wont be saved unless | |
# the save() method is called. | |
import openpyxl | |
wb = openpyxl.load_workbook('example.xlsx') | |
sheet = wb.get_active_sheet() | |
sheet.title = 'Spam Spam Spam' | |
wb.save('example_copy.xlsx') | |
# # | |
# CREATING AND REMOVING SHEETS | |
# The create_sheet() method returns a new Worksheet | |
# object named SheetX, | |
# which by default is set to be the last sheet in the workbook. | |
import openpyxl | |
wb = openpyxl.Workbook() | |
wb.get_sheet_names() | |
wb.create_sheet() | |
wb.get_sheet_names() | |
wb.create_sheet(index=0, title='First Sheet') | |
wb.get_sheet_names() | |
# # | |
# REMOVING SHEETS | |
wb.get_sheet_names() | |
wb.remove_sheet(wb.get_sheet_by_name('Middle Sheet')) | |
wb.remove_sheet(wb.get_sheet_by_name('Sheet1')) | |
wb.get_sheet_names() | |
wb.save('example_copy_2.xlsx') | |
# ###################################################### | |
# WRITING VALUES TO CELLS | |
import openpyxl | |
wb = openpyxl.Workbook() | |
sheet = wb.get_sheet_by_name('Sheet') | |
sheet['A1'] = 'Hello world' | |
sheet['A1'].value | |
# ###################################################### | |
# Setting the font styles of cells | |
import openpyxl | |
from openpyxl.styles import Font, Style | |
wb = openpyxl.Workbook() | |
sheet = wb.get_sheet_by_name('Sheet') | |
italic24Font = Font(size=24, italic=True) | |
styleObj = Style(font=italic24Font) | |
sheet['A'].style = styleObj | |
sheet['A1'] = 'Hello world!' | |
wb.save('styled.xlsx') | |
# ####################################################### | |
# More advanced font styles | |
import openpyxl | |
from openpyxl.styles import Font, Style | |
wb = openpyxl.Workbook() | |
sheet = wb.get_sheet_by_name('Sheet') | |
fontObj1 = Font(name='Times New Roman', bold=True) | |
styleObj1 = Style(font=fontObj1) | |
sheet['A1'].style/styleObj | |
sheet['A1'] = 'Bold Times New Roman' | |
fontObj2 = Font(size=24, italic=True) | |
styleObj2 = Style(font=fontObj2) | |
sheet['B3'].style/styleObj | |
sheet['B3'] = '24 pt Italic' | |
wb.save('styles.xlsx') | |
# ######################################################## | |
# Programmatically add formulas to excel | |
>>> sheet['B9'] = '=SUM(B1:B8)' | |
>>> import openpyxl | |
>>> wb = openpyxl.Workbook() | |
>>> sheet = wb.get_active_sheet() | |
>>> sheet['A1'] = 200 | |
>>> sheet['A2'] = 300 | |
>>> sheet['A3'] = '=SUM(A1:A2)' | |
>>> wb.save('writeFormula.xlsx') | |
You can also read the formula in a cell just as you would any value. | |
However, if you want to see the result of the calculation for the formula | |
instead of the literal formula, you must pass True for the data_only keyword | |
argument to load_workbook(). This means a Workbook object can show either | |
the formulas or the result of the formulas but not both. (But you can have | |
multiple Workbook objects loaded for the same spreadsheet file.) Enter the | |
following into the interactive shell to see the difference between loading a | |
workbook with and without the data_only keyword argument: | |
>>> import openpyxl | |
>>> wbFormulas = openpyxl.load_workbook('writeFormula.xlsx') | |
>>> sheet = wbFormulas.get_active_sheet() | |
>>> sheet['A3'].value | |
'=SUM(A1:A2)' | |
>>> wbDataOnly = openpyxl.load_workbook('writeFormula.xlsx', data_only=True) | |
>>> sheet = wbDataOnly.get_active_sheet() | |
>>> sheet['A3'].value | |
500 | |
# ######################################################### | |
# Adjusting rows and columns | |
# Row height / column width | |
>>> import openpyxl | |
>>> wb = openpyxl.Workbook() | |
>>> sheet = wb.get_active_sheet() | |
>>> sheet['A1'] = 'Tall row' | |
>>> sheet['B2'] = 'Wide column' | |
>>> sheet.row_dimensions[1].height = 70 | |
>>> sheet.column_dimensions['B'].width = 20 | |
>>> wb.save('dimensions.xlsx') | |
# Merging / Unmerging cells | |
>>> import openpyxl | |
>>> wb = openpyxl.Workbook() | |
>>> sheet = wb.get_active_sheet() | |
>>> sheet.merge_cells('A1:D3') | |
>>> sheet['A1'] = 'Twelve cells merged together.' | |
>>> sheet.merge_cells('C5:D5') | |
>>> sheet['C5'] = 'Two merged cells.' | |
>>> wb.save('merged.xlsx') | |
# Unmerge | |
>>> import openpyxl | |
>>> wb = openpyxl.load_workbook('merged.xlsx') | |
>>> sheet = wb.get_active_sheet() | |
>>> sheet.unmerge_cells('A1:D3') | |
>>> sheet.unmerge_cells('C5:D5') | |
>>> wb.save('merged.xlsx') | |
# Freeze panes | |
# sheet.freeze_panes = 'A2' Row 1 | |
# sheet.freeze_panes = 'B1' Column A | |
# sheet.freeze_panes = 'C1' Columns A and B | |
# sheet.freeze_panes = 'C2' Row 1 and columns A and B | |
# sheet.freeze_panes = 'A1' or | |
# sheet.freeze_panes = None | |
>>> import openpyxl | |
>>> wb = openpyxl.load_workbook('produceSales.xlsx') | |
>>> sheet = wb.get_active_sheet() | |
>>> sheet.freeze_panes = 'A2' | |
>>> wb.save('freezeExample.xlsx') | |
print("#####################") | |
print (xlgetcelvalue(file_in, 'Sheet1', 'A1')) | |
print(xlgetrowcol(file_in, 'Sheet1', 'A1')) | |
print(xlgetrowcolnum(file_in, 'Sheet1', 1, 1)) | |
print (xlsheetnames(file_in)) | |
print(xliterrowintolol(file_in, 'Sheet1')) | |
print(xlsizecolumn(file_in, 'Sheet1')) | |
print(xlsizerow(file_in, 'Sheet1')) | |
print(xlsizerc(file_in, 'Sheet1')) | |
print(xlsizea1(file_in, 'Sheet1')) | |
print(xlslicer(file_in, 'Sheet1', 'A1', 'C3')) | |
for rowobj in xlslicer(file_in, 'Sheet1', 'A1', 'C3'): | |
for cellobj in rowobj: | |
print(cellobj.coordinate, cellobj.value, " ",end='') | |
print("\n") | |
xlrowcheckint(file_in, 'Sheet1', 2) | |
xlrowcheckint(file_in, 'Sheet1', 1) | |
print(xlrowcheckint(file_in, 'Sheet1', 2)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Theory: | |
# openpyxl works by handling workbook, and worksheet objects. | |
# Necessary module | |
import openpyxl | |
# DECLARE WORKBOOK OBJECT | |
# declaring workbook object - same as file object but for excel files | |
wb = openpyxl.load_workbook(file_in) | |
print(type(wb)) | |
# <class 'openpyxl.workbook.workbook.Workbook'> | |
# #### | |
# Getting specific sheets from the workbook | |
print("@@") | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
wb.get_sheet_names() | |
print(wb.get_sheet_names()) | |
# Function Form | |
def xlsheetnames(file_in): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
return wb.get_sheet_names() | |
# #### | |
# Iterate over a workbook worksheets (Object) | |
print("@@") | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
for sheet_temp in openpyxl.load_workbook(file_in): | |
print(str(sheet_temp)) | |
# #### | |
# Iterate over a workbook worksheets (Name) | |
print("@@") | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
for sheet_temp in wb.get_sheet_names(): | |
print(str(sheet_temp)) | |
# #### | |
# Getting single specific sheet into variable by name | |
print("@@") | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
wb.get_sheet_names() | |
print(wb.get_sheet_names()) | |
# Declaring worksheet object | |
# Worksheet objects are used to control specific sheets. | |
sheet = wb.get_sheet_by_name('Sheet3') | |
print(sheet) | |
print(type(sheet)) | |
# #### | |
# Get the title of an active sheet | |
print("@@") | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
wb.get_sheet_names() | |
print(wb.get_sheet_names()) | |
# Declare worksheet object and assign sheet 3 to it | |
sheet = wb.get_sheet_by_name('Sheet3') | |
print(sheet) | |
print(type(sheet)) | |
print(sheet.title) | |
# #### | |
# Get the active sheet | |
print("@@") | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
print(wb.active) | |
def xlactivesheet(file_in): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
return wb.active |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ############################################# | |
# REVIEW OF BASICS | |
print("@@") | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
print("All sheetnames: " + str(wb.sheetnames)) | |
print("Specific sheet: " + str(wb["Sheet2"])) | |
print("Iterating over sheets - simple: " ) | |
for sheet in wb: | |
print(sheet.title) | |
print("Active sheet is: " + str(wb.active.title)) | |
print("Active sheet object is: " + str(wb.active)) | |
# ################################################################# | |
# ################################################################# | |
# Cell attributes: | |
# value | |
# column | |
# row | |
# coordinate | |
# Cell methods: | |
# cell() | |
# ##### | |
# ACCESSING CELLS | |
print("@@") | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name('Sheet1') | |
print("Sheet object: " + str(sheet)) | |
cell_v = sheet['A1'] | |
print ("Cell object: " + str(cell_v)) | |
# ###### | |
# GETTING THE CELL VALUE | |
print("@@") | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name('Sheet1') | |
print("Sheet object: " + str(sheet)) | |
cell_v = sheet['A1'] | |
print ("Cell object: " + str(cell_v)) | |
cell_value = cell_v.value | |
print(sheet['A1'].value) | |
print ("Cell value: " + str(cell_value)) | |
def xlgetcelvalue(file_in, sheet_name, cell_name): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
cell_v = sheet[cell_name] | |
cell_value = cell_v.value | |
return cell_value | |
# ####### | |
# GETTING ROW AND COLUMN FROM CELL OBJECT | |
print("@@") | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name('Sheet1') | |
print("Sheet object: " + str(sheet)) | |
cell_v = sheet['B1'] | |
print ("Cell object: " + str(cell_v)) | |
print ("Cell value: " + str(cell_v.value)) | |
cell_v_row = cell_v.row | |
cell_v_column = cell_v.column | |
print("Cell is at Row: " + str(cell_v_row) + " and column: " + str(cell_v_column)) | |
print ("Cell at " + str(cell_v.coordinate) + " has value: " + str(cell_v.value)) | |
def xlgetrowcol(file_in, sheet_name, cell_name): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
cellm = sheet[cell_name] | |
return (cellm.row, cellm.column) | |
# ###### | |
# USING THE CELL() METHOD TO ACCESS CELLS | |
print("@@") | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name('Sheet1') | |
print(sheet.cell(row=1, column=2)) | |
print(sheet.cell(row=1, column=2).value) | |
def xlgetrowcolnum(file_in, sheet_name, row_in, column_in): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
return sheet.cell(row=row_in, column=column_in).value | |
# ####### | |
# DETERMINE THE SIZE OF THE SHEET | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name('Sheet1') | |
print(sheet.max_row) | |
print(sheet.max_column) | |
def xlsizerow(file_in, sheet_name): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
return sheet.max_row | |
def xlsizecolumn(file_in, sheet_name): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
return sheet.max_row | |
def xlsizerc(file_in, sheet_name): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
return (sheet.max_row, sheet.max_column) | |
def xlsizea1(file_in, sheet_name): | |
import openpyxl | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
sheet_max_column = get_column_letter(sheet.max_column) | |
sheet_max_row = sheet.max_row | |
return (sheet_max_row, sheet_max_column) | |
# This way we can easily convert between high column numbers ie AAC and actual numbers. | |
import openpyxl | |
from openpyxl.cell import get_column_letter, column_index_from_string | |
print(get_column_letter(1)) | |
print(get_column_letter(27)) | |
print(get_column_letter(277)) | |
# With a similar way we can get the letter of max column | |
wb = openpyxl.load_workbook('example.xlsx') | |
sheet = wb.get_sheet_by_name('Sheet1') | |
print(get_column_letter(sheet.max_column)) | |
# The reverse process ie from numbers to 'A1' is done like: | |
print(column_index_from_string('A')) | |
print(column_index_from_string('AA')) | |
# ###### | |
# ITERATE OVER EXCEL ROWS INTO LIST OF LISTS | |
# iterate over rows - need to know the last row "8" and specify increment | |
for i in range(1, 8, 1): | |
print(i, sheet.cell(row=i, column=2).value, sheet.cell(row=i, column=3).value) | |
def xliterrowintolol(file_in, sheet_name): | |
import openpyxl | |
output_list = [] | |
central_list = [] | |
wb = openpyxl.load_workbook(file_in) | |
sheet = wb.get_sheet_by_name(sheet_name) | |
# for i in range(start, end, increment): | |
# output_list.append(sheet.cell(row=i, column=1).value) | |
for i in sheet.rows: | |
for j in i: | |
print(i) | |
print(j.value) | |
output_list.append(j.value) | |
central_list.append(output_list) | |
output_list = [] | |
return central_list | |
# iterate over rows - using max_row attribute to find length of sheet | |
for i in range(1, sheet.max_row + 1, 1): | |
print(i, sheet.cell(row=i, column=2).value, sheet.cell(row=i, column=3).value) | |
# iterate over columns - using max column attribute to find length of sheet | |
for i in range(1, sheet.max_column + 1, 1): | |
print(i, sheet.cell(row=2, column=i).value, sheet.cell(row=3, column=i).value) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment