Skip to content

Instantly share code, notes, and snippets.

@onlyforbopi
Last active August 16, 2023 08:33
Show Gist options
  • Save onlyforbopi/81af3995362f3743066f418825291785 to your computer and use it in GitHub Desktop.
Save onlyforbopi/81af3995362f3743066f418825291785 to your computer and use it in GitHub Desktop.
Python.Excel.ExcelHandling #python #Python #Excel #excel #ExcelHandling #openpyxl #pandas #PythonModules #modules Features: Openpyxl pandas 1. Get sheets of workbook : workbooksheets.py (Openpyxl) 2. Iterate over wo
# ##########
# CONVERTING BETWEEN COLUMN LETTERS (IE A2) AND NUMBERS IE (1, 2)
# Two methods :
# get_column_letter
# column_index_from_string
def xlsizerow(file_in, sheet_name):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
return sheet.max_row
def xlsizecolumn(file_in, sheet_name):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
return sheet.max_row
def xlsizerc(file_in, sheet_name):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
return (sheet.max_row, sheet.max_column)
def xlsizea1(file_in, sheet_name):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
sheet_max_column = get_column_letter(sheet.max_column)
sheet_max_row = sheet.max_row
return (sheet_max_row, sheet_max_column)
# This way we can easily convert between high column numbers ie AAC and actual numbers.
import openpyxl
from openpyxl.cell import get_column_letter, column_index_from_string
print(get_column_letter(1))
print(get_column_letter(27))
print(get_column_letter(277))
# With a similar way we can get the letter of max column
wb = openpyxl.load_workbook('example.xlsx')
sheet = wb.get_sheet_by_name('Sheet1')
print(get_column_letter(sheet.max_column))
# The reverse process ie from numbers to 'A1' is done like:
print(column_index_from_string('A'))
print(column_index_from_string('AA'))
# ###############
# SLICE WORKSHEET OBJECTS
import openpyxl
wb = openpyxl.load_workbook('example.xlsx')
sheet = wb.get_sheet_by_name('Sheet1')
# We slice the sheet by drawing a rectangle from A1-C3
# We peform the slice by declaring we want the cell objects in a specific area
# into a "Generator" object, we can use tuple() if we want to visualize it.
# The tuple containts 3 tuples, one for each row, each element of the tuple is a cell.
print(tuple(sheet['A1':'C3']))
def xlslicer(file_in, sheet_name, start_coords, end_coords):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
return tuple(sheet[start_coords:end_coords])
# We iterate over the rows of the slice
for rowobj in sheet['A1':'C3']:
for cellobj in rowobj:
print(cellobj.coordinate, cellobj.value)
print("Row end\n")
# Iterate using rows/columns attribute (iterate over rows)
print(sheet.columns[1])
for cellobj in sheet.columns[1]:
print(cellobj.value)
print(sheet.rows[1])
for colobj in sheet.rows[1]:
print(colobj.value)
# Just the ".rows" will put the entire spreadsheet in a tuple
# This is an example of how you can iterate over that tuple.
# Rows attribute - tuples
print("@@")
mytup = sheet.rows
print(mytup)
for in_row in mytup:
for in_cell in in_row:
print(in_cell.value,)
def xlrowcheckint(file_in, sheet_name, column_input):
have_it = []
not_have_it = []
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
for rowobj in sheet.columns[column_input]:
value = str(rowobj.value)
if not value.isdigit():
print("Found a non digit")
not_have_it.append(rowobj.row)
else:
print("Digit")
have_it.append(rowobj.row)
return (have_it, not_have_it)
def xlrowcheckalpha(file_in, sheet_name, column_input):
have_it = []
not_have_it = []
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
for rowobj in sheet.columns[column_input]:
value = str(rowobj.value)
if not value.isalphabetic():
print("Found a non digit")
not_have_it.append(rowobj.row)
else:
print("Digit")
have_it.append(rowobj.row)
return (have_it, not_have_it)
# #################################
# Small recap
#
# So far we know:
#
# 1. how to import module
# 2. call the openpyxl.load_workbook() function
# 3. workbook
# 4. get_active_sheet() / get_sheet_by_name()
# 5. Worksheet object
# 6. cell()
# 7. Get a cell object
# 8. Read the Cell object's value attribute
# #######################################
# READING DATA FROM A SPREADSHEET
# # ####
# # Reading data and assigning variables
# import sys
# import os
# import openpyxl
# import pprint
# file_in = sys.argv[1]
# wb = openpyxl.load_workbook(file_in)
# #
# sheet = wb.get_sheet_by_name('Population by Census Tract')
# #
# countyData = {}
# for row in range(2, sheet.max_row + 1, 1):
# state = sheet['B' + str(row)].value
# county = sheet['C' + str(row)].value
# pop = sheet['D' + str(row)].value
# print(state, county, pop)
# ####
# CREATING AND SAVING EXCEL DOCUMENTS
# the openpyxl.Workbook() functions creates a new blank workbook object.
import openpyxl
wb = openpyxl.Workbook()
wb.get_sheet_names()
sheet = wb.get_active_sheet()
sheet.title
sheet.title = 'Spam Bacon Eggs Sheet'
wb.get_sheet_names()
# Anytime the workbook is modified, the changes wont be saved unless
# the save() method is called.
import openpyxl
wb = openpyxl.load_workbook('example.xlsx')
sheet = wb.get_active_sheet()
sheet.title = 'Spam Spam Spam'
wb.save('example_copy.xlsx')
# #
# CREATING AND REMOVING SHEETS
# The create_sheet() method returns a new Worksheet
# object named SheetX,
# which by default is set to be the last sheet in the workbook.
import openpyxl
wb = openpyxl.Workbook()
wb.get_sheet_names()
wb.create_sheet()
wb.get_sheet_names()
wb.create_sheet(index=0, title='First Sheet')
wb.get_sheet_names()
# #
# REMOVING SHEETS
wb.get_sheet_names()
wb.remove_sheet(wb.get_sheet_by_name('Middle Sheet'))
wb.remove_sheet(wb.get_sheet_by_name('Sheet1'))
wb.get_sheet_names()
wb.save('example_copy_2.xlsx')
# ######################################################
# WRITING VALUES TO CELLS
import openpyxl
wb = openpyxl.Workbook()
sheet = wb.get_sheet_by_name('Sheet')
sheet['A1'] = 'Hello world'
sheet['A1'].value
# ######################################################
# Setting the font styles of cells
import openpyxl
from openpyxl.styles import Font, Style
wb = openpyxl.Workbook()
sheet = wb.get_sheet_by_name('Sheet')
italic24Font = Font(size=24, italic=True)
styleObj = Style(font=italic24Font)
sheet['A'].style = styleObj
sheet['A1'] = 'Hello world!'
wb.save('styled.xlsx')
# #######################################################
# More advanced font styles
import openpyxl
from openpyxl.styles import Font, Style
wb = openpyxl.Workbook()
sheet = wb.get_sheet_by_name('Sheet')
fontObj1 = Font(name='Times New Roman', bold=True)
styleObj1 = Style(font=fontObj1)
sheet['A1'].style/styleObj
sheet['A1'] = 'Bold Times New Roman'
fontObj2 = Font(size=24, italic=True)
styleObj2 = Style(font=fontObj2)
sheet['B3'].style/styleObj
sheet['B3'] = '24 pt Italic'
wb.save('styles.xlsx')
# ########################################################
# Programmatically add formulas to excel
>>> sheet['B9'] = '=SUM(B1:B8)'
>>> import openpyxl
>>> wb = openpyxl.Workbook()
>>> sheet = wb.get_active_sheet()
>>> sheet['A1'] = 200
>>> sheet['A2'] = 300
>>> sheet['A3'] = '=SUM(A1:A2)'
>>> wb.save('writeFormula.xlsx')
You can also read the formula in a cell just as you would any value.
However, if you want to see the result of the calculation for the formula
instead of the literal formula, you must pass True for the data_only keyword
argument to load_workbook(). This means a Workbook object can show either
the formulas or the result of the formulas but not both. (But you can have
multiple Workbook objects loaded for the same spreadsheet file.) Enter the
following into the interactive shell to see the difference between loading a
workbook with and without the data_only keyword argument:
>>> import openpyxl
>>> wbFormulas = openpyxl.load_workbook('writeFormula.xlsx')
>>> sheet = wbFormulas.get_active_sheet()
>>> sheet['A3'].value
'=SUM(A1:A2)'
>>> wbDataOnly = openpyxl.load_workbook('writeFormula.xlsx', data_only=True)
>>> sheet = wbDataOnly.get_active_sheet()
>>> sheet['A3'].value
500
# #########################################################
# Adjusting rows and columns
# Row height / column width
>>> import openpyxl
>>> wb = openpyxl.Workbook()
>>> sheet = wb.get_active_sheet()
>>> sheet['A1'] = 'Tall row'
>>> sheet['B2'] = 'Wide column'
>>> sheet.row_dimensions[1].height = 70
>>> sheet.column_dimensions['B'].width = 20
>>> wb.save('dimensions.xlsx')
# Merging / Unmerging cells
>>> import openpyxl
>>> wb = openpyxl.Workbook()
>>> sheet = wb.get_active_sheet()
>>> sheet.merge_cells('A1:D3')
>>> sheet['A1'] = 'Twelve cells merged together.'
>>> sheet.merge_cells('C5:D5')
>>> sheet['C5'] = 'Two merged cells.'
>>> wb.save('merged.xlsx')
# Unmerge
>>> import openpyxl
>>> wb = openpyxl.load_workbook('merged.xlsx')
>>> sheet = wb.get_active_sheet()
>>> sheet.unmerge_cells('A1:D3')
>>> sheet.unmerge_cells('C5:D5')
>>> wb.save('merged.xlsx')
# Freeze panes
# sheet.freeze_panes = 'A2' Row 1
# sheet.freeze_panes = 'B1' Column A
# sheet.freeze_panes = 'C1' Columns A and B
# sheet.freeze_panes = 'C2' Row 1 and columns A and B
# sheet.freeze_panes = 'A1' or
# sheet.freeze_panes = None
>>> import openpyxl
>>> wb = openpyxl.load_workbook('produceSales.xlsx')
>>> sheet = wb.get_active_sheet()
>>> sheet.freeze_panes = 'A2'
>>> wb.save('freezeExample.xlsx')
print("#####################")
print (xlgetcelvalue(file_in, 'Sheet1', 'A1'))
print(xlgetrowcol(file_in, 'Sheet1', 'A1'))
print(xlgetrowcolnum(file_in, 'Sheet1', 1, 1))
print (xlsheetnames(file_in))
print(xliterrowintolol(file_in, 'Sheet1'))
print(xlsizecolumn(file_in, 'Sheet1'))
print(xlsizerow(file_in, 'Sheet1'))
print(xlsizerc(file_in, 'Sheet1'))
print(xlsizea1(file_in, 'Sheet1'))
print(xlslicer(file_in, 'Sheet1', 'A1', 'C3'))
for rowobj in xlslicer(file_in, 'Sheet1', 'A1', 'C3'):
for cellobj in rowobj:
print(cellobj.coordinate, cellobj.value, " ",end='')
print("\n")
xlrowcheckint(file_in, 'Sheet1', 2)
xlrowcheckint(file_in, 'Sheet1', 1)
print(xlrowcheckint(file_in, 'Sheet1', 2))
# Theory:
# openpyxl works by handling workbook, and worksheet objects.
# Necessary module
import openpyxl
# DECLARE WORKBOOK OBJECT
# declaring workbook object - same as file object but for excel files
wb = openpyxl.load_workbook(file_in)
print(type(wb))
# <class 'openpyxl.workbook.workbook.Workbook'>
# ####
# Getting specific sheets from the workbook
print("@@")
import openpyxl
wb = openpyxl.load_workbook(file_in)
wb.get_sheet_names()
print(wb.get_sheet_names())
# Function Form
def xlsheetnames(file_in):
import openpyxl
wb = openpyxl.load_workbook(file_in)
return wb.get_sheet_names()
# ####
# Iterate over a workbook worksheets (Object)
print("@@")
import openpyxl
wb = openpyxl.load_workbook(file_in)
for sheet_temp in openpyxl.load_workbook(file_in):
print(str(sheet_temp))
# ####
# Iterate over a workbook worksheets (Name)
print("@@")
import openpyxl
wb = openpyxl.load_workbook(file_in)
for sheet_temp in wb.get_sheet_names():
print(str(sheet_temp))
# ####
# Getting single specific sheet into variable by name
print("@@")
import openpyxl
wb = openpyxl.load_workbook(file_in)
wb.get_sheet_names()
print(wb.get_sheet_names())
# Declaring worksheet object
# Worksheet objects are used to control specific sheets.
sheet = wb.get_sheet_by_name('Sheet3')
print(sheet)
print(type(sheet))
# ####
# Get the title of an active sheet
print("@@")
import openpyxl
wb = openpyxl.load_workbook(file_in)
wb.get_sheet_names()
print(wb.get_sheet_names())
# Declare worksheet object and assign sheet 3 to it
sheet = wb.get_sheet_by_name('Sheet3')
print(sheet)
print(type(sheet))
print(sheet.title)
# ####
# Get the active sheet
print("@@")
import openpyxl
wb = openpyxl.load_workbook(file_in)
print(wb.active)
def xlactivesheet(file_in):
import openpyxl
wb = openpyxl.load_workbook(file_in)
return wb.active
# #############################################
# REVIEW OF BASICS
print("@@")
import openpyxl
wb = openpyxl.load_workbook(file_in)
print("All sheetnames: " + str(wb.sheetnames))
print("Specific sheet: " + str(wb["Sheet2"]))
print("Iterating over sheets - simple: " )
for sheet in wb:
print(sheet.title)
print("Active sheet is: " + str(wb.active.title))
print("Active sheet object is: " + str(wb.active))
# #################################################################
# #################################################################
# Cell attributes:
# value
# column
# row
# coordinate
# Cell methods:
# cell()
# #####
# ACCESSING CELLS
print("@@")
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name('Sheet1')
print("Sheet object: " + str(sheet))
cell_v = sheet['A1']
print ("Cell object: " + str(cell_v))
# ######
# GETTING THE CELL VALUE
print("@@")
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name('Sheet1')
print("Sheet object: " + str(sheet))
cell_v = sheet['A1']
print ("Cell object: " + str(cell_v))
cell_value = cell_v.value
print(sheet['A1'].value)
print ("Cell value: " + str(cell_value))
def xlgetcelvalue(file_in, sheet_name, cell_name):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
cell_v = sheet[cell_name]
cell_value = cell_v.value
return cell_value
# #######
# GETTING ROW AND COLUMN FROM CELL OBJECT
print("@@")
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name('Sheet1')
print("Sheet object: " + str(sheet))
cell_v = sheet['B1']
print ("Cell object: " + str(cell_v))
print ("Cell value: " + str(cell_v.value))
cell_v_row = cell_v.row
cell_v_column = cell_v.column
print("Cell is at Row: " + str(cell_v_row) + " and column: " + str(cell_v_column))
print ("Cell at " + str(cell_v.coordinate) + " has value: " + str(cell_v.value))
def xlgetrowcol(file_in, sheet_name, cell_name):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
cellm = sheet[cell_name]
return (cellm.row, cellm.column)
# ######
# USING THE CELL() METHOD TO ACCESS CELLS
print("@@")
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name('Sheet1')
print(sheet.cell(row=1, column=2))
print(sheet.cell(row=1, column=2).value)
def xlgetrowcolnum(file_in, sheet_name, row_in, column_in):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
return sheet.cell(row=row_in, column=column_in).value
# #######
# DETERMINE THE SIZE OF THE SHEET
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name('Sheet1')
print(sheet.max_row)
print(sheet.max_column)
def xlsizerow(file_in, sheet_name):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
return sheet.max_row
def xlsizecolumn(file_in, sheet_name):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
return sheet.max_row
def xlsizerc(file_in, sheet_name):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
return (sheet.max_row, sheet.max_column)
def xlsizea1(file_in, sheet_name):
import openpyxl
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
sheet_max_column = get_column_letter(sheet.max_column)
sheet_max_row = sheet.max_row
return (sheet_max_row, sheet_max_column)
# This way we can easily convert between high column numbers ie AAC and actual numbers.
import openpyxl
from openpyxl.cell import get_column_letter, column_index_from_string
print(get_column_letter(1))
print(get_column_letter(27))
print(get_column_letter(277))
# With a similar way we can get the letter of max column
wb = openpyxl.load_workbook('example.xlsx')
sheet = wb.get_sheet_by_name('Sheet1')
print(get_column_letter(sheet.max_column))
# The reverse process ie from numbers to 'A1' is done like:
print(column_index_from_string('A'))
print(column_index_from_string('AA'))
# ######
# ITERATE OVER EXCEL ROWS INTO LIST OF LISTS
# iterate over rows - need to know the last row "8" and specify increment
for i in range(1, 8, 1):
print(i, sheet.cell(row=i, column=2).value, sheet.cell(row=i, column=3).value)
def xliterrowintolol(file_in, sheet_name):
import openpyxl
output_list = []
central_list = []
wb = openpyxl.load_workbook(file_in)
sheet = wb.get_sheet_by_name(sheet_name)
# for i in range(start, end, increment):
# output_list.append(sheet.cell(row=i, column=1).value)
for i in sheet.rows:
for j in i:
print(i)
print(j.value)
output_list.append(j.value)
central_list.append(output_list)
output_list = []
return central_list
# iterate over rows - using max_row attribute to find length of sheet
for i in range(1, sheet.max_row + 1, 1):
print(i, sheet.cell(row=i, column=2).value, sheet.cell(row=i, column=3).value)
# iterate over columns - using max column attribute to find length of sheet
for i in range(1, sheet.max_column + 1, 1):
print(i, sheet.cell(row=2, column=i).value, sheet.cell(row=3, column=i).value)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment