Skip to content

Instantly share code, notes, and snippets.

@suya55
Created April 16, 2020 13:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save suya55/ba1bb681d0147e620f4dc278b40307b9 to your computer and use it in GitHub Desktop.
Save suya55/ba1bb681d0147e620f4dc278b40307b9 to your computer and use it in GitHub Desktop.
mysql row size checker
#!/bin/bash
#
# usage: mysqldump --no-data | check_row_size.sh
#
#
#
# https://dev.mysql.com/doc/refman/8.0/en/column-count-limit.html#row-size-limits
#
# The maximum row size for an InnoDB table, which applies to data stored locally within a database page, is slightly less than half a page for 4KB, 8KB, 16KB, and 32KB innodb_page_size settings.
# For example, the maximum row size is slightly less than 8KB for the default 16KB InnoDB page size.
#
#
# MariaDB [(none)]> show variables like 'innodb_page_size';
#+------------------+-------+
#| Variable_name | Value |
#+------------------+-------+
#| innodb_page_size | 16384 |
#+------------------+-------+
#1 row in set (0.00 sec)
#
#
# Options:
# 1. Change default innodb_page_size to 32k
# 2. Change storage engine to DYNAMIC for tables
# 3. ?
#
#===========================================================================================
# Functions
#===========================================================================================
RETVAL=0
calc_row_size() {
local -n TABLE_FIELDS=$1
local -n TABLE_CHARSET=$2
local FIELD_TYPE=""
local FIELD_SIZE=""
local FIELD=""
local ROW_SIZE=0
local IFS=$'|' # To split the vars using set
for FIELD in "${TABLE_FIELDS[@]}"
do
set $FIELD
FIELD_NAME=$1
FIELD_TYPE=$2
FIELD_SIZE=$3
calc_field_size_in_bytes $FIELD_TYPE $FIELD_SIZE $TABLE_CHARSET
ROW_SIZE=$((ROW_SIZE + RETVAL))
[ $DEBUG -gt 0 ] && echo "DEBUG1: Field name: $FIELD_NAME type: $FIELD_TYPE lenght: $FIELD_SIZE size: $RETVAL bytes Row size: $ROW_SIZE"
done
RETVAL=$ROW_SIZE
}
calc_field_size_in_bytes() {
local TYPE=$1
local SIZE=$2
local CHARSET=$3
case $FIELD_TYPE in
varchar)
# https://adayinthelifeof.nl/2010/12/04/about-using-utf-8-fields-in-mysql/
# Max 3 bytes per utf-8 chat in mysql
case $CHARSET in
utf8)
RETVAL=$((SIZE * 3)) # 3 bytes per character for utf8
;;
latin1)
RETVAL=$((SIZE)) # 1 byte per character for latin1
;;
*)
echo "Unknown charset ($CHARSET), please fix the script"
exit 1
;;
esac
;;
smallint|int|bigint|tinyint|varbinary)
RETVAL=$SIZE
;;
blob)
# https://dev.mysql.com/doc/refman/8.0/en/column-count-limit.html#row-size-limits
# BLOB and TEXT columns only contribute 9 to 12 bytes toward the row size limit because their contents are stored separately from the rest of the row.
RETVAL=9
;;
text)
RETVAL=12
;;
timestamp)
RETVAL=4
;;
decimal)
# https://dev.mysql.com/doc/refman/8.0/en/storage-requirements.html#data-types-storage-reqs-numeric
# Each multiple of nine digits requires four bytes, and the “leftover” digits require some fraction of four bytes.
if [[ $SIZE =~ ([0-9]+),([0-9]+) ]]
then
INTEGER_PART=${BASH_REMATCH[1]}
FRACTIONAL_PART=${BASH_REMATCH[2]}
INTEGER_BYTES=$((INTEGER_PART / 9 * 4))
REMAINDER=$((INTEGER_PART % 9))
case $REMAINDER in
0) INTEGER_BYTES=$((INTEGER_BYTES + 0)); ;;
1) INTEGER_BYTES=$((INTEGER_BYTES + 1)); ;;
2) INTEGER_BYTES=$((INTEGER_BYTES + 1)); ;;
3) INTEGER_BYTES=$((INTEGER_BYTES + 2)); ;;
4) INTEGER_BYTES=$((INTEGER_BYTES + 2)); ;;
5) INTEGER_BYTES=$((INTEGER_BYTES + 3)); ;;
6) INTEGER_BYTES=$((INTEGER_BYTES + 3)); ;;
7) INTEGER_BYTES=$((INTEGER_BYTES + 4)); ;;
8) INTEGER_BYTES=$((INTEGER_BYTES + 4)); ;;
esac
FRACTIONAL_BYTES=$((FRACTIONAL_PART / 9 * 4))
REMAINDER=$((FRACTIONAL_PART % 9))
case $REMAINDER in
0) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 0)); ;;
1) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 1)); ;;
2) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 1)); ;;
3) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 2)); ;;
4) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 2)); ;;
5) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 3)); ;;
6) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 3)); ;;
7) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 4)); ;;
8) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 4)); ;;
esac
[ $DEBUG -gt 0 ] && echo "DEBUG1: Calulation of decimal: SIZE: $SIZE INTEGER_PART:$INTEGER_PART FRACTIONAL_PART:$FRACTIONAL_PART TOTAL = INTEGER_BYTES($INTEGER_BYTES) + FRACTIONAL_BYTES($FRACTIONAL_BYTES)"
RETVAL=$((INTEGER_BYTES + FRACTIONAL_BYTES))
else
echo "Seems like SIZE ($SIZE) for a decimal field doesn't match pattern ([0-9]+),([0-9]+). Please investigate"
exit 1
fi
;;
*)
echo "Found a field type that is not handled: $TYPE. Please fix before proceeding."
exit 1
;;
esac
}
#===========================================================================================
# INIT
#===========================================================================================
INSIDE_CREATE_TABLE_STATEMENT=false # True if we are within a create table statement
TABLE_NAME='' # Current table name
ROW_SIZE=0 # Current row size being calculated
DEBUG=0
VERBOSE=0
MAX_SIZE=8126 # Default
declare -a FIELDS # List of fields from the current CREATE TABLE statement
#===========================================================================================
# Parameters
#===========================================================================================
OPTIND=1 # Reset in case getopts has been used previously in the shell.
while getopts "hvdt:" opt; do
case "$opt" in
h)
echo "Usage: mysqldump --no-data | ./check_row_size [-v|-d] [-t threshold]"
exit 0
;;
v) VERBOSE=1
;;
d) DEBUG=2
;;
t) MAX_SIZE=$OPTARG
;;
esac
done
#===========================================================================================
# MAIN Loop - parses schema then calc row_size based on charset
#===========================================================================================
while IFS= read -r LINE
do
[ $DEBUG -gt 1 ] && echo "DEBUG2: Read: $LINE"
# Are we within a CREATE TABLE statement?
if [ $INSIDE_CREATE_TABLE_STATEMENT == "false" ]
then
# Nope, is the current line a 'CREATE TABLE' statement?
if [[ $LINE =~ ^"CREATE TABLE \`"([^\`]+) ]]
then
[ $DEBUG -gt 0 ] && echo "CREATE TABLE FOUND!: $TABLE_NAME"
TABLE_NAME=${BASH_REMATCH[1]} # What has been caught between pattern parenthesis
INSIDE_CREATE_TABLE_STATEMENT='true'
FIELDS=()
fi
continue # Ok, next line
fi
# Is this a create table field definition line?
if [[ $LINE =~ ^' '+'`'([^'`']+)'` '([a-z]+)'('([^')']+) ]]
then
FIELD_NAME=${BASH_REMATCH[1]}
FIELD_TYPE=${BASH_REMATCH[2]}
FIELD_SIZE=${BASH_REMATCH[3]}
FIELDS+=( "$FIELD_NAME|$FIELD_TYPE|$FIELD_SIZE" )
continue
fi
# Have we reached the end of the CREATE TABLE statement?
if [[ $LINE =~ ^") ENGINE=InnoDB DEFAULT CHARSET="([^ ]+) ]]
then
CHARSET=${BASH_REMATCH[1]}
[ $DEBUG -gt 0 ] && echo "End of CREATE TABLE statement"
calc_row_size FIELDS CHARSET
ROW_SIZE=$RETVAL
if [ $ROW_SIZE -gt $MAX_SIZE ]
then
echo "Table: $TABLE_NAME has a row size: $ROW_SIZE Bytes > $MAX_SIZE Bytes Charset: $CHARSET"
# and is going to cause problem if the we upgrade to tables in ROW_FORMAT compact. See https://mariadb.com/kb/en/library/troubleshooting-row-size-too-large-errors-with-innodb/ for more details."
fi
INSIDE_CREATE_TABLE_STATEMENT='false'
fi
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment