Last active
December 30, 2023 22:39
-
-
Save astockwell/6489489 to your computer and use it in GitHub Desktop.
PHP Serialization Fix. For use after a find/replace/sed on MySQL dump, to repair String lengths in serialized PHP objects. Optimized and refactored for memory-safe production use in Python (~7s for 250MB dump, ~400K replacements). Written also in Perl (~7s for 250MB), Ruby (~8s for 250MB) and Go (~20s for 250MB) for benchmark comparison. Python …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"fmt" | |
"io" | |
"os" | |
"os/exec" | |
"regexp" | |
"strconv" | |
"strings" | |
) | |
var counter int = 0 | |
func main() { | |
// setup regexp | |
re, err := regexp.Compile(`s:(\d+)(:\\?\")(.*?)(\\?\";)`) | |
if err != nil { | |
println(err) | |
} | |
defer exec.Command("mv", "db.sql~", "db.sql").Run() | |
defer exec.Command("rm", "db.sql").Run() | |
exec.Command("rm", "db.sql", "db.sql~").Run() | |
exec.Command("cp", "pf.sql", "db.sql").Run() | |
// open source file | |
s, err := os.Open("db.sql") | |
if err != nil { | |
panic(err) | |
} | |
// close fi on exit and check for its returned error | |
defer func() { | |
if err := s.Close(); err != nil { | |
panic(err) | |
} | |
}() | |
d, err := os.Create("db.sql~") | |
if err != nil { | |
println(err) | |
} | |
defer func() { | |
if err := d.Close(); err != nil { | |
panic(err) | |
} | |
}() | |
r := bufio.NewReaderSize(s, 4096) | |
// h := md5.New() | |
for { | |
// read a chunk | |
line, err := r.ReadString('\n') | |
if err != nil && err != io.EOF { | |
panic(err) | |
} | |
if line == "" { | |
fmt.Println(counter, "lines!", "done with this!") | |
break | |
} | |
// write a chunk | |
d.WriteString(re.ReplaceAllStringFunc(line, replace)) | |
counter = counter + 1 | |
} | |
} | |
// replace rewrites a single serialized PHP string token so that its declared
// length matches the byte length of its payload.
//
// matches is expected to look like `s:<n>:"<payload>";`, where the opening
// quote and the closing quote may each be preceded by a backslash. The
// result is the same token with <n> replaced by len(<payload>). Input that
// does not have this shape is returned unchanged.
func replace(matches string) string {
	// Split into at most three pieces ("s", old length, quoted payload) so
	// that colons inside the payload, e.g. in URLs, stay part of the payload.
	parts := strings.SplitN(matches, ":", 3)
	if len(parts) != 3 {
		return matches
	}
	quoted := parts[2]

	// Width of the opening delimiter: `\"` or `"`.
	var openLen int
	switch {
	case strings.HasPrefix(quoted, `\"`):
		openLen = 2
	case strings.HasPrefix(quoted, `"`):
		openLen = 1
	default:
		return matches
	}

	// Width of the closing delimiter: `\";` or `";`. It is measured
	// independently of the opening one because the two need not mirror
	// each other.
	var closeLen int
	switch {
	case strings.HasSuffix(quoted, `\";`):
		closeLen = 3
	case strings.HasSuffix(quoted, `";`):
		closeLen = 2
	default:
		return matches
	}

	n := len(quoted) - openLen - closeLen
	if n < 0 {
		// The delimiters overlap, so this is not a complete token.
		return matches
	}
	return parts[0] + ":" + strconv.Itoa(n) + ":" + quoted
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
my $src_filepath = "pf.sql"; | |
my $dest_filepath = "db.sql"; | |
sub fix_numbers
{
    # Arguments are the four captures of the serialized-string pattern:
    # the old length, the opening `:"`, the payload and the closing `";`.
    my ($old_count, $opener, $payload, $closer) = @_;

    # Everything after the length is passed through untouched.
    my $rest = $opener . $payload . $closer;

    # Use the measured payload length when there is one, otherwise keep
    # the length that was already in the text.
    my $count = length $payload;
    return defined($count)
        ? "s:" . $count . $rest
        : "s:" . $old_count . $rest;
}
sub main
{
    # Read from $src_filepath and write the corrected copy to $dest_filepath.
    open(my $SRC, '<', $src_filepath) or die "Can't open $src_filepath: $!";
    open(my $DEST, '>', $dest_filepath ) or die "Can't open file $dest_filepath to write: $!";

    while (defined(my $line = readline($SRC))) {
        # Replace the length of every serialized string token on this line
        # with the length computed by fix_numbers.
        $line =~ s/s:(\d+)(:\\?\")(.*?)(\\?\";)/fix_numbers($1, $2, $3, $4)/eig;
        print {$DEST} $line or die "Can't write to $dest_filepath: $!";
    }

    close($SRC);
    # Output is buffered, so a failed write may only be reported by close;
    # treat that as a failure instead of leaving a truncated file behind.
    close($DEST) or die "Can't finish writing $dest_filepath: $!";
}
main(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# *** TESTS *** | |
# returns a properly formatted string
# doesn't lose any characters
# both escaped and non-escaped quotes work
# works with quotes (escaped/non-escaped) within the string
# returns the proper length
# correctly handles escape sequences
import os, re | |
# Regexp to match a PHP serialized string's signature | |
serialized_token = re.compile(r"s:(\d+)(:\\?\")(.*?)(\\?\";)") | |
# Escape sequences as they appear in the dump text: two characters each, standing for one character of the stored value.
# Besides the original set this lists \\ \' \0 and \Z, which MySQL string literals also use.
escape_sequences = (r'\n', r'\r', r'\t', r'\v', r'\"', r'\.', r'\\', r"\'", r'\0', r'\Z')
# A single left-to-right scanner for all sequences, so that e.g. `\\n` is read as an
# escaped backslash followed by a plain `n` rather than being counted once per sequence
_escape_token = re.compile('|'.join(re.escape(seq) for seq in escape_sequences))
# Return the serialized string with the corrected string length
def _fix_serialization_instance(matches):
    # matches is a match of the serialized-string pattern: group 2 is the opening `:"`,
    # group 3 the payload and group 4 the closing `";`
    target_str = matches.group(3)
    # PHP records the length in bytes; text (unicode) input is measured by its
    # encoded size -- assumes the dump is UTF-8, confirm for other encodings
    if isinstance(target_str, bytes):
        ts_len = len(target_str)
    else:
        ts_len = len(target_str.encode('utf-8'))
    # Each escape sequence occupies two characters here but only one in the stored value
    ts_len -= len(_escape_token.findall(target_str))
    return 's:{0}{1}{2}{3}'.format(ts_len, matches.group(2), target_str, matches.group(4))
# Accepts a file or a string | |
# Iterate over a file in memory-safe way to correct all instances of serialized strings (dumb replacement) | |
def fix_serialization(file):
    """Correct the string lengths of all serialized PHP strings in a file or a string.

    `file` is first treated as a path: the file is rewritten line by line through
    a temporary `<file>~` and True is returned. If that fails, `file` is treated as
    the serialized text itself and the corrected text is printed and returned.
    If it is neither, an error is printed and the interpreter exits.
    """
    try:
        # Both handles are released by the `with`, also when a line fails half-way
        with open(file, 'r') as s, open(file + "~", 'w') as d:
            for line in s:
                d.write(re.sub(serialized_token, _fix_serialization_instance, line))
        # Swap the files only after both are closed
        os.remove(file)
        os.rename(file + '~', file)
        print("file serialized")
        return True
    except Exception:
        # `Exception` keeps Ctrl-C and interpreter exit out of the fallback path.
        # Force python to see escape sequences as part of a raw string
        try:
            raw_file = file.encode('string-escape')
        except LookupError:
            # Python 3 has no `string-escape` codec; `unicode-escape` is its counterpart
            raw_file = file.encode('unicode-escape').decode('ascii')
        # Simple input test to see if the user is trying to pass a string directly
        if isinstance(file, str) and re.search(serialized_token, raw_file):
            output = re.sub(serialized_token, _fix_serialization_instance, raw_file)
            print(output)
            print("string serialized")
            return output
        else:
            print("Error Occurred: Not a valid input?")
            exit()
# EXAMPLES | |
# fix_serialization('s:2:\"http://murphy.psstudi\r\nosdev.com/wp-content/uploads/2013/03/logo-2.jpg\";') | |
# fix_serialization('test.txt') | |
# fix_serialization('texxxxt.txt') | |
if __name__ == "__main__":
    import sys
    # Only a missing argument means "no file specified"; any other failure is
    # left to surface as itself instead of being reported as a usage error
    if len(sys.argv) < 2:
        print("No File specified, use `python serialize_fix.py [filename]`")
    else:
        fix_serialization(sys.argv[1])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# | |
# Contributed by Mic Alexander, https://github.com/micalexander | |
# | |
Encoding.default_external = Encoding::UTF_8 | |
Encoding.default_internal = Encoding::UTF_8 | |
# Rewrites +file+ in place with corrected serialized string lengths and
# returns the path it was given. The whole file is read into memory.
def fix_serialization file
  Encoding.default_external = Encoding::UTF_8
  Encoding.default_internal = Encoding::UTF_8
  string = File.read file
  fixed = fix_text string
  # File.open always treats its argument as a path; Kernel#open would run a
  # name starting with "|" as a command.
  File.open file, 'w' do |io|
    io.write fixed
  end
  return file
end
# php escapes: | |
# "\\" #Backslash, '"' Double quotes, "\'" Single quotes, "\a" Bell/alert, | |
# "\b" Backspace, "\r" Carriage Return, "\n" New Line, "\s" Space, "\t" Tab | |
# Returns a copy of +string+ in which every serialized string token carries
# the byte size of its payload, counting each listed escape sequence once.
def fix_text string
  token = /(s\s*:\s*)(\d+)((\s*:\\*["&])(.*?)(\\?\"\s*;))/
  escapes = /(\\"|\\'|\\\\|\\a|\\b|\\n|\\r|\\s|\\t|\\v)/
  string.gsub(token) do
    # Hold on to the match before scanning, since scan replaces the
    # last-match data.
    m = Regexp.last_match
    payload = m[5]
    size = payload.bytesize - payload.scan(escapes).size
    "#{m[1]}#{size}#{m[3]}"
  end
end
# test_1 = fix_serialization "s:12:\"robots\\.txt$\";" | |
# raise test_1 unless "s:12:\"robots\\.txt$\";" == test_1 | |
fix_serialization 'kd-2013-11-01-09-27-production.sql' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
a:5:{s:2:"width";i:1200;s:2:"height";i:650;s:1:"file";s:86:"2012/12/home-agave-renamed.jpg";s:5:"sizes";} | |
a:4:{s:1:"thumbnail";a:4:{s:2:"file";s:60:"home-agave-renamed-430x290.jpg";s:4:"width";i:430;s:6:"height";i:290;s:9:"mime-type";s:10:"image/jpeg";}s:6:"medium";a:4:{s:4:"file";s:30:"home-agave-renamed-460x249.jpg";s:5:"width";i:460;s:6:"height";i:249;s:9:"mime-type";s:10:"image/jpeg";}s:5:"large";a:4:{s:4:"file";s:30:"home-agave-renamed-900x487.jpg";s:5:"width";i:900;s:6:"height";i:487;s:9:"mime-type";s:10:"image/jpeg";}s:8:"detail-2";a:4:{s:4:"file";s:30:"home-agave-renamed-460x517.jpg";s:5:"width";i:460;s:6:"height";i:517;s:9:"mime-type";s:10:"image/jpeg";}}s:10:"image_meta";a:10:{s:8:"aperture";i:0;s:6:"credit";s:0:"";s:6:"camera";s:0:"";s:7:"caption";s:0:"";s:17:"created_timestamp";i:0;s:9:"copyright";s:0:"";s:12:"focal_length";i:0;s:3:"iso";i:0;s:13:"shutter_speed";i:0;s:5:"title";s:0:"";}} | |
(156,'ttrust_options','a:23:{s:11:\"ttrust_logo\"; | |
s:68:\"http://example.com/wp-content/uploads/2013/03/logo-2.jpg\"; | |
s:0:\"\"; | |
s:163:\"3140 W. Buckeye Road, Phoenix, AZ 85009 | (602) 353-5435\r\n<br /><br />\r\nSupport for this website was provided by a grant from the Robert Wood Johnson Foundation.\"; | |
s:1:\"0\"; | |
}','yes'),( | |
(157,'ttrust_options','a:23:{s:11:\"ttrust_logo\"; | |
s:71:\"http://example.com/wp-content/uploads/2013/03/logo-2.jpg\"; | |
s:0:\"\"; | |
s:167:\"3140 W. Buckeye Road, Phoenix, AZ 85009 | (602) 353-5435\r\n<br /><br />\r\nSupport for this website was provided by a grant from the Robert Wood Johnson Foundation.\"; | |
s:0:\"\"; | |
}','yes') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey the python script is not working