Skip to content

Instantly share code, notes, and snippets.

@jul
Last active June 21, 2018 12:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jul/91c12821af62693c8671d111eb165dc0 to your computer and use it in GitHub Desktop.
Save jul/91c12821af62693c8671d111eb165dc0 to your computer and use it in GitHub Desktop.
seek is sick ?
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
from sys import version
import platform
import locale
# Describe the environment this run was made on, so that the output can be
# compared across platforms and interpreter builds.
# NOTE: locale.getdefaultlocale() is deprecated since Python 3.11; it is kept
# so the report keeps printing the same four lines.
platform_description = platform.platform()
print("platfrom is %s" % platform_description)
default_locale = locale.getdefaultlocale()
print("default locale is " + repr(default_locale))
current_locale = locale.getlocale()
print("locale is " + repr(current_locale))
print("python version " + version)
# --- Experiment 1, step 1: create bug.txt through the text layer -------------
# Ten identical lines of one hundred digits each, every line ending in "\n".
content = ("0123456789" * 10 + "\n") * 10


def utf2seek(s):
    """Return the number of bytes *s* occupies once encoded as UTF-8."""
    return len(s.encode("utf8"))


# Bytes in front of the first "5" of a line.
to5 = utf2seek("01234")
# Byte distance between two consecutive "5"s on the same line.
period = utf2seek("0123456789")
# Measured on the in-memory "\n": this is the size of a line break as Python
# strings see it, not necessarily what a text-mode write puts on disk.
sizeofcrlf = utf2seek("\n")

print("period is %d" % period)
print("offset to 5 is %d" % to5)
print("sizeof return is ... %s " % sizeofcrlf)
print("let us try to use fseek to replace ALL 5 with X")
print()

# No newline= argument here: the text layer applies its default newline
# handling when writing.
with open("bug.txt", "w", encoding="utf8") as f:
    f.write(content)
# Read bug.txt back through the text layer and show it before any patching.
original_banner = """
original content
================
"""
with open("bug.txt", "r", encoding="utf8") as f:
    print(original_banner)
    print(f.read())
# --- Experiment 1, step 2: overwrite the "5"s in place, in binary mode --------
# Every offset below is derived from in-memory string sizes: 100 bytes per
# line plus `sizeofcrlf` (1 byte) per line break.  bug.txt, however, was
# written in text mode without newline=, and per the open() documentation the
# text layer then translates each "\n" to os.linesep.  Where os.linesep is
# "\r\n" a line break occupies 2 bytes on disk, so every line starts one byte
# later than this arithmetic assumes.  That is the drift this experiment
# exhibits; seek() itself positions exactly where it is told.
mire = bytearray("X", encoding="utf8")
sizeofline = utf2seek("0123456789" * 10)
# Hand-tuned one-byte correction per line break, applied to lines 2 and 3 only.
magic = 1
cosl = correct_offset_since_line2 = 20 * period + 2 * sizeofcrlf + 2 * magic

# NOTE(review): range(0, 9) visits only the first nine "5"s of each line; the
# tenth (column 95) is left untouched, as the recorded transcript also shows.
with open("bug.txt", "rb+") as f:
    f.seek(0, 0)

    # Line 1: no line break precedes it, so the offsets are exact everywhere.
    for i in range(0, 9):
        f.seek(to5 + i * period)
        f.write(mire)

    # Line 2: one line break precedes it; `magic` stands in for its 2nd byte.
    for i in range(0, 9):
        f.seek(to5 + i * period + sizeofline + sizeofcrlf + magic)
        f.write(mire)

    # Lines 3-10: `cosl` is the corrected start of line 3.  From line 4 on no
    # further correction is added, so with 2-byte line breaks the X lands one
    # byte earlier on each successive line.
    for j in range(2, 10):
        line_shift = (j - 2) * sizeofline + (j - 2) * sizeofcrlf
        for i in range(0, 9):
            f.seek(cosl + to5 + i * period + line_shift)
            f.write(mire)
# Show bug.txt after the in-place patching, then state the author's conclusion.
final_banner = """
final content where 5 should be replaced by X correctly only on 1st->3rd line and then 1 offset per CRLF
========================================================================================================
"""
with open("bug.txt", "r", encoding="utf8") as f:
    print(final_banner)
    print(f.read())
print("conclusion the seek problem happens if and only if there is a CRLF on windows for me with default settings")
print()
# NOTE: the backslash-n in this non-raw literal is a real newline character,
# so the question below is printed across two lines.
print("""what happens if we force eof to '\n'?""")
# --- Experiment 2, step 1: create bug2.txt with newline='\n' forced -----------
# Same payload as experiment 1: ten lines of one hundred digits, "\n"-ended.
content = ("0123456789" * 10 + "\n") * 10


def utf2seek(s):
    """Return the number of bytes *s* occupies once encoded as UTF-8."""
    return len(s.encode("utf8"))


# Bytes in front of the first "5" of a line.
to5 = utf2seek("01234")
# Byte distance between two consecutive "5"s on the same line.
period = utf2seek("0123456789")
# Size of the in-memory "\n"; with newline='\n' the file is written with the
# same one-character line break.
sizeofcrlf = utf2seek("\n")

print("period is %d" % period)
print("offset to 5 is %d" % to5)
print("sizeof return is ... %s " % sizeofcrlf)
print("let us try to use fseek to replace ALL 5 with X")
print()

with open("bug2.txt", "w", encoding="utf8", newline='\n') as f:
    f.write(content)
# Read bug2.txt back (again with newline='\n') and show it before patching.
original_banner = """
original content
================
"""
with open("bug2.txt", "r", encoding="utf8", newline='\n') as f:
    print(original_banner)
    print(f.read())
# --- Experiment 2, step 2: overwrite the "5"s of bug2.txt in place ------------
# bug2.txt was written with newline='\n', so a line break is exactly
# `sizeofcrlf` (1) byte on disk and the in-memory arithmetic matches the file.
# The `+ magic` fudge of experiment 1 is therefore deliberately absent: the
# original kept it on line 2 only, which shifted that line's X onto the "6"
# (the transcript printed at the end of this script was recorded that way).
mire = bytearray("X", encoding="utf8")
sizeofline = utf2seek("0123456789" * 10)
# Byte offset of the start of line 3: two full lines plus two line breaks.
cosl = correct_offset_since_line2 = 20 * period + 2 * sizeofcrlf

# NOTE(review): range(0, 9) visits only the first nine "5"s of each line; the
# tenth (column 95) is left untouched, as the recorded transcript also shows.
with open("bug2.txt", "rb+") as f:
    f.seek(0, 0)

    # Line 1: starts at byte 0.
    for i in range(0, 9):
        f.seek(to5 + i * period)
        f.write(mire)

    # Line 2: starts right after one line and one line break.
    for i in range(0, 9):
        f.seek(to5 + i * period + sizeofline + sizeofcrlf)
        f.write(mire)

    # Lines 3-10: each further line adds one line length and one line break.
    for j in range(2, 10):
        line_shift = (j - 2) * sizeofline + (j - 2) * sizeofcrlf
        for i in range(0, 9):
            f.seek(cosl + to5 + i * period + line_shift)
            f.write(mire)
# Show bug2.txt after the in-place patching, then repeat the conclusion.
final_banner = """
final content where 5 should be replaced by X correctly only on 1st->3rd line and then 1 offset per CRLF
========================================================================================================
"""
with open("bug2.txt", "r", encoding="utf8") as f:
    print(final_banner)
    print(f.read())
print("conclusion the seek problem happens if and only if there is a CRLF on windows for me with default settings")
# Transcript recorded by the author on the platform named in its first lines
# (Windows 8.1, CPython 3.6.5rc1, 32 bit).  It is printed verbatim so the live
# output above can be compared against it; nothing here is computed.
print("EXPECTED RESULTS")
print('''platfrom is Windows-8.1-6.3.9600-SP0
default locale is ('en_US', 'cp1252')
locale is (None, None)
python version 3.6.5rc1 (v3.6.5rc1:f03c5148cf, Mar 14 2018, 02:23:56) [MSC v.1913 32 bit (Intel)]
period is 10
offset to 5 is 5
sizeof return is ... 1
let us try to use fseek to replace ALL 5 with X
original content
================
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
final content where 5 should be replaced by X correctly only on 1st->3rd line and then 1 offset per CRLF
========================================================================================================
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
0123X567890123X567890123X567890123X567890123X567890123X567890123X567890123X567890123X567890123456789
012X456789012X456789012X456789012X456789012X456789012X456789012X456789012X456789012X4567890123456789
01X345678901X345678901X345678901X345678901X345678901X345678901X345678901X345678901X34567890123456789
0X234567890X234567890X234567890X234567890X234567890X234567890X234567890X234567890X234567890123456789
X123456789X123456789X123456789X123456789X123456789X123456789X123456789X123456789X1234567890123456789
X012345678X012345678X012345678X012345678X012345678X012345678X012345678X012345678X01234567890123456789X
01234567X901234567X901234567X901234567X901234567X901234567X901234567X901234567X901234567890123456789
conclusion the seek problem happens if and only if there is a CRLF on windows for me with default settings
what happens if we force eof to '
'?
period is 10
offset to 5 is 5
sizeof return is ... 1
let us try to use fseek to replace ALL 5 with X
original content
================
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
final content where 5 should be replaced by X correctly only on 1st->3rd line and then 1 offset per CRLF
========================================================================================================
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
012345X789012345X789012345X789012345X789012345X789012345X789012345X789012345X789012345X7890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
''')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment