Last active
June 21, 2018 12:38
-
-
Save jul/91c12821af62693c8671d111eb165dc0 to your computer and use it in GitHub Desktop.
seek is sick ?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# -*- coding: utf-8 -*- | |
from sys import version | |
import platform | |
import locale | |
# Print platform, locale and interpreter details so that the output of this
# script can be compared between machines. The "platfrom" spelling is kept
# because it is part of the recorded output.
# NOTE: locale.getdefaultlocale() is deprecated since Python 3.11.
print("platfrom is {}".format(platform.platform()))
print("default locale is {!r}".format(locale.getdefaultlocale()))
print("locale is {!r}".format(locale.getlocale()))
print("python version {}".format(version))
def utf2seek(s):
    """Return the number of bytes *s* occupies once encoded as UTF-8."""
    return len(s.encode("utf8"))


# Ten identical lines of one hundred ASCII digits, each terminated by "\n".
content = ("0123456789" * 10 + "\n") * 10

# Byte offset of the first "5" inside a line.
to5 = utf2seek("01234")
# Distance in bytes between two consecutive "5"s on the same line.
period = utf2seek("0123456789")
# Size of the in-memory "\n" (always 1 byte in UTF-8).
# NOTE: bug.txt is written below in text mode with the default newline
# handling, which translates "\n" to os.linesep on output. On Windows the
# terminator stored on disk is therefore "\r\n" (2 bytes), so this value
# does not describe the on-disk layout there.
sizeofcrlf = utf2seek("\n")

print("period is %d" % period)
print("offset to 5 is %d" % to5)
print("sizeof return is ... %s " % sizeofcrlf)
print("let us try to use fseek to replace ALL 5 with X")
print()

# Context managers guarantee the handle is closed even if write/read fails.
with open("bug.txt", "w", encoding="utf8") as f:
    f.write(content)

with open("bug.txt", "r", encoding="utf8") as f:
    print("""
original content
================
""")
    print(f.read())
# Patch bug.txt in place through a binary handle: every seek() target below
# is a raw byte offset into the file as it is stored on disk.
#
# NOTE: bug.txt was written in text mode with the default newline handling,
# which translates each "\n" to os.linesep. On Windows that is "\r\n"
# (2 bytes) while sizeofcrlf measures the in-memory "\n" (1 byte). `magic`
# is the hand-tuned correction for that one missing byte per terminator.
mire = "X".encode("utf8")
sizeofline = utf2seek("0123456789" * 10)
magic = 1
# Start of line 3: two full lines (20 periods) plus two corrected terminators.
cosl = correct_offset_since_line2 = 20 * period + 2 * sizeofcrlf + 2 * magic

with open("bug.txt", "rb+") as f:
    f.seek(0, 0)

    # Line 1: no terminator precedes it, so the plain offsets are right.
    # range(9) patches the first nine of the ten "5"s on a line; the recorded
    # expected output likewise shows the tenth one untouched.
    for i in range(9):
        f.seek(to5 + i * period)
        f.write(mire)

    # Line 2: one terminator precedes it, corrected by `magic`.
    for i in range(9):
        f.seek(to5 + i * period + sizeofline + sizeofcrlf + magic)
        f.write(mire)

    # Lines 3 to 10: start from cosl and advance by sizeofline + sizeofcrlf
    # per line WITHOUT `magic`, so where the on-disk terminator is 2 bytes the
    # target drifts one byte further left on every subsequent line.
    for j in range(2, 10):
        line_start = cosl + (j - 2) * sizeofline + (j - 2) * sizeofcrlf
        for i in range(9):
            f.seek(line_start + to5 + i * period)
            f.write(mire)
# Read the patched file back in text mode and display it. The context manager
# closes the handle even if decoding or printing fails.
with open("bug.txt", "r", encoding="utf8") as f:
    print("""
final content where 5 should be replaced by X correctly only on 1st->3rd line and then 1 offset per CRLF
========================================================================================================
""")
    print(f.read())

print("conclusion the seek problem happens if and only if there is a CRLF on windows for me with default settings")
print()
# The "\n" escape inside this non-raw literal is interpreted, so the message
# is printed across two lines (as the recorded expected output shows).
print("""what happens if we force eof to '\n'?""")
def utf2seek(s):
    """Return the number of bytes *s* occupies once encoded as UTF-8."""
    return len(s.encode("utf8"))


# Ten identical lines of one hundred ASCII digits, each terminated by "\n".
content = ("0123456789" * 10 + "\n") * 10

# Byte offset of the first "5" inside a line.
to5 = utf2seek("01234")
# Distance in bytes between two consecutive "5"s on the same line.
period = utf2seek("0123456789")
# Size of the in-memory "\n" (1 byte). bug2.txt is written with newline='\n',
# which disables newline translation, so this matches the on-disk terminator.
sizeofcrlf = utf2seek("\n")

print("period is %d" % period)
print("offset to 5 is %d" % to5)
print("sizeof return is ... %s " % sizeofcrlf)
print("let us try to use fseek to replace ALL 5 with X")
print()

# Context managers guarantee the handle is closed even if write/read fails.
with open("bug2.txt", "w", encoding="utf8", newline='\n') as f:
    f.write(content)

with open("bug2.txt", "r", encoding="utf8", newline='\n') as f:
    print("""
original content
================
""")
    print(f.read())
# Patch bug2.txt in place through a binary handle: every seek() target below
# is a raw byte offset into the file as it is stored on disk. bug2.txt was
# written with newline='\n', so each terminator is exactly sizeofcrlf bytes.
mire = "X".encode("utf8")
sizeofline = utf2seek("0123456789" * 10)
magic = 1
# Start of line 3: two full lines (20 periods) plus two terminators.
cosl = correct_offset_since_line2 = 20 * period + 2 * sizeofcrlf

with open("bug2.txt", "rb+") as f:
    f.seek(0, 0)

    # Line 1. range(9) patches the first nine of the ten "5"s on a line; the
    # recorded expected output likewise shows the tenth one untouched.
    for i in range(9):
        f.seek(to5 + i * period)
        f.write(mire)

    # Line 2.
    # NOTE(review): `magic` is still added here although this file has
    # one-byte terminators, so the X lands one byte late on line 2 only (the
    # recorded expected output shows "012345X789..." for that line). Kept
    # as-is to match that output; confirm whether it is intentional.
    for i in range(9):
        f.seek(to5 + i * period + sizeofline + sizeofcrlf + magic)
        f.write(mire)

    # Lines 3 to 10: cosl carries no `magic`, and each further line adds
    # sizeofline + sizeofcrlf, which matches the on-disk layout of this file.
    for j in range(2, 10):
        line_start = cosl + (j - 2) * sizeofline + (j - 2) * sizeofcrlf
        for i in range(9):
            f.seek(line_start + to5 + i * period)
            f.write(mire)
# Read the patched file back in text mode and display it. The context manager
# closes the handle exactly once, even if decoding or printing fails; the
# original's second close() on the already-closed handle was a no-op.
with open("bug2.txt", "r", encoding="utf8", ) as f:
    print("""
final content where 5 should be replaced by X correctly only on 1st->3rd line and then 1 offset per CRLF
========================================================================================================
""")
    print(f.read())

print("conclusion the seek problem happens if and only if there is a CRLF on windows for me with default settings")
# Transcript recorded by the author on the Windows machine named in its first
# lines; printed verbatim so a fresh run can be compared against it.
expected_transcript = '''platfrom is Windows-8.1-6.3.9600-SP0
default locale is ('en_US', 'cp1252')
locale is (None, None)
python version 3.6.5rc1 (v3.6.5rc1:f03c5148cf, Mar 14 2018, 02:23:56) [MSC v.1913 32 bit (Intel)]
period is 10
offset to 5 is 5
sizeof return is ... 1
let us try to use fseek to replace ALL 5 with X
original content
================
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
final content where 5 should be replaced by X correctly only on 1st->3rd line and then 1 offset per CRLF
========================================================================================================
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
0123X567890123X567890123X567890123X567890123X567890123X567890123X567890123X567890123X567890123456789
012X456789012X456789012X456789012X456789012X456789012X456789012X456789012X456789012X4567890123456789
01X345678901X345678901X345678901X345678901X345678901X345678901X345678901X345678901X34567890123456789
0X234567890X234567890X234567890X234567890X234567890X234567890X234567890X234567890X234567890123456789
X123456789X123456789X123456789X123456789X123456789X123456789X123456789X123456789X1234567890123456789
X012345678X012345678X012345678X012345678X012345678X012345678X012345678X012345678X01234567890123456789X
01234567X901234567X901234567X901234567X901234567X901234567X901234567X901234567X901234567890123456789
conclusion the seek problem happens if and only if there is a CRLF on windows for me with default settings
what happens if we force eof to '
'?
period is 10
offset to 5 is 5
sizeof return is ... 1
let us try to use fseek to replace ALL 5 with X
original content
================
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
final content where 5 should be replaced by X correctly only on 1st->3rd line and then 1 offset per CRLF
========================================================================================================
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
012345X789012345X789012345X789012345X789012345X789012345X789012345X789012345X789012345X7890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
01234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X678901234X67890123456789
'''
print("EXPECTED RESULTS")
print(expected_transcript)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment