lesismal/converter.py

## converter.py
# -*- coding:utf-8 -*-
#!/usr/bin/python3

import os
import datetime

class PIConverter:
    """Index where each number in [minNum, maxNum] first appears in a digit file.

    ``positions[n - minNum]`` holds the 0-based offset (counted from the third
    character of the source file) at which the decimal text of ``n`` first
    occurs, or -1 when it has not been seen.
    """

    def __init__(self, maxRead=0, minNum=100000, maxNum=99999999):
        """Allocate the lookup table.

        Args:
            maxRead: Maximum number of characters to scan after the skipped
                two-character prefix; 0 or less means "until end of file".
            minNum: Smallest number to index.
            maxNum: Largest number to index.
        """
        self.maxRead = maxRead
        self.minNum = minNum
        self.maxNum = maxNum
        # One slot per number; with the default range this is ~100 million slots.
        self.positions = [-1] * (self.maxNum + 1 - self.minNum)

    def convert(self, srcFile, dstFile):
        """Scan srcFile and write every discovered position to dstFile.

        The first two characters of srcFile are skipped (presumably the
        leading "3." of pi), so offsets are relative to the third character.
        Each output line has the form "<n - minNum>:<offset>".

        Args:
            srcFile: Path of the text file holding the digits.
            dstFile: Path of the index file to (over)write.

        Raises:
            OSError: If either file cannot be opened.
        """
        minNumLen = len(str(self.minNum))
        maxNumLen = len(str(self.maxNum))
        startTime = datetime.datetime.now()

        totalRead = 0
        carry = ""  # unscanned tail of the previous chunk
        base = 0    # absolute offset of the first character of `carry`
        with open(srcFile, 'r') as fsrc:
            fsrc.read(2)
            limit = self.maxRead
            if limit <= 0:
                limit = os.path.getsize(srcFile)
            readSize = min(1024 * 8, limit)
            print("readSize: {}".format(readSize))

            while totalRead < limit:
                # Never read past the requested limit.
                chunk = fsrc.read(min(readSize, limit - totalRead))
                if not chunk:
                    break  # end of file
                totalRead += len(chunk)
                s = carry + chunk
                # Only start positions whose longest window fits are scanned
                # now; the rest wait in `carry` until more digits arrive, so
                # no window is lost at a chunk boundary.
                complete = max(0, len(s) - maxNumLen + 1)
                self._scan(s, base, complete, minNumLen, maxNumLen)
                carry = s[complete:]
                base += complete

        # No more input: scan the tail with whatever window lengths still fit.
        self._scan(carry, base, len(carry), minNumLen, maxNumLen)

        elapsed = int((datetime.datetime.now() - startTime).total_seconds())
        print("total read: {}, time used: {}s".format(totalRead, elapsed))

        with open(dstFile, 'w') as fdst:
            for i, pos in enumerate(self.positions):
                if pos >= 0:
                    print("num: {} at {}".format(i + self.minNum, pos))
                    fdst.write(str(i) + ":" + str(pos) + "\n")

    def _scan(self, s, base, stop, minNumLen, maxNumLen):
        """Record first-seen offsets for windows starting at s[0] .. s[stop-1].

        ``base`` is the absolute offset of s[0]. Windows that run past the end
        of ``s`` are ignored.
        """
        positions = self.positions
        minNum = self.minNum
        maxNum = self.maxNum
        total = len(s)
        for i in range(stop):
            for strLen in range(minNumLen, maxNumLen + 1):
                end = i + strLen
                if end > total:
                    break  # longer windows cannot fit either
                subs = s[i:end]
                # int() silently drops leading zeros and surrounding
                # whitespace, which would credit this offset to a shorter
                # number (or produce a negative, wrapping index).
                if not subs.isdecimal() or (strLen > 1 and subs[0] == "0"):
                    continue
                num = int(subs)
                if num < minNum or num > maxNum:
                    continue
                index = num - minNum
                if positions[index] < 0:
                    positions[index] = base + i

    def find(self, n):
        """Return the recorded offset of n, or -1 if out of range or unseen."""
        if n < self.minNum or n > self.maxNum:
            return -1
        return self.positions[n - self.minNum]

# Upper bound on how much of the source file is scanned; 0 scans the whole file.
maxRead = 16
piConverter = PIConverter(maxRead=maxRead)

# Dump everything indexed so far into a much smaller position file.
piConverter.convert(srcFile="./pi-billion.txt", dstFile="./pi-position.txt")

# Building the converter is slow, so the intended workflow is to run
# piConverter.convert once and let finder.py load the generated file for
# its initialisation and lookups.
print("piConverter find 141592 at pos:", piConverter.find(n=141592))
print("piConverter find 415926 at pos:", piConverter.find(n=415926))

## finder.py
# -*- coding:utf-8 -*-
#!/usr/bin/python3

class PIFinder:
    """Answer position lookups from an index file of "<n - minNum>:<offset>" lines."""

    def __init__(self, fname, minNum=100000, maxNum=99999999):
        """Load the index file into memory.

        Args:
            fname: Path of the index file; each non-blank line is
                "<index>:<offset>" where index is the number minus minNum.
            minNum: Smallest number covered by the index.
            maxNum: Largest number covered by the index.

        Raises:
            OSError: If fname cannot be opened.
            ValueError: If a line does not contain two integers.
            IndexError: If a line's index lies outside [0, maxNum - minNum].
        """
        self.minNum = minNum
        self.maxNum = maxNum
        # -1 marks "not in the index"; 0 cannot be used as the marker because
        # it is a legitimate offset.
        self.positions = [-1] * (self.maxNum + 1 - self.minNum)
        size = len(self.positions)
        with open(fname, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue  # tolerate blank lines
                fields = line.split(":")
                i, pos = int(fields[0]), int(fields[1])
                # A negative index would silently wrap to another number's slot.
                if i < 0 or i >= size:
                    raise IndexError("index {} out of range in {}".format(i, fname))
                self.positions[i] = pos

    def find(self, n):
        """Return the stored offset of n, or -1 if out of range or not indexed."""
        if n < self.minNum or n > self.maxNum:
            return -1
        return self.positions[n - self.minNum]

# Load the index written by the converter and run two sample lookups.
piFinder = PIFinder(fname="./pi-position.txt")
print("piFinder find 141592 at pos:", piFinder.find(n=141592))
print("piFinder find 415926 at pos:", piFinder.find(n=415926))
	# -- coding:utf-8 --
	#!/usr/bin/python3

	import os
	import datetime

	class PIConverter:
	    """Index where each number in [minNum, maxNum] first appears in a digit file.

	    ``positions[n - minNum]`` holds the 0-based offset (counted from the third
	    character of the source file) at which the decimal text of ``n`` first
	    occurs, or -1 when it has not been seen.
	    """

	    def __init__(self, maxRead=0, minNum=100000, maxNum=99999999):
	        """Allocate the lookup table.

	        Args:
	            maxRead: Maximum number of characters to scan after the skipped
	                two-character prefix; 0 or less means "until end of file".
	            minNum: Smallest number to index.
	            maxNum: Largest number to index.
	        """
	        self.maxRead = maxRead
	        self.minNum = minNum
	        self.maxNum = maxNum
	        # One slot per number; with the default range this is ~100 million slots.
	        self.positions = [-1] * (self.maxNum + 1 - self.minNum)

	    def convert(self, srcFile, dstFile):
	        """Scan srcFile and write every discovered position to dstFile.

	        The first two characters of srcFile are skipped (presumably the
	        leading "3." of pi), so offsets are relative to the third character.
	        Each output line has the form "<n - minNum>:<offset>".

	        Args:
	            srcFile: Path of the text file holding the digits.
	            dstFile: Path of the index file to (over)write.

	        Raises:
	            OSError: If either file cannot be opened.
	        """
	        minNumLen = len(str(self.minNum))
	        maxNumLen = len(str(self.maxNum))
	        startTime = datetime.datetime.now()

	        totalRead = 0
	        carry = ""  # unscanned tail of the previous chunk
	        base = 0    # absolute offset of the first character of `carry`
	        with open(srcFile, 'r') as fsrc:
	            fsrc.read(2)
	            limit = self.maxRead
	            if limit <= 0:
	                limit = os.path.getsize(srcFile)
	            readSize = min(1024 * 8, limit)
	            print("readSize: {}".format(readSize))

	            while totalRead < limit:
	                # Never read past the requested limit.
	                chunk = fsrc.read(min(readSize, limit - totalRead))
	                if not chunk:
	                    break  # end of file
	                totalRead += len(chunk)
	                s = carry + chunk
	                # Only start positions whose longest window fits are scanned
	                # now; the rest wait in `carry` until more digits arrive, so
	                # no window is lost at a chunk boundary.
	                complete = max(0, len(s) - maxNumLen + 1)
	                self._scan(s, base, complete, minNumLen, maxNumLen)
	                carry = s[complete:]
	                base += complete

	        # No more input: scan the tail with whatever window lengths still fit.
	        self._scan(carry, base, len(carry), minNumLen, maxNumLen)

	        elapsed = int((datetime.datetime.now() - startTime).total_seconds())
	        print("total read: {}, time used: {}s".format(totalRead, elapsed))

	        with open(dstFile, 'w') as fdst:
	            for i, pos in enumerate(self.positions):
	                if pos >= 0:
	                    print("num: {} at {}".format(i + self.minNum, pos))
	                    fdst.write(str(i) + ":" + str(pos) + "\n")

	    def _scan(self, s, base, stop, minNumLen, maxNumLen):
	        """Record first-seen offsets for windows starting at s[0] .. s[stop-1].

	        ``base`` is the absolute offset of s[0]. Windows that run past the end
	        of ``s`` are ignored.
	        """
	        positions = self.positions
	        minNum = self.minNum
	        maxNum = self.maxNum
	        total = len(s)
	        for i in range(stop):
	            for strLen in range(minNumLen, maxNumLen + 1):
	                end = i + strLen
	                if end > total:
	                    break  # longer windows cannot fit either
	                subs = s[i:end]
	                # int() silently drops leading zeros and surrounding
	                # whitespace, which would credit this offset to a shorter
	                # number (or produce a negative, wrapping index).
	                if not subs.isdecimal() or (strLen > 1 and subs[0] == "0"):
	                    continue
	                num = int(subs)
	                if num < minNum or num > maxNum:
	                    continue
	                index = num - minNum
	                if positions[index] < 0:
	                    positions[index] = base + i

	    def find(self, n):
	        """Return the recorded offset of n, or -1 if out of range or unseen."""
	        if n < self.minNum or n > self.maxNum:
	            return -1
	        return self.positions[n - self.minNum]

	# Upper bound on how much of the source file is scanned; 0 scans the whole file.
	maxRead = 16
	piConverter = PIConverter(maxRead=maxRead)

	# Dump everything indexed so far into a much smaller position file.
	piConverter.convert(srcFile="./pi-billion.txt", dstFile="./pi-position.txt")

	# Building the converter is slow, so the intended workflow is to run
	# piConverter.convert once and let finder.py load the generated file for
	# its initialisation and lookups.
	print("piConverter find 141592 at pos:", piConverter.find(n=141592))
	print("piConverter find 415926 at pos:", piConverter.find(n=415926))
	# -- coding:utf-8 --
	#!/usr/bin/python3

	class PIFinder:
	    """Answer position lookups from an index file of "<n - minNum>:<offset>" lines."""

	    def __init__(self, fname, minNum=100000, maxNum=99999999):
	        """Load the index file into memory.

	        Args:
	            fname: Path of the index file; each non-blank line is
	                "<index>:<offset>" where index is the number minus minNum.
	            minNum: Smallest number covered by the index.
	            maxNum: Largest number covered by the index.

	        Raises:
	            OSError: If fname cannot be opened.
	            ValueError: If a line does not contain two integers.
	            IndexError: If a line's index lies outside [0, maxNum - minNum].
	        """
	        self.minNum = minNum
	        self.maxNum = maxNum
	        # -1 marks "not in the index"; 0 cannot be used as the marker because
	        # it is a legitimate offset.
	        self.positions = [-1] * (self.maxNum + 1 - self.minNum)
	        size = len(self.positions)
	        with open(fname, 'r') as f:
	            for line in f:
	                line = line.strip()
	                if not line:
	                    continue  # tolerate blank lines
	                fields = line.split(":")
	                i, pos = int(fields[0]), int(fields[1])
	                # A negative index would silently wrap to another number's slot.
	                if i < 0 or i >= size:
	                    raise IndexError("index {} out of range in {}".format(i, fname))
	                self.positions[i] = pos

	    def find(self, n):
	        """Return the stored offset of n, or -1 if out of range or not indexed."""
	        if n < self.minNum or n > self.maxNum:
	            return -1
	        return self.positions[n - self.minNum]

	# Load the index written by the converter and run two sample lookups.
	piFinder = PIFinder(fname="./pi-position.txt")
	print("piFinder find 141592 at pos:", piFinder.find(n=141592))
	print("piFinder find 415926 at pos:", piFinder.find(n=415926))