Skip to content

Instantly share code, notes, and snippets.

@hieu292
Created October 2, 2016 08:31
Show Gist options
  • Save hieu292/57c6a5d590dcd5f41dfaa62e3da29408 to your computer and use it in GitHub Desktop.
Save hieu292/57c6a5d590dcd5f41dfaa62e3da29408 to your computer and use it in GitHub Desktop.
// Generator stub: reads the file named `nameFile` as a stream and yields `data`.
// The actual reading logic is elided in this sketch; `data` is produced by that
// elided code.
// NOTE(review): the caller indexes the yielded value as data[0]["unix_timestamp"],
// so `data` is presumably a list of log records — confirm against the real reader.
func Read_File_Stream_In_Ram(nameFile){
// read the stream into memory
// ...some stuff to work (reading logic intentionally omitted here)
yield data;
}
// Splits one log file into chunk files on disk, each chunk sorted by book name.
//
// Params:
//   log - object with a `name` (file to read) and a `loaded` flag.
// Returns:
//   the same `log` object, with `loaded` set to true once every chunk is written.
//
// For every chunk produced by Read_File_Stream_In_Ram the function derives an
// index from the chunk's first and last "unix_timestamp", sorts the chunk by
// upper-cased `name_book`, and writes it with Write_File(nameFile, index, chunk).
func Split_Chunk_Data(log){
    nameFile = log.name

    // Consume the stream one chunk at a time. The previous version pushed a
    // single value into a list and looped while that list was non-empty, which
    // never terminated because nothing was ever removed from it.
    for data in Read_File_Stream_In_Ram(nameFile){
        if(len(data) == 0){
            // Nothing to index or write; data[0] below would fail.
            continue
        }

        // Take the time range BEFORE sorting by name: sort() reorders in place.
        // NOTE(review): assumes records arrive in chronological order, so the
        // first/last entries bound the chunk's time range — confirm.
        start_timestamp = data[0]["unix_timestamp"]
        end_timestamp = data[len(data) - 1]["unix_timestamp"]
        index = generate_index(start_timestamp, end_timestamp) //ex: "log-1-startTime-EndTime"

        // Sort the chunk by name_book, case-insensitively. The comparator must
        // return negative / zero / positive; returning a boolean does not give
        // a reliable ordering.
        dataSorted = data.sort((a, b) => {
            keyA = a.name_book.toUpperCase()
            keyB = b.name_book.toUpperCase()
            if(keyA < keyB){ return -1 }
            if(keyA > keyB){ return 1 }
            return 0
        })

        // Write to disk: file name is built from nameFile and index.
        Write_File(nameFile, index, dataSorted)
    }

    log.loaded = true;
    return log;
}
// Runs `query` against every chunk of `logFile` whose time range overlaps the
// query's time range.
//
// Params:
//   query   - object with `startTime` and `endTime` (plus whatever search_book reads).
//   logFile - name of the log whose chunk files should be inspected.
//
// NOTE(review): query.startTime / query.endTime must be directly comparable with
// the values returned by get_startTime_from_ChunkName / get_endTime_from_ChunkName
// — confirm both sides use the same time representation.
func extract_Info(query, logFile){
    listChunkName = get_list_name_of_chunk_file(logFile) // output format: ["log-1-Jan01-Jan30.txt", "log-1-Feb01-Feb30.txt", ...]
    for chunkName in listChunkName {
        start_timestamp = get_startTime_from_ChunkName(chunkName)
        end_timestamp = get_endTime_from_ChunkName(chunkName)

        // Two ranges overlap only when EACH one starts before the other ends.
        // The previous `||` accepted practically every chunk, defeating the
        // purpose of the time index. Bounds are inclusive so a record lying
        // exactly on a chunk boundary is not missed.
        if(query.startTime <= end_timestamp && query.endTime >= start_timestamp){
            // search book on this chunk
            search_book(query, chunkName)
        }
    }
}
// Looks up query.nameBook inside one chunk file and writes the matches to disk.
//
// Params:
//   query     - object carrying `nameBook`, the book name to look for; it is
//               also passed to Write_File to identify the result.
//   chunkName - name of the chunk file to read.
//
// NOTE(review): the binary search in search() is only valid if the chunk is
// sorted by book name — confirm Read_File returns it in the order it was written.
func search_book(query, chunkName){
    bookName = query.nameBook
    data = Read_File(chunkName)
    lengthData = len(data)

    // Search the data that was just read (the previous code passed an undefined
    // `sortedData`), using zero-based inclusive bounds: 0 .. lengthData - 1.
    // The old bounds 1 .. lengthData skipped the first record and ran one past
    // the end.
    result = search(data, 0, lengthData - 1, bookName)

    // Write result to disk
    Write_File(query, result)
}
// Binary search that yields every record in sortedData[lowerPoint..upperPoint]
// (zero-based, inclusive) whose `name_book` equals bookName, ignoring case.
//
// Params:
//   sortedData - list of records sorted by upper-cased `name_book`.
//   lowerPoint - first index to consider.
//   upperPoint - last index to consider.
//   bookName   - book name to look for.
// Yields:
//   each matching record, in the order they appear in sortedData.
//
// Fixes over the previous version:
//   * compares against record.name_book instead of the whole record;
//   * no longer uses splice(), which mutated the input and shifted indices
//     while the old lower/upper bounds were still being used;
//   * terminates when the range is empty (lowerPoint > upperPoint);
//   * uses an integer midpoint;
//   * actually yields matches found beyond the first hit (the recursive calls'
//     results used to be discarded).
func search(sortedData, lowerPoint, upperPoint, bookName){
    if(len(sortedData) == 0 || lowerPoint > upperPoint){
        return;
    }

    // Compare upper-cased keys so the search agrees with the sort order.
    target = bookName.toUpperCase()

    // Clamp to the valid index range so out-of-range bounds cannot be indexed.
    low = max(lowerPoint, 0)
    high = min(upperPoint, len(sortedData) - 1)
    last = high

    // Lower-bound search: narrow down to the first index whose key is >= target.
    while(low <= high){
        middle = floor((low + high) / 2)
        if(sortedData[middle].name_book.toUpperCase() < target){
            low = middle + 1
        } else {
            high = middle - 1
        }
    }

    // Equal keys are contiguous in sorted data, so walk forward while they match.
    while(low <= last && sortedData[low].name_book.toUpperCase() == target){
        yield sortedData[low]
        low = low + 1
    }
}
// Entry point of the sketch:
//   1. split every log file into name-sorted chunk files;
//   2. run one query against the requested log, if it was loaded;
//   3. read back the report written for that query.
func main(){
    // Split large file and sort by Name book
    loaded_Log = []
    // Every entry needs the `name` key: Split_Chunk_Data reads log.name.
    name_array_log = [{name: 'log-1.txt', loaded: false}, {name: 'log-2.txt', loaded: false},...]
    for log in name_array_log {
        result = Split_Chunk_Data(log)
        loaded_Log.push(result)
    }

    // Check log file and call extract informations
    // Extension is '.txt' so it can match the names registered above
    // ('log-n.text' could never equal any of them).
    logFile = 'log-n.txt'
    // Key is `nameBook` because search_book reads query.nameBook.
    query = { nameBook : 'Book x', startTime: 'Aug 10', endTime: 'Aug 12'}
    for log in loaded_Log {
        if(log.name == logFile && log.loaded){
            extract_Info(query, logFile)
        }
    }

    // read report file
    Read_File(query)
}
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment