Skip to content

Instantly share code, notes, and snippets.

@abitofhelp
Last active July 28, 2018 05:42
Show Gist options
  • Save abitofhelp/3442cd4476ec34c1b571225deb0f73df to your computer and use it in GitHub Desktop.
Save abitofhelp/3442cd4476ec34c1b571225deb0f73df to your computer and use it in GitHub Desktop.
This gist shows how to read a file in chunks while calculating its MD5 hash on the fly.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018 A Bit of Help, Inc. - All Rights Reserved, Worldwide.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Package main is the entry point for the application
// and is responsible for configuring the environment.
package main
import (
	"crypto/md5"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)
// kMaxBufferSize is the chunk size, in bytes, that main passes to
// ReadFileInChunks: at most this many bytes are read per loop iteration (4 KiB).
const kMaxBufferSize = 4 * 1024
// WriteChunksFunction is the handler that ReadFileInChunks invokes for every
// chunk it reads when its doWrite argument is not nil.
// Parameter buf holds the bytes of the current chunk. It aliases the reading
// buffer, which is overwritten by the next read, so an implementation must copy
// the data if it needs it after returning.
// Returns the number of bytes written and nil on success; otherwise an error.
type WriteChunksFunction func(buf []byte) (nBytesWritten uint64, err error)

// ReadFileInChunks reads the contents of the file at fromPath in chunks of at
// most maxBufSize bytes, calculates the MD5 hash on the fly, and hands each
// chunk to doWrite so the data can be written somewhere.
// Parameter fromPath is the path to the file that will be read; it cannot be empty.
// Parameter maxBufSize is the maximum number of bytes that will be read from the
// file in each iteration through the reading loop; it cannot be zero.
// Parameter doWrite receives each chunk (at most maxBufSize bytes) and must report
// having written exactly that many bytes. It may be nil, in which case the file is
// only read and hashed.
// Returns (number of bytes read, number of chunks processed, checksum hex string, nil)
// for success; otherwise, (0, 0, "", error). Errors from opening or reading the file
// and from doWrite are returned unchanged.
// Remarks include that whatever doWrite is doing will not be cleaned up when there is
// an error, unless doWrite does it.
func ReadFileInChunks(
	fromPath string,
	maxBufSize uint64,
	doWrite WriteChunksFunction) (nBytesRead uint64, nChunks uint64, checksum string, err error) {
	if fromPath == "" {
		return 0, 0, "", fmt.Errorf("the fromPath to the file that will be read cannot be an empty string")
	}
	if maxBufSize == 0 {
		return 0, 0, "", fmt.Errorf("the size of the buffer used for reading from the file cannot be zero")
	}

	// Open the file at fromPath for reading...
	file, err := os.Open(fromPath)
	if err != nil {
		return 0, 0, "", err
	}
	// The file is only read from, so an error from Close cannot lose data and is ignored.
	defer file.Close()

	// The buffer is reused for every chunk; md5.New returns a ready-to-use hasher.
	buffer := make([]byte, maxBufSize)
	hasher := md5.New()

	// Loop through the file, reading chunks of data, and providing each chunk to doWrite().
	for {
		// If the length of the file is not a whole multiple of the buffer size,
		// the last read returns the remaining number of bytes.
		n, readErr := file.Read(buffer)

		// Per the io.Reader contract, process the n > 0 bytes returned before
		// considering the error that may have come with them.
		if n > 0 {
			chunk := buffer[:n]
			nBytesRead += uint64(n)
			nChunks++

			// hash.Hash documents that Write never returns an error.
			hasher.Write(chunk)

			if doWrite != nil {
				written, writeErr := doWrite(chunk)
				if writeErr != nil {
					return 0, 0, "", writeErr
				}
				if written != uint64(n) {
					return 0, 0, "", fmt.Errorf("the number of bytes read %d does not equal the number of bytes written %d", n, written)
				}
			}
		}

		if readErr == io.EOF {
			// All done reading the file.
			break
		}
		if readErr != nil {
			return 0, 0, "", readErr
		}
	}

	// Generate the MD5 hex string...
	checksum = hex.EncodeToString(hasher.Sum(nil))
	return nBytesRead, nChunks, checksum, nil
}
// main is the entry point for the application. It copies the input file to the
// output file in chunks of kMaxBufferSize bytes via ReadFileInChunks and prints
// the number of bytes read, the number of chunks, and the MD5 checksum.
// Command-line: exename inputfilepath outputfilepath
// The process exits with a non-zero status when the command line is invalid,
// the output file cannot be created, or the copy fails.
func main() {
	if len(os.Args) != 3 {
		fmt.Fprintln(os.Stderr, "Invalid command line: exename inputfilepath outputfilepath")
		// Exit here: indexing os.Args below would panic when arguments are missing.
		os.Exit(-1)
	}
	fromPath, toPath := os.Args[1], os.Args[2]

	outFile, err := os.Create(toPath)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(-1)
	}

	// Read the fromPath file in chunks and write the chunks to a new file.
	nBytesRead, nChunks, checksum, err := ReadFileInChunks(fromPath, kMaxBufferSize, func(buf []byte) (uint64, error) {
		n, writeErr := outFile.Write(buf)
		return uint64(n), writeErr
	})

	// Close explicitly instead of deferring: os.Exit below skips deferred calls,
	// and the Close error of a written file must not be ignored silently.
	if closeErr := outFile.Close(); err == nil {
		err = closeErr
	}

	fmt.Printf("Done: nBytesRead: %d, nChunks: %d, Checksum: %s, Error: %v\n", nBytesRead, nChunks, checksum, err)
	if err != nil {
		os.Exit(-1)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment