Skip to content

Instantly share code, notes, and snippets.

@christoofar
Created April 28, 2024 20:34
Show Gist options
  • Save christoofar/b20e4b036229746910c2915a6e3d7c38 to your computer and use it in GitHub Desktop.
Save christoofar/b20e4b036229746910c2915a6e3d7c38 to your computer and use it in GitHub Desktop.
How to make a proprietary binary free-form format using sentinel bytes
package archive
import (
"errors"
"strings"
"time"
)
// ArchiveRecord represents one entry in the archive file: a fixed-size header
// followed immediately by the raw bytes of the archived file.
//
// The archive file is a binary file that contains multiple ArchiveRecords back
// to back. The serialized header produced by ToBytes is exactly 533 bytes; the
// byte offsets noted on each field below are the positions ToBytes writes to
// and FromBytes reads from. The two int64 fields are stored low byte first
// (little-endian) by int64ToBytes.
//
// After the file data, the next record starts with another 0x1C marker. Use
// the FileSize field to determine how many bytes to read for the file data and
// to skip to the next ArchiveRecord.
//
// NOTE(review): ToBytes appends Data unmodified, so the file data may itself
// contain 0x1C bytes; scanning for the marker alone looks unreliable, prefer
// FileSize.
type ArchiveRecord struct { // ** Byte offsets in the serialized header
Marker byte // 0 - This will always be 0x1C
FileName [256]byte // 1-256 - The filename of the file being archived (space padded)
Fnlf byte // 257 - Force a linefeed 0x0A after the filename so it looks better in Notepad
FileTime int64 // 258-265 - The time the file was archived (Unix timestamp, in GMT/UTC time zone)
FileSize int64 // 266-273 - The size of the file being archived in bytes
Fntimelf byte // 274 - Force a linefeed 0x0A after the file size so it looks better in Notepad
PassValidation byte // 275 - 0x00 = File passed validation, 0x01 = File failed validation
ValidationFailReason [256]byte // 276-531 - The reason the file failed validation (space padded)
Fnvalidationlf byte // (not serialized) - Intended linefeed 0x0A after the validation reason; ToBytes and FromBytes have no slot for it in the 533-byte header
TrailingMarker byte // 532 - This will always be 0x1F
Data []byte // 533+ - The data of the file being archived
}
/*
*
* Error types
*
*/
// Sentinel errors reported while decoding an archive record.
var (
	// ErrInvalidArchiveRecord is returned by FromBytes when the supplied slice
	// is shorter than the 533-byte record header. This usually means the
	// archive is corrupted or is being parsed incorrectly: every record starts
	// with 0x1C at position 0 and its header ends with 0x1F at position 532.
	// The bytes that follow the header are the file data, up to the start of
	// the next record.
	ErrInvalidArchiveRecord = errors.New("archive header must be 533 bytes long")

	// ErrInvalidArchiveSentinelValues is returned when the first byte of the
	// header is not 0x1C or the last byte of the header is not 0x1F, which
	// indicates data corruption in the archive file.
	ErrInvalidArchiveSentinelValues = errors.New("invalid archive sentinel values expected 0x1C and 0x1F in the first and last byte of the record respectively")

	// ErrInvalidSizeMismatch is returned when the number of file-data bytes
	// following the header differs from the FileSize field stored in the
	// header. This indicates data corruption, or that the record was written
	// without placing the correct file size in the header.
	ErrInvalidSizeMismatch = errors.New("invalid size mismatch comparing the size of the file bytes to the FileSize field in the record header")
)
/*
*
*
* Read/Write functions
*
*
*/
// ReadFileName returns the FileName field as a string with leading and
// trailing white space removed, which strips the space padding stored in the
// fixed-width field.
func (r *ArchiveRecord) ReadFileName() string {
	padded := string(r.FileName[:])
	return strings.TrimSpace(padded)
}
// ReadFileTime returns the FileTime field as a time.Time, interpreting the
// stored value as whole seconds since the Unix epoch.
func (r *ArchiveRecord) ReadFileTime() time.Time {
	seconds := r.FileTime
	return time.Unix(seconds, 0)
}
// ReadFileSize returns the FileSize field, i.e. the number of file-data bytes
// recorded in the header.
func (r *ArchiveRecord) ReadFileSize() int64 {
	size := r.FileSize
	return size
}
// ReadPassValidation reports whether the record is flagged as having passed
// validation. A PassValidation byte of 0x00 means passed; any other value is
// reported as failed.
func (r *ArchiveRecord) ReadPassValidation() bool {
	passed := r.PassValidation == 0x00
	return passed
}
// ReadValidationFailReason returns the ValidationFailReason field as a string
// with leading and trailing white space removed, which strips the space
// padding stored in the fixed-width field.
func (r *ArchiveRecord) ReadValidationFailReason() string {
	padded := string(r.ValidationFailReason[:])
	return strings.TrimSpace(padded)
}
// SetFileName stores name in the fixed-width FileName field.
//
// The field holds 256 bytes. A longer name is truncated to its first 256
// bytes; a shorter name is padded on the right with spaces (0x20) so the
// whole field is always overwritten. Truncation is byte based, so a name
// containing multi-byte UTF-8 characters may be cut in the middle of a
// character.
func (r *ArchiveRecord) SetFileName(name string) {
	// copy stops at the shorter of the two operands, which performs the
	// truncation without building an intermediate padded string.
	n := copy(r.FileName[:], name)
	for i := n; i < len(r.FileName); i++ {
		r.FileName[i] = ' '
	}
}
// SetFileTime stores t in the FileTime field as a Unix timestamp (whole
// seconds), after converting t to UTC.
func (r *ArchiveRecord) SetFileTime(t time.Time) {
	utc := t.UTC()
	r.FileTime = utc.Unix()
}
// SetFileSize stores size, the length in bytes of the file being archived, in
// the FileSize field.
//
// Note that ToBytes assigns FileSize from len(Data) itself, so a value set
// here is replaced when the record is serialized.
func (r *ArchiveRecord) SetFileSize(size int64) {
	newSize := size
	r.FileSize = newSize
}
// SetPassValidation records the validation outcome and its explanation.
//
// When pass is true PassValidation is set to 0x00, otherwise to 0x01. In both
// cases reason is written to the fixed-width ValidationFailReason field:
// truncated to its first 256 bytes if longer, padded on the right with spaces
// (0x20) if shorter. Passing an empty reason therefore clears the field.
//
// Previously the reason was stored only when pass was true and silently
// discarded when pass was false, so a failure reason could never be recorded.
func (r *ArchiveRecord) SetPassValidation(pass bool, reason string) {
	if pass {
		r.PassValidation = 0
	} else {
		r.PassValidation = 1
	}

	// copy stops at the shorter operand, which performs the truncation; the
	// remainder of the field is filled with spaces.
	n := copy(r.ValidationFailReason[:], reason)
	for i := n; i < len(r.ValidationFailReason); i++ {
		r.ValidationFailReason[i] = ' '
	}
}
/*
*
*
* Serialization functions
*
*
*/
// ToBytes serializes the record as a 533-byte header followed by the file
// data held in Data.
//
// FileSize is set to len(Data) before the header is written, so the size
// stored in the returned bytes always matches the data that follows it.
// (Previously FileSize was updated only after the header had been built,
// leaving a stale size in the output.)
//
// Header layout: Marker at 0, FileName at 1-256, Fnlf at 257, FileTime at
// 258-265, FileSize at 266-273, Fntimelf at 274, PassValidation at 275,
// ValidationFailReason at 276-531 and TrailingMarker at 532. The
// Fnvalidationlf field has no slot in this layout and is not written.
//
// The returned error is always nil.
func (r *ArchiveRecord) ToBytes() ([]byte, error) {
	const headerSize = 533

	// Must happen before the header is encoded below.
	r.FileSize = int64(len(r.Data))

	// Reserve room for the payload up front so the append does not reallocate.
	data := make([]byte, headerSize, headerSize+len(r.Data))
	data[0] = r.Marker
	copy(data[1:257], r.FileName[:])
	data[257] = r.Fnlf
	copy(data[258:266], int64ToBytes(r.FileTime))
	copy(data[266:274], int64ToBytes(r.FileSize))
	data[274] = r.Fntimelf
	data[275] = r.PassValidation
	copy(data[276:532], r.ValidationFailReason[:])
	data[532] = r.TrailingMarker

	// Everything from byte 533 onward is the file data.
	data = append(data, r.Data...)
	return data, nil
}
// FromBytes populates the record from data, which should contain exactly one
// serialized ArchiveRecord: the 533-byte header followed by the file data.
//
// It returns ErrInvalidArchiveRecord, without touching the record, when data
// is shorter than 533 bytes. Otherwise every serialized field is loaded first
// and then validated: ErrInvalidArchiveSentinelValues is returned when the
// first header byte is not 0x1C or the last header byte is not 0x1F, and
// ErrInvalidSizeMismatch when the number of bytes after the header differs
// from the FileSize stored in the header. In both of those cases the record
// fields have already been overwritten with the decoded values.
//
// Data is a sub-slice of data, not a copy, so it shares the caller's backing
// array.
func (r *ArchiveRecord) FromBytes(data []byte) error {
	const headerLen = 533

	// A record can never be smaller than its header.
	if len(data) < headerLen {
		return ErrInvalidArchiveRecord
	}
	header, payload := data[:headerLen], data[headerLen:]

	// Single-byte fields.
	r.Marker, r.TrailingMarker = header[0], header[532]
	r.Fnlf, r.Fntimelf = header[257], header[274]
	r.PassValidation = header[275]

	// Fixed-width text fields.
	copy(r.FileName[:], header[1:257])
	copy(r.ValidationFailReason[:], header[276:532])

	// 8-byte integer fields.
	r.FileTime = bytesToInt64(header[258:266])
	r.FileSize = bytesToInt64(header[266:274])

	// Whatever follows the header is the file data.
	r.Data = payload

	switch {
	case r.Marker != 0x1C, r.TrailingMarker != 0x1F:
		return ErrInvalidArchiveSentinelValues
	case int64(len(r.Data)) != r.FileSize:
		return ErrInvalidSizeMismatch
	}
	return nil
}
// ArchiveRecordFromBytes decodes data, which should contain a single
// serialized ArchiveRecord, into a newly created record. If decoding fails it
// returns a nil record together with the error reported by FromBytes.
func ArchiveRecordFromBytes(data []byte) (*ArchiveRecord, error) {
	parsed := NewArchiveRecord()
	if err := parsed.FromBytes(data); err != nil {
		return nil, err
	}
	return parsed, nil
}
/*
*
*
* Constructor functions
*
*
*/
// NewArchiveRecord returns a record initialised with default values: both
// sentinel bytes and the three linefeed bytes set, FileTime set to the current
// time as a UTC Unix timestamp, FileSize and PassValidation zero, and the
// FileName and ValidationFailReason fields filled with spaces.
func NewArchiveRecord() *ArchiveRecord {
	rec := new(ArchiveRecord)

	// Sentinels: ASCII File Separator opens the header, Unit Separator closes it.
	rec.Marker = 0x1C
	rec.TrailingMarker = 0x1F

	// Linefeeds after the filename, the file size and the validation reason.
	rec.Fnlf = 0x0A
	rec.Fntimelf = 0x0A
	rec.Fnvalidationlf = 0x0A

	rec.FileTime = time.Now().UTC().Unix()

	// FileSize and PassValidation keep their zero values; the text fields are
	// blank, i.e. all spaces.
	for i := range rec.FileName {
		rec.FileName[i] = ' '
	}
	for i := range rec.ValidationFailReason {
		rec.ValidationFailReason[i] = ' '
	}
	return rec
}
/*
*
*
* Helper functions
*
*
*/
// int64ToBytes encodes i as a new 8-byte slice with the least significant byte
// first (little-endian). It is used to store the int64 header fields of an
// ArchiveRecord.
func int64ToBytes(i int64) []byte {
	out := make([]byte, 8)
	for idx := range out {
		// Byte idx holds bits [8*idx, 8*idx+7] of the value.
		out[idx] = byte(i >> (8 * uint(idx)))
	}
	return out
}
// bytesToInt64 decodes the first 8 bytes of b as an int64 stored least
// significant byte first (little-endian). Any bytes beyond the eighth are
// ignored; b must hold at least 8 bytes or the index expression panics.
func bytesToInt64(b []byte) int64 {
	var value int64
	for idx := 0; idx < 8; idx++ {
		value |= int64(b[idx]) << (8 * uint(idx))
	}
	return value
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment