Skip to content

Instantly share code, notes, and snippets.

@Kittoes0124
Last active August 12, 2018 15:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Kittoes0124/606ba934caf14a50133a78ebc666ce67 to your computer and use it in GitHub Desktop.
Save Kittoes0124/606ba934caf14a50133a78ebc666ce67 to your computer and use it in GitHub Desktop.
#include <errno.h>
#include <limits.h>
#include <io.h>
#include <share.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Width in BYTES of a 16-bit index entry (despite the name, this is used as a byte count/stride). */
#define BITS_16 2
/* Width in BYTES of a 64-bit value (despite the name, this is used as a byte count). */
#define BITS_64 8
/* Size in bytes of the read buffer used for the data file. */
#define DATA_BUFFER_SIZE 4096
/* Number of line entries per index block; used as a bit mask (INDEX_BLOCK_SIZE - 1), so it must be a power of two. */
#define INDEX_BLOCK_SIZE 2048
/* One index block: INDEX_BLOCK_SIZE 16-bit entries followed by one 64-bit value. */
#define INDEX_BUFFER_SIZE ((INDEX_BLOCK_SIZE * BITS_16) + BITS_64)
/* File that is read and indexed. */
#define DATA_FILE_NAME "X.csv"
/* File the index is appended to. */
#define INDEX_FILE_NAME "X.dat"
/* Character that is compared against input bytes to detect CSV quoting. */
#define QUOTE_CHAR '"'
/* Refills the buffer from the file via fread, resets the "used" counter to 0, and returns true if at least one byte was read. */
static inline bool fillBuffer(FILE *, char *, size_t, size_t *, size_t *);
/* Returns true when an unread byte is available, refilling the buffer if it is exhausted. NOTE: the result is a bool, not the character itself. */
static inline bool nextChar(FILE *, char *, size_t, size_t *, size_t *);
/* Writes a value as 2 little-endian bytes at the given buffer offset; returns false and sets errno on failure. */
static inline bool tryWriteLittleEndian16(char *, size_t, size_t, unsigned long long);
/* Writes a value as 8 little-endian bytes at the given buffer offset; returns false and sets errno on failure. */
static inline bool tryWriteLittleEndian64(char *, size_t, size_t, unsigned long long);
int main(void) {
char dataBuffer[DATA_BUFFER_SIZE];
FILE *dataFile = NULL;
char indexBuffer[INDEX_BUFFER_SIZE];
FILE *indexFile = NULL;
bool isQuotedSequence = false;
unsigned long long lineLength = 0;
unsigned long long lineMod = 0;
unsigned long long lineNumber = 0;
size_t numBytesRead = 0;
size_t numBytesUsed = 0;
unsigned long long position = 0;
int returnCode = EXIT_SUCCESS;
dataFile = _fsopen(DATA_FILE_NAME, "rbS", _SH_DENYWR);
if (NULL == dataFile) { goto error; }
indexFile = _fsopen(INDEX_FILE_NAME, "abS", _SH_DENYWR);
if (NULL == indexFile) { goto error; }
_fseeki64(indexFile, 0, SEEK_END);
if (0 < _ftelli64(indexFile)) {
errno = EEXIST;
goto error;
}
while ((numBytesUsed < numBytesRead) || fillBuffer(dataFile, dataBuffer, sizeof(dataBuffer), &numBytesRead, &numBytesUsed)) {
char currentChar = dataBuffer[numBytesUsed++];
lineLength++;
if (QUOTE_CHAR == currentChar) {
if (QUOTE_CHAR == nextChar(dataFile, dataBuffer, sizeof(dataBuffer), &numBytesRead, &numBytesUsed)) {
lineLength++;
numBytesUsed++;
}
else {
isQuotedSequence = !isQuotedSequence;
}
continue;
}
if (('\n' == currentChar) || ('\r' == currentChar)) {
if (('\r' == currentChar) && ('\n' == nextChar(dataFile, dataBuffer, sizeof(dataBuffer), &numBytesRead, &numBytesUsed))) {
lineLength++;
numBytesUsed++;
}
lineMod = ((INDEX_BLOCK_SIZE - 1) & lineNumber);
position += lineLength;
if (!tryWriteLittleEndian16(indexBuffer, sizeof(indexBuffer), (lineMod * BITS_16), lineLength)) { goto error; }
if ((INDEX_BLOCK_SIZE - 1) == lineMod) {
if (!tryWriteLittleEndian64(indexBuffer, sizeof(indexBuffer), ((lineMod * BITS_16) + BITS_16), position)) { goto error; }
fwrite(indexBuffer, 1, sizeof(indexBuffer), indexFile);
if (0 != ferror(indexFile)) { goto error; }
if (EOF == fflush(indexFile)) { goto error; }
}
lineLength = 0;
lineNumber++;
}
}
if (0 < lineLength) {
lineMod = ((INDEX_BLOCK_SIZE - 1) & lineNumber);
if (!tryWriteLittleEndian16(indexBuffer, sizeof(indexBuffer), (lineMod * BITS_16), lineLength)) { goto error; }
fwrite(indexBuffer, 1, ((lineMod * BITS_16) + BITS_16), indexFile);
if (0 != ferror(indexFile)) { goto error; }
if (EOF == fflush(indexFile)) { goto error; }
}
goto exit;
error:
strerror_s(dataBuffer, sizeof(dataBuffer), errno);
fprintf_s(stderr, "%s\n", dataBuffer);
returnCode = EXIT_FAILURE;
exit:
if (NULL != indexFile) {
fclose(indexFile);
}
if (NULL != dataFile) {
fclose(dataFile);
}
return returnCode;
}
/*
 * Reads up to bufferSize bytes from file into bufferData.
 *
 * On return *numBytesRead holds the number of bytes fread delivered and
 * *numBytesUsed is reset to 0. The result is true when at least one byte was
 * read, false otherwise (fread returned 0).
 */
static inline bool fillBuffer(FILE *file, char *bufferData, size_t bufferSize, size_t *numBytesRead, size_t *numBytesUsed) {
    *numBytesRead = fread(bufferData, sizeof(char), bufferSize, file);
    *numBytesUsed = 0;

    if (0 == *numBytesRead) {
        return false;
    }
    return true;
}
/*
 * Reports whether an unread byte is available at bufferData[*numBytesUsed].
 *
 * If the buffer still holds unread bytes, nothing is touched and true is
 * returned. Otherwise the buffer is refilled through fillBuffer() and its
 * result is returned. The return value is a bool, not the byte itself.
 */
static inline bool nextChar(FILE *file, char *bufferData, size_t bufferSize, size_t *numBytesRead, size_t *numBytesUsed) {
    if (*numBytesUsed < *numBytesRead) {
        return true;
    }
    return fillBuffer(file, bufferData, bufferSize, numBytesRead, numBytesUsed);
}
/*
 * Stores value as two little-endian bytes at bufferData[bufferOffset].
 *
 * Returns true on success. Returns false without touching the buffer and sets
 * errno to
 *   ERANGE when value does not fit in 16 bits,
 *   EFAULT when bufferData is NULL or the two bytes would not fit inside
 *          bufferSize starting at bufferOffset.
 */
static inline bool tryWriteLittleEndian16(char *bufferData, size_t bufferSize, size_t bufferOffset, unsigned long long value) {
    const size_t width = 2; /* number of bytes written below */

    if (0xFFFFu < value) {
        errno = ERANGE;
        return false;
    }
    /*
     * Compare against (bufferSize - width) rather than computing
     * (bufferOffset + width), which wraps around for very large offsets and
     * would let an out-of-bounds write through.
     */
    if ((NULL == bufferData) || (bufferSize < width) || ((bufferSize - width) < bufferOffset)) {
        errno = EFAULT;
        return false;
    }
    bufferData[(bufferOffset + 0)] = (char)((value >> 0) & 0x00FF);
    bufferData[(bufferOffset + 1)] = (char)((value >> 8) & 0x00FF);
    return true;
}
/*
 * Stores value as eight little-endian bytes at bufferData[bufferOffset].
 *
 * Returns true on success. Returns false without touching the buffer and sets
 * errno to
 *   ERANGE when value does not fit in 64 bits (only possible where
 *          unsigned long long is wider than 64 bits),
 *   EFAULT when bufferData is NULL or the eight bytes would not fit inside
 *          bufferSize starting at bufferOffset.
 */
static inline bool tryWriteLittleEndian64(char *bufferData, size_t bufferSize, size_t bufferOffset, unsigned long long value) {
    const size_t width = 8; /* number of bytes written below */
    size_t byteIndex;

    /*
     * Two 32-bit shifts instead of one 64-bit shift: shifting by the full
     * width of the type would be undefined behaviour when unsigned long long
     * is exactly 64 bits wide.
     */
    if (0 != ((value >> 32) >> 32)) {
        errno = ERANGE;
        return false;
    }
    /*
     * Compare against (bufferSize - width) rather than computing
     * (bufferOffset + width), which wraps around for very large offsets and
     * would let an out-of-bounds write through.
     */
    if ((NULL == bufferData) || (bufferSize < width) || ((bufferSize - width) < bufferOffset)) {
        errno = EFAULT;
        return false;
    }
    /* Least significant byte first. */
    for (byteIndex = 0; byteIndex < width; byteIndex++) {
        bufferData[bufferOffset + byteIndex] = (char)((value >> (8 * byteIndex)) & 0x00FF);
    }
    return true;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment