-
-
Save Kittoes0124/606ba934caf14a50133a78ebc666ce67 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <errno.h> | |
#include <limits.h> | |
#include <io.h> | |
#include <share.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
/* Width, in BYTES, of a 16-bit index entry (despite the "BITS" in the name). */
#define BITS_16 2
/* Width, in BYTES, of a 64-bit position marker (despite the "BITS" in the name). */
#define BITS_64 8
/* Size of the read buffer used while scanning the data file. */
#define DATA_BUFFER_SIZE 4096
/* Number of line entries per index block; must stay a power of two because
 * the block-relative line number is computed with a bit mask. */
#define INDEX_BLOCK_SIZE 2048
/* One full index block: INDEX_BLOCK_SIZE 16-bit entries plus one 64-bit marker. */
#define INDEX_BUFFER_SIZE ((INDEX_BLOCK_SIZE * BITS_16) + BITS_64)
/* Input file that gets indexed. */
#define DATA_FILE_NAME "X.csv"
/* Output file that receives the index. */
#define INDEX_FILE_NAME "X.dat"
/* Character that opens and closes a quoted sequence in the data file. */
#define QUOTE_CHAR '"'
/* Forward declarations of the helpers that are defined after main(). */
static inline bool fillBuffer(FILE *file, char *bufferData, size_t bufferSize, size_t *numBytesRead, size_t *numBytesUsed);
static inline bool nextChar(FILE *file, char *bufferData, size_t bufferSize, size_t *numBytesRead, size_t *numBytesUsed);
static inline bool tryWriteLittleEndian16(char *bufferData, size_t bufferSize, size_t bufferOffset, unsigned long long value);
static inline bool tryWriteLittleEndian64(char *bufferData, size_t bufferSize, size_t bufferOffset, unsigned long long value);
int main(void) { | |
char dataBuffer[DATA_BUFFER_SIZE]; | |
FILE *dataFile = NULL; | |
char indexBuffer[INDEX_BUFFER_SIZE]; | |
FILE *indexFile = NULL; | |
bool isQuotedSequence = false; | |
unsigned long long lineLength = 0; | |
unsigned long long lineMod = 0; | |
unsigned long long lineNumber = 0; | |
size_t numBytesRead = 0; | |
size_t numBytesUsed = 0; | |
unsigned long long position = 0; | |
int returnCode = EXIT_SUCCESS; | |
dataFile = _fsopen(DATA_FILE_NAME, "rbS", _SH_DENYWR); | |
if (NULL == dataFile) { goto error; } | |
indexFile = _fsopen(INDEX_FILE_NAME, "abS", _SH_DENYWR); | |
if (NULL == indexFile) { goto error; } | |
_fseeki64(indexFile, 0, SEEK_END); | |
if (0 < _ftelli64(indexFile)) { | |
errno = EEXIST; | |
goto error; | |
} | |
while ((numBytesUsed < numBytesRead) || fillBuffer(dataFile, dataBuffer, sizeof(dataBuffer), &numBytesRead, &numBytesUsed)) { | |
char currentChar = dataBuffer[numBytesUsed++]; | |
lineLength++; | |
if (QUOTE_CHAR == currentChar) { | |
if (QUOTE_CHAR == nextChar(dataFile, dataBuffer, sizeof(dataBuffer), &numBytesRead, &numBytesUsed)) { | |
lineLength++; | |
numBytesUsed++; | |
} | |
else { | |
isQuotedSequence = !isQuotedSequence; | |
} | |
continue; | |
} | |
if (('\n' == currentChar) || ('\r' == currentChar)) { | |
if (('\r' == currentChar) && ('\n' == nextChar(dataFile, dataBuffer, sizeof(dataBuffer), &numBytesRead, &numBytesUsed))) { | |
lineLength++; | |
numBytesUsed++; | |
} | |
lineMod = ((INDEX_BLOCK_SIZE - 1) & lineNumber); | |
position += lineLength; | |
if (!tryWriteLittleEndian16(indexBuffer, sizeof(indexBuffer), (lineMod * BITS_16), lineLength)) { goto error; } | |
if ((INDEX_BLOCK_SIZE - 1) == lineMod) { | |
if (!tryWriteLittleEndian64(indexBuffer, sizeof(indexBuffer), ((lineMod * BITS_16) + BITS_16), position)) { goto error; } | |
fwrite(indexBuffer, 1, sizeof(indexBuffer), indexFile); | |
if (0 != ferror(indexFile)) { goto error; } | |
if (EOF == fflush(indexFile)) { goto error; } | |
} | |
lineLength = 0; | |
lineNumber++; | |
} | |
} | |
if (0 < lineLength) { | |
lineMod = ((INDEX_BLOCK_SIZE - 1) & lineNumber); | |
if (!tryWriteLittleEndian16(indexBuffer, sizeof(indexBuffer), (lineMod * BITS_16), lineLength)) { goto error; } | |
fwrite(indexBuffer, 1, ((lineMod * BITS_16) + BITS_16), indexFile); | |
if (0 != ferror(indexFile)) { goto error; } | |
if (EOF == fflush(indexFile)) { goto error; } | |
} | |
goto exit; | |
error: | |
strerror_s(dataBuffer, sizeof(dataBuffer), errno); | |
fprintf_s(stderr, "%s\n", dataBuffer); | |
returnCode = EXIT_FAILURE; | |
exit: | |
if (NULL != indexFile) { | |
fclose(indexFile); | |
} | |
if (NULL != dataFile) { | |
fclose(dataFile); | |
} | |
return returnCode; | |
} | |
/*
 * Refills bufferData with up to bufferSize bytes read from file.
 *
 * On return *numBytesRead holds the number of bytes fread() delivered and
 * *numBytesUsed is reset to 0. Returns true when at least one byte was read;
 * false covers both end-of-file and a read error (callers that need to tell
 * them apart must check ferror() themselves).
 */
static inline bool fillBuffer(FILE *file, char *bufferData, size_t bufferSize, size_t *numBytesRead, size_t *numBytesUsed) {
    *numBytesRead = fread(bufferData, 1, bufferSize, file);
    *numBytesUsed = 0;
    return (0 < *numBytesRead);
}
/*
 * Makes sure an unread byte is available at bufferData[*numBytesUsed].
 *
 * Despite its name this function does NOT return the character: it returns
 * true when a byte is available (refilling the buffer via fillBuffer() if it
 * was exhausted) and false when no more data could be read. Nothing is
 * consumed; callers read bufferData[*numBytesUsed] themselves. A refill
 * overwrites the whole buffer and resets *numBytesUsed to 0.
 */
static inline bool nextChar(FILE *file, char *bufferData, size_t bufferSize, size_t *numBytesRead, size_t *numBytesUsed) {
    if (*numBytesUsed < *numBytesRead) {
        return true;
    }
    return fillBuffer(file, bufferData, bufferSize, numBytesRead, numBytesUsed);
}
/*
 * Stores value as a 16-bit little-endian integer at bufferData[bufferOffset].
 *
 * Returns true on success. Returns false with errno = ERANGE when value does
 * not fit in 16 bits, or with errno = EFAULT when the two bytes would not lie
 * entirely inside the bufferSize-byte buffer. The buffer is left untouched on
 * failure.
 */
static inline bool tryWriteLittleEndian16(char *bufferData, size_t bufferSize, size_t bufferOffset, unsigned long long value) {
    enum { FIELD_BYTES = 2 };
    unsigned char *out;

    if (0xFFFFULL < value) {
        errno = ERANGE;
        return false;
    }
    /* Subtraction form: "bufferOffset + width" could wrap for huge offsets. */
    if ((bufferSize < FIELD_BYTES) || ((bufferSize - FIELD_BYTES) < bufferOffset)) {
        errno = EFAULT;
        return false;
    }

    /* Write through unsigned char so bytes above 0x7F are stored portably. */
    out = (unsigned char *)bufferData + bufferOffset;
    out[0] = (unsigned char)((value >> 0) & 0xFF);
    out[1] = (unsigned char)((value >> 8) & 0xFF);
    return true;
}
/*
 * Stores the low 64 bits of value in little-endian order at
 * bufferData[bufferOffset].
 *
 * Returns true on success. Returns false with errno = EFAULT when the eight
 * bytes would not lie entirely inside the bufferSize-byte buffer; the buffer
 * is left untouched in that case. No range check is performed: the former
 * "ULLONG_MAX < value" test could never be true.
 */
static inline bool tryWriteLittleEndian64(char *bufferData, size_t bufferSize, size_t bufferOffset, unsigned long long value) {
    enum { FIELD_BYTES = 8 };
    unsigned char *out;
    size_t byteIndex;

    /* Subtraction form: "bufferOffset + width" could wrap for huge offsets. */
    if ((bufferSize < FIELD_BYTES) || ((bufferSize - FIELD_BYTES) < bufferOffset)) {
        errno = EFAULT;
        return false;
    }

    /* Write through unsigned char so bytes above 0x7F are stored portably. */
    out = (unsigned char *)bufferData + bufferOffset;
    for (byteIndex = 0; byteIndex < FIELD_BYTES; byteIndex++) {
        out[byteIndex] = (unsigned char)((value >> (8 * byteIndex)) & 0xFF);
    }
    return true;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment