Created
March 30, 2016 23:56
-
-
Save jnbek/356a9461612f123be674e23e1dfb216c to your computer and use it in GitHub Desktop.
Tiny CSV Reader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Tiny CSV Reader */
/* Copyright (C) 2015, Deligiannidis Konstantinos
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * getcols - split `line` into tokens separated by the string `delim`.
 *
 * The result is returned through `*out_storage` as ONE contiguous heap block:
 *
 *   [ ptr1, ptr2, ..., ptrN, NULL, "token1\0...", "token2\0...", ..., "tokenN\0" ]
 *
 * The first part is a NULL-terminated array of char* pointing into the second
 * part, which is a private copy of `line` where the first character of every
 * delimiter occurrence has been overwritten with '\0'. The caller releases
 * everything with a single free( *out_storage ).
 *
 * There is no fixed limit on the number of tokens: the input is scanned once
 * to count them, the block is allocated, and the copy is then split in place.
 *
 * Parameters:
 *   line        - NUL-terminated input text; it is never modified.
 *   delim       - NUL-terminated, non-empty delimiter string.
 *   out_storage - address of a char** that MUST currently be NULL; on success
 *                 it receives the allocated block, on failure it is untouched.
 *
 * Returns the number of tokens found (always >= 1; an empty line yields one
 * empty token), or a negative error code:
 *   -1  line, delim or out_storage is NULL
 *   -2  delim is the empty string
 *   -3  the result is too large or memory allocation failed
 *   -4  *out_storage is not NULL (it looks already allocated)
 */
int getcols( const char * const line, const char * const delim, char ***out_storage )
{
    if ( !out_storage ) return -1;           //Nowhere to store the result.
    if ( *out_storage != NULL ) return -4;   //This SHOULD be NULL: Not Already Allocated
    if ( !line || !delim ) return -1;        //NULL pointers Rejected Here

    const size_t delim_size = strlen( delim );
    if ( delim_size == 0 ) return -2;        //Delimiter not provided

    //Pass 1: count the tokens so the pointer table can be sized exactly.
    size_t tokens_found = 1;
    const char *cursor = line;
    const char *hit;
    while ( ( hit = strstr( cursor, delim ) ) != NULL ) {
        tokens_found++;
        cursor = hit + delim_size;
    }
    //`cursor` now sits on the last token, so only the tail still needs measuring.
    const size_t line_size = (size_t)( cursor - line ) + strlen( cursor );

    //The count is returned as an int and the total byte size must fit in size_t.
    if ( tokens_found > (size_t) INT_MAX ) return -3;
    if ( tokens_found + 1 > ( SIZE_MAX - line_size - 1 ) / sizeof( char * ) ) return -3;

    //One allocation holds the (N + 1) pointers followed by the string data.
    const size_t size_ptr_region = ( tokens_found + 1 ) * sizeof( char * );
    char **out = malloc( size_ptr_region + line_size + 1 );
    if ( !out ) return -3;

    char *str_region = (char *) out + size_ptr_region;
    memcpy( str_region, line, line_size + 1 );   //The +1 copies the terminating '\0' too.

    //Pass 2: split the private copy. Only the FIRST character of each delimiter
    //is replaced ("arg1||arg2" -> "arg1\0|arg2"); the search resumes after the
    //whole delimiter, so it finds exactly the occurrences counted in pass 1.
    size_t idx = 0;
    char *token = str_region;
    char *sep;
    while ( ( sep = strstr( token, delim ) ) != NULL ) {
        out[ idx++ ] = token;
        *sep = '\0';
        token = sep + delim_size;
    }
    out[ idx++ ] = token;    //Last token (possibly empty).
    out[ idx ] = NULL;       //Sentinel, so callers can iterate without the count.

    *out_storage = out;      //Published only after the block is fully built.
    return (int) tokens_found;
}
/*
 * Demo driver for getcols().
 *
 * Example 1 splits a single line on the two-character delimiter ";;".
 * Example 2 splits a small in-memory "file" into rows on '\n' and then splits
 * every row into columns on ';'.
 *
 * Every getcols() result is checked before the returned table is touched,
 * because on failure the output pointer is left NULL.
 * Returns 0 on success, EXIT_FAILURE if any split fails.
 */
int main( void )
{
    char in_line[] = "Arg1;;Th;s is not Del;m;ter;;Arg3;;;;Final";
    char delim[] = ";;";
    char **columns = NULL;   //Must be NULL before the call, otherwise getcols() returns -4.
    int i, j;

    printf("Example1:\n");
    int cols_found = getcols( in_line, delim, &columns );
    if ( cols_found < 0 ) {
        fprintf( stderr, "getcols() failed with code %d\n", cols_found );
        return EXIT_FAILURE;
    }
    //(1st way) iterate using the returned count.
    for ( i = 0; i < cols_found; i++ ) printf("Column[ %d ] = %s\n", i, columns[ i ] );
    free( columns );         //Release the Single Contiguous Memory Space.
    columns = NULL;          //Ready for a possible next getcols() call.

    printf("\n\nExample2, Nested:\n\n");
    char example_file[] = "ID;Day;Month;Year;Telephone;email;Date of registration\n"
                          "1;Sunday;january;2009;123-124-456;jitter@go.xyz;2015-05-13\n"
                          "2;Monday;March;2011;(+30)333-22-55;buffer@wl.it;2009-05-23";
    char **rows = NULL;      //getcols() requires it to be NULL.
    int rows_found = getcols( example_file, "\n", &rows );
    if ( rows_found < 0 ) {
        fprintf( stderr, "getcols() failed with code %d\n", rows_found );
        return EXIT_FAILURE;
    }

    //(2nd way) iterate until the NULL sentinel that terminates the pointer table.
    for ( i = 0; rows[ i ]; i++ ) {
        printf("Line[ %d ] = %s\n", i, rows[ i ] );
        char **columnX = NULL;
        int fields_found = getcols( rows[ i ], ";", &columnX );
        if ( fields_found < 0 ) {
            fprintf( stderr, "getcols() failed with code %d\n", fields_found );
            free( rows );
            return EXIT_FAILURE;
        }
        for ( j = 0; columnX[ j ]; j++ ) printf(" Col[ %d ] = %s\n", j, columnX[ j ] );
        free( columnX );
    }

    free( rows );
    rows = NULL;
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment