Last active
September 3, 2020 05:22
-
-
Save Costava/427f3083d154f9f3b7bb5224df8c4703 to your computer and use it in GitHub Desktop.
Prints a list of the lines in file1 that are not in file2 and a list of the lines in file2 that are not in file1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// COMPILING: | |
// gcc -std=c99 -Wall --output uniquelines uniquelines.c | |
// USAGE: | |
// ./uniquelines path/to/file1 path/to/file2 | |
// DESCRIPTION: | |
// Prints a list of the lines in file1 that are not in file2 | |
// and a list of the lines in file2 that are not in file1 | |
// Within a file, lines are separated by a single-character `DELIMITER` | |
// defined below | |
// Lines may be a maximum of (BUF_LEN - 1) characters long (longer lines cause | |
// the program to give up and exit) | |
// Uses only the C standard library | |
#define DELIMITER '\n' | |
#define BUF_LEN 2048 | |
// Turn off Microsoft's "use _s" warnings | |
#ifdef _WIN32 | |
#define _CRT_SECURE_NO_WARNINGS | |
#endif | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
// Equivalent to malloc but prints to stderr and exits if there is an error | |
void *easy_malloc(size_t size) { | |
void *const ptr = malloc(size); | |
if (ptr == NULL) { | |
fprintf(stderr, "%s: Failed to malloc %zu bytes\n", __func__, size); | |
exit(1); | |
} | |
return ptr; | |
} | |
// Equivalent to fgetc but prints to stderr and exits if there is an error | |
int easy_fgetc(FILE *const fp) { | |
const int c = fgetc(fp); | |
if (c == EOF) { | |
const int ferr = ferror(fp); | |
if (ferr != 0) { | |
fprintf(stderr, "%s: ferror: %d\n", __func__, ferr); | |
exit(1); | |
} | |
} | |
return c; | |
} | |
// Read from fp into buf until EOF or DELIMITER | |
// Does not put DELIMITER in buf | |
// buf_len is the number of characters put into buf | |
// not counting the nul terminator (which is put in buf) | |
// Prints to stderr and exits if line exceeds BUF_MAX_LEN | |
void easy_read_line( | |
FILE *const fp, | |
char *const buf, | |
const int BUF_MAX_LEN, | |
int *const buf_len) | |
{ | |
*buf_len = 0; | |
if (BUF_MAX_LEN == 0) { | |
return; | |
} | |
while (true) {// While reading characters from current line | |
const int c = easy_fgetc(fp); | |
if (c == EOF || c == DELIMITER) { | |
break; | |
} | |
buf[*buf_len] = c; | |
*buf_len += 1; | |
// (BUF_MAX_LEN - 1) because need space for nul terminator in buf | |
if (*buf_len >= BUF_MAX_LEN - 1) { | |
// Line is longer than buffer can handle | |
// We give up | |
fprintf(stderr, "%s: File has line that will not " | |
"fit in buffer of length %d\n", __func__, BUF_MAX_LEN); | |
exit(1); | |
} | |
} | |
buf[*buf_len] = '\0'; | |
} | |
// Equivalent to fopen but prints to stderr and exits if there is an error | |
FILE *easy_fopen(char *filename, char *mode) { | |
FILE *const fp = fopen(filename, mode); | |
if (fp == NULL) { | |
fprintf(stderr, "%s: Failed to fopen: %s\n", __func__, filename); | |
exit(1); | |
} | |
return fp; | |
} | |
// Closes fp | |
// Prints to stderr if there is an error | |
void easy_fclose(FILE *fp) { | |
if (fclose(fp) != 0) { | |
fprintf(stderr, "%s: Failed to fclose\n", __func__); | |
} | |
} | |
// String node for a linked list | |
struct str_node { | |
char *str; | |
struct str_node *prev; | |
struct str_node *next; | |
}; | |
// Append node to the linked list | |
// list is a pointer to a pointer to the first node in the list | |
// or is a pointer to NULL if the list is empty | |
void append_str_node(struct str_node **list, struct str_node *node) { | |
if (*list == NULL) { | |
*list = node; | |
return; | |
} | |
struct str_node *last = *list; | |
while (true) { | |
if (last->next == NULL) { | |
break; | |
} | |
last = last->next; | |
} | |
last->next = node; | |
node->prev = last; | |
} | |
// Remove node from the linked list it is in | |
// Do not pass node as NULL | |
void remove_self(struct str_node *node) { | |
if (node->prev != NULL) { | |
node->prev->next = node->next; | |
} | |
if (node->next != NULL) { | |
node->next->prev = node->prev; | |
} | |
} | |
// front is the first node in the list | |
// or is NULL if the list is empty | |
// Returns a pointer to the first node with str | |
// Returns NULL if str not found on any node | |
struct str_node *find_str_node(struct str_node *front, char *const str) { | |
while (true) { | |
if (front == NULL) { | |
return NULL; | |
} | |
if (strcmp(front->str, str) == 0) { | |
return front; | |
} | |
front = front->next; | |
} | |
} | |
// front is the first node in the list | |
// or is NULL if the list is empty | |
// Prints every str in the list | |
void print_list(struct str_node *front) { | |
while (true) { | |
if (front == NULL) { | |
return; | |
} | |
printf("str: %s\n", front->str); | |
front = front->next; | |
} | |
} | |
int main(int argc, char **argv) { | |
if (argc != 3) { | |
printf("USAGE: ./uniquelines path/to/file1 path/to/file2\n"); | |
exit(1); | |
} | |
// For keeping track of the unique lines in each file | |
struct str_node *list1 = NULL; | |
struct str_node *list2 = NULL; | |
// Open both files | |
FILE *const fp1 = easy_fopen(argv[1], "rb"); | |
FILE *const fp2 = easy_fopen(argv[2], "rb"); | |
char buf[BUF_LEN]; | |
int bytes_read; | |
// Read all fp1 lines | |
while (true) { | |
easy_read_line(fp1, buf, BUF_LEN, &bytes_read); | |
if (bytes_read == 0) { | |
break; | |
} | |
struct str_node *const new_str_node = | |
easy_malloc(sizeof(struct str_node)); | |
new_str_node->prev = NULL; | |
new_str_node->next = NULL; | |
new_str_node->str = easy_malloc(bytes_read + 1); | |
strcpy(new_str_node->str, buf); | |
append_str_node(&list1, new_str_node); | |
} | |
// Read all fp2 lines | |
while (true) { | |
easy_read_line(fp2, buf, BUF_LEN, &bytes_read); | |
if (bytes_read == 0) { | |
break; | |
} | |
struct str_node *const found_node = find_str_node(list1, buf); | |
if (found_node == NULL) { | |
// Unique line found | |
// Add node to list2 | |
struct str_node *const new_str_node = | |
easy_malloc(sizeof(struct str_node)); | |
new_str_node->prev = NULL; | |
new_str_node->next = NULL; | |
new_str_node->str = easy_malloc(bytes_read + 1); | |
strcpy(new_str_node->str, buf); | |
append_str_node(&list2, new_str_node); | |
} | |
else { | |
// Line exists in file1 | |
// Remove the line from the file1 list | |
// Do NOT add the line to the file2 list | |
remove_self(found_node); | |
free(found_node->str); | |
free(found_node); | |
} | |
} | |
easy_fclose(fp1); | |
easy_fclose(fp2); | |
printf("#### LIST 1:\n"); | |
print_list(list1); | |
printf("#### LIST 2:\n"); | |
print_list(list2); | |
return 0; | |
} | |
/* zlib License | |
Copyright (c) 2020 Costava | |
This software is provided 'as-is', without any express or implied | |
warranty. In no event will the authors be held liable for any damages | |
arising from the use of this software. | |
Permission is granted to anyone to use this software for any purpose, | |
including commercial applications, and to alter it and redistribute it | |
freely, subject to the following restrictions: | |
1. The origin of this software must not be misrepresented; you must not | |
claim that you wrote the original software. If you use this software | |
in a product, an acknowledgment in the product documentation would be | |
appreciated but is not required. | |
2. Altered source versions must be plainly marked as such, and must not be | |
misrepresented as being the original software. | |
3. This notice may not be removed or altered from any source distribution. | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment