Skip to content

Instantly share code, notes, and snippets.

@Costava
Last active September 3, 2020 05:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Costava/427f3083d154f9f3b7bb5224df8c4703 to your computer and use it in GitHub Desktop.
Save Costava/427f3083d154f9f3b7bb5224df8c4703 to your computer and use it in GitHub Desktop.
Prints a list of the lines in file1 that are not in file2 and a list of the lines in file2 that are not in file1
// COMPILING:
// gcc -std=c99 -Wall --output uniquelines uniquelines.c
// USAGE:
// ./uniquelines path/to/file1 path/to/file2
// DESCRIPTION:
// Prints a list of the lines in file1 that are not in file2
// and a list of the lines in file2 that are not in file1
// Within a file, lines are separated by a single-character `DELIMITER`
// defined below
// Lines may be a maximum of (BUF_LEN - 1) characters long (longer lines cause
// the program to give up and exit)
// Uses only the C standard library
#define DELIMITER '\n'
#define BUF_LEN 2048
// Turn off Microsoft's "use _s" warnings
#ifdef _WIN32
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Equivalent to malloc but prints to stderr and exits if there is an error
void *easy_malloc(size_t size) {
void *const ptr = malloc(size);
if (ptr == NULL) {
fprintf(stderr, "%s: Failed to malloc %zu bytes\n", __func__, size);
exit(1);
}
return ptr;
}
// Equivalent to fgetc but prints to stderr and exits if there is an error
int easy_fgetc(FILE *const fp) {
const int c = fgetc(fp);
if (c == EOF) {
const int ferr = ferror(fp);
if (ferr != 0) {
fprintf(stderr, "%s: ferror: %d\n", __func__, ferr);
exit(1);
}
}
return c;
}
// Read from fp into buf until EOF or DELIMITER
// Does not put DELIMITER in buf
// buf_len is the number of characters put into buf
// not counting the nul terminator (which is put in buf)
// Prints to stderr and exits if line exceeds BUF_MAX_LEN
void easy_read_line(
FILE *const fp,
char *const buf,
const int BUF_MAX_LEN,
int *const buf_len)
{
*buf_len = 0;
if (BUF_MAX_LEN == 0) {
return;
}
while (true) {// While reading characters from current line
const int c = easy_fgetc(fp);
if (c == EOF || c == DELIMITER) {
break;
}
buf[*buf_len] = c;
*buf_len += 1;
// (BUF_MAX_LEN - 1) because need space for nul terminator in buf
if (*buf_len >= BUF_MAX_LEN - 1) {
// Line is longer than buffer can handle
// We give up
fprintf(stderr, "%s: File has line that will not "
"fit in buffer of length %d\n", __func__, BUF_MAX_LEN);
exit(1);
}
}
buf[*buf_len] = '\0';
}
// Equivalent to fopen but prints to stderr and exits if there is an error
FILE *easy_fopen(char *filename, char *mode) {
FILE *const fp = fopen(filename, mode);
if (fp == NULL) {
fprintf(stderr, "%s: Failed to fopen: %s\n", __func__, filename);
exit(1);
}
return fp;
}
// Closes fp
// Prints to stderr if there is an error
void easy_fclose(FILE *fp) {
if (fclose(fp) != 0) {
fprintf(stderr, "%s: Failed to fclose\n", __func__);
}
}
// String node for a linked list
struct str_node {
char *str;
struct str_node *prev;
struct str_node *next;
};
// Append node to the linked list
// list is a pointer to a pointer to the first node in the list
// or is a pointer to NULL if the list is empty
void append_str_node(struct str_node **list, struct str_node *node) {
if (*list == NULL) {
*list = node;
return;
}
struct str_node *last = *list;
while (true) {
if (last->next == NULL) {
break;
}
last = last->next;
}
last->next = node;
node->prev = last;
}
// Remove node from the linked list it is in
// Do not pass node as NULL
void remove_self(struct str_node *node) {
if (node->prev != NULL) {
node->prev->next = node->next;
}
if (node->next != NULL) {
node->next->prev = node->prev;
}
}
// front is the first node in the list
// or is NULL if the list is empty
// Returns a pointer to the first node with str
// Returns NULL if str not found on any node
struct str_node *find_str_node(struct str_node *front, char *const str) {
while (true) {
if (front == NULL) {
return NULL;
}
if (strcmp(front->str, str) == 0) {
return front;
}
front = front->next;
}
}
// front is the first node in the list
// or is NULL if the list is empty
// Prints every str in the list
void print_list(struct str_node *front) {
while (true) {
if (front == NULL) {
return;
}
printf("str: %s\n", front->str);
front = front->next;
}
}
int main(int argc, char **argv) {
if (argc != 3) {
printf("USAGE: ./uniquelines path/to/file1 path/to/file2\n");
exit(1);
}
// For keeping track of the unique lines in each file
struct str_node *list1 = NULL;
struct str_node *list2 = NULL;
// Open both files
FILE *const fp1 = easy_fopen(argv[1], "rb");
FILE *const fp2 = easy_fopen(argv[2], "rb");
char buf[BUF_LEN];
int bytes_read;
// Read all fp1 lines
while (true) {
easy_read_line(fp1, buf, BUF_LEN, &bytes_read);
if (bytes_read == 0) {
break;
}
struct str_node *const new_str_node =
easy_malloc(sizeof(struct str_node));
new_str_node->prev = NULL;
new_str_node->next = NULL;
new_str_node->str = easy_malloc(bytes_read + 1);
strcpy(new_str_node->str, buf);
append_str_node(&list1, new_str_node);
}
// Read all fp2 lines
while (true) {
easy_read_line(fp2, buf, BUF_LEN, &bytes_read);
if (bytes_read == 0) {
break;
}
struct str_node *const found_node = find_str_node(list1, buf);
if (found_node == NULL) {
// Unique line found
// Add node to list2
struct str_node *const new_str_node =
easy_malloc(sizeof(struct str_node));
new_str_node->prev = NULL;
new_str_node->next = NULL;
new_str_node->str = easy_malloc(bytes_read + 1);
strcpy(new_str_node->str, buf);
append_str_node(&list2, new_str_node);
}
else {
// Line exists in file1
// Remove the line from the file1 list
// Do NOT add the line to the file2 list
remove_self(found_node);
free(found_node->str);
free(found_node);
}
}
easy_fclose(fp1);
easy_fclose(fp2);
printf("#### LIST 1:\n");
print_list(list1);
printf("#### LIST 2:\n");
print_list(list2);
return 0;
}
/* zlib License
Copyright (c) 2020 Costava
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment