Skip to content

Instantly share code, notes, and snippets.

@benaryorg
Created July 6, 2014 20:50
Show Gist options
  • Save benaryorg/d43080c78dd456d08110 to your computer and use it in GitHub Desktop.
Save benaryorg/d43080c78dd456d08110 to your computer and use it in GitHub Desktop.
C++ program to split a file into single words
/*
* This little program splits a whole file into single words,
* which get saved in a list.
* A word is a maximal run of graphical characters (as classified by isgraph),
* i.e. everything between whitespace or other non-graphical characters.
*
* Made by @benaryorg (2014)
* License: WTFPL
*/
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cctype>
#include <iostream>
#include <fstream>
#include <list>
#define BUF_SIZE 4096 //Size in bytes of one read chunk. Words may be longer than this; they are never split.
using namespace std;

/*
* Reads `in` until it stops delivering data and appends every word to `words`.
* A word is a maximal run of characters for which isgraph() is true; every
* other byte (whitespace, control characters, NUL, ...) acts as a separator.
* Empty words are never stored.
*
* The stream is consumed in chunks of BUF_SIZE bytes, but the word currently
* being built lives in a std::string, so a word that straddles a chunk
* boundary (or is longer than a whole chunk) is still stored in one piece.
*/
static void split_words(std::istream &in,std::list<std::string> &words)
{
	char chunk[BUF_SIZE];
	std::string current;
	while(in)
	{
		in.read(chunk,sizeof chunk);
		//A short final read sets eofbit/failbit, but gcount() still reports
		//how many bytes actually arrived, so they are processed before the loop ends.
		const std::streamsize got=in.gcount();
		for(std::streamsize k=0;k<got;++k)
		{
			//isgraph() requires a value representable as unsigned char (or EOF);
			//passing a negative plain char would be undefined behavior.
			if(std::isgraph(static_cast<unsigned char>(chunk[k])))
			{
				current+=chunk[k];
			}
			else if(!current.empty())
			{
				words.push_back(current);
				current.clear();
			}
		}
	}
	//The input may end in the middle of a word without a trailing separator.
	if(!current.empty())
	{
		words.push_back(current);
	}
}

/*
* Entry point. Expects exactly one argument: the path of the file to split.
* Prints every word of that file on its own line to stdout.
* Returns 0 on success and -1 on wrong usage or if the file cannot be opened.
*/
int main(int argc,char **argv)
{
	if(argc!=2)
	{
		std::cerr<<"Wrong Parameters!"<<std::endl;
		std::cerr<<"Usage: "<<*argv<<" file"<<std::endl;
		return -1;
	}
	std::ifstream file(argv[1],std::ifstream::in);
	if(!file.is_open())
	{
		//Report the problem instead of silently behaving as if the file were empty.
		std::cerr<<"Could not open file: "<<argv[1]<<std::endl;
		return -1;
	}
	std::list<std::string> words;
	split_words(file,words);
	for(const std::string &word:words)
	{
		//'\n' instead of endl: same output, without flushing stdout after every word.
		std::cout<<word<<'\n';
	}
	return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment