Skip to content

Instantly share code, notes, and snippets.

@rgladwell
Created May 23, 2014 09:01
Show Gist options
  • Save rgladwell/593b2a8e6bca6cb10107 to your computer and use it in GitHub Desktop.
Save rgladwell/593b2a8e6bca6cb10107 to your computer and use it in GitHub Desktop.
html2perl
/////////////////////////////////////////////////////////////////////
//
// $Id: main.cpp,v 1.9 2000/11/10 21:01:09 ricardo Exp $
//
// Filename  : main.cpp
// Date      : Sat Oct 21 16:52:01 BST 2000
// Copyright : (C) 2000 by Ricardo Gladwell
// Email     : ricgladwell@netscapeonline.co.uk
//
/////////////////////////////////////////////////////////////////////
// //
// This program is free software; you can redistribute it and/or //
// modify it under the terms of the GNU General Public License //
// as published by the Free Software Foundation; either version //
// 2 of the License, or (at your option) any later version. //
// //
/////////////////////////////////////////////////////////////////////
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
/////////////////////////////////////////////////////////////////////
// D E F I N E S T A T E M E N T S //////////////////////////////
/////////////////////////////////////////////////////////////////////
// Default set of characters that must be escaped in the generated Perl
// string literals: tab, double quote, single quote, backslash, and
// @ % $ # . : & ! -
// main() seeds its working escape list from this string; the -i and -e
// command line options then add characters to, or remove characters from,
// that list. A tab is emitted as the two characters \t, every other listed
// character is emitted with a backslash in front of it.
#define ESCAPE_CHARACTERS "\t\"\'\\@%$#.:&!-"
/////////////////////////////////////////////////////////////////////
// I N C L U D E S T A T E M E N T S ////////////////////////////
/////////////////////////////////////////////////////////////////////
#include <unistd.h>
#include <iostream.h>
#include <fstream.h>
#include <unistd.h>
#include <string>
/////////////////////////////////////////////////////////////////////
// M A I N F U N C T I O N //////////////////////////////////////
/////////////////////////////////////////////////////////////////////
int main( int argc, char *argv[] )
{
/////////////////////////////////////////////////////////////////////
// I N I T I A L I S A T I O N ////////////////////////////////////
/////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////
// Variable Declarations
string *file = NULL; // input file
string buffer; // buffer
string escapees( ESCAPE_CHARACTERS ); // non-printable chars
unsigned int index = 0; // generic index
int lines_read = 0; // lines read
int lines_written = 0; // lines written
bool cgi = false; // cgi mode
istream *in = &cin; // input stream
char c; // command line argument
/////////////////////////////////////////////////////////////////////
// Get and parse command line options.
while( ( c = getopt( argc, argv, "i:e:c" ) ) != -1 )
{
switch( c )
{
case 'i':
// include additional escape characters
{
string argument( optarg );
for( index = 0; index != argument.length(); index++ )
{
char c = argument.at( index );
if( escapees.find( c ) == string::npos ) escapees += c;
}
break;
}
case 'e':
// exclude specified escaped characters
{
string argument( optarg );
string temp = "";
for( index = 0; index != escapees.length(); index++ )
{
char c = escapees.at( index );
if( argument.find( c ) == string::npos ) temp += c;
}
escapees = temp;
break;
}
case 'c':
// switch cgi mode
cgi = true;
break;
default:
cerr << "usage: " << argv[0] << " -[iec] [file]" << endl;
exit( 1 );
break;
}
}
if( optind < argc ) {
file = new string( argv[optind] );
}
/////////////////////////////////////////////////////////////////////
// If a file name has been specified set-up input file stream.
ifstream *fin = NULL;
if ( file != NULL )
{
fin = new ifstream( file->c_str() );
in = fin;
}
if ( !(*in) )
{
cerr << argv[0] << ": "
<< (*file)
<< ": No such file or directory"
<< endl;
return false;
}
/////////////////////////////////////////////////////////////////////
// B E G I N M A I N C O N T R O L L O O P //////////////////
/////////////////////////////////////////////////////////////////////
if( cgi ) cout << "print \"Content-type: text/html\\n\\n";
while ( !in->eof() )
{
/////////////////////////////////////////////////////////////////////
// Read in a line into the string buffer character by characters
// until you hit a newline ('\n') or carriage return ('\r')
// character.
// empty buffer
buffer = "";
// read line or until EOF
for ( c = in->get(); c != '\n' && c != '\r'; c = in->get() )
{
if ( c == -1 ) break;
buffer += c;
}
// increment line counter
// initialise loop variables
lines_read++;
bool empty = true;
int first_tab = -1;
/////////////////////////////////////////////////////////////////////
// Eat leading whitespace.
for( index = 0; index != buffer.length(); index++ )
{
if ( buffer[index] == '\t' && first_tab == -1 )
{
first_tab = index;
}
if ( !isspace( buffer[index] ) )
{
if ( first_tab != -1 ) index = first_tab;
buffer.erase( 0, index );
empty = false;
if( lines_written != 0 || cgi ) cout << "\";" << endl;
break;
}
}
/////////////////////////////////////////////////////////////////////
// If line is empty simply insert an extra newline in the
// terminating newline string.
if ( empty )
{
cout << "\\n";
continue;
}
/////////////////////////////////////////////////////////////////////
// Find all bad characters in the escape character list and add the
// '\' escape character to the front. When you encounter a tab
// character replace with the string '\\t'.
for ( index = buffer.find_first_of( escapees, 0 );
index != string::npos;
index = buffer.find_first_of( escapees, index + 2 ) )
{
if ( buffer[index] == '\t' ) buffer.replace( index, 1, "\\t" );
else buffer.insert( index, "\\" );
}
cout << "print \"" << buffer << "\", \"\\n";
lines_written++;
}
cout << "\";" << endl;
/////////////////////////////////////////////////////////////////////
// T E R M I N A T I O N //////////////////////////////////////////
/////////////////////////////////////////////////////////////////////
if( fin != NULL ) fin->close();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment