Skip to content

Instantly share code, notes, and snippets.

@jsolid
Last active May 23, 2020 03:55
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jsolid/3916184 to your computer and use it in GitHub Desktop.
Save jsolid/3916184 to your computer and use it in GitHub Desktop.
POCO library: Calling from .cpp
/**
* @name EmailInbound (Mail Message Handler for POCO)
* @description Receive and sort emails
*/
#include "MyMailMessage.h"
/**
* POCO::Net::MailMessage
*/
/**
 * Extracts the bare e-mail address from the sender header.
 *
 * getSender() yields something like: John Smith <john.smith@xxx.com>
 * Only the text between the last '<' and the last '>' is returned.
 *
 * @return The text enclosed by the last pair of angle brackets
 */
const string MyMailMessage::GetCustomSender()
{
    const string fullSender = getSender();
    const size_t openBracket = fullSender.rfind('<');
    const size_t closeBracket = fullSender.rfind('>');
    // When a bracket is missing its position is string::npos and the size_t
    // arithmetic below wraps around; with no brackets at all this selects the
    // whole sender string.
    return fullSender.substr(openBracket + 1, closeBracket - openBracket - 1);
}
/**
 * Returns the value of the 'name' parameter of an application/... Content-Type header.
 *
 * Examples of header values seen in the wild:
 *   application/pdf; name="Invoice.pdf"      -> Invoice.pdf
 *   APPLICATION/PDF; name="8257049.PDF"      -> 8257049.PDF
 *   application/pdf; name=Invoice.pdf        -> Invoice.pdf
 *
 * The keyword search is case insensitive; the returned value keeps its original case.
 *
 * @return The parameter value, or an empty string when the content type does not
 *         mention both "application" and "name" (or has no '=' after "name")
 */
const string MyMailMessage::GetContentTypeProperty()
{
    const string ct = getContentType();
    const string cti = String::ToLower(ct); //for case insensitive find of keyword
    string filename = "";

    if (cti.find("application") == string::npos) {
        return filename;
    }
    const size_t namePos = cti.find("name");
    if (namePos == string::npos) {
        return filename;
    }

    // Look for the '=' that follows the keyword rather than the first '=' of the
    // header, which may belong to an earlier parameter (e.g. charset=...).
    // NOTE(review): assumes String::ToLower keeps character positions unchanged,
    // so an index found in cti is valid in ct - confirm.
    const size_t equalsPos = ct.find('=', namePos);
    if (equalsPos == string::npos) {
        return filename;
    }

    size_t valueStart = equalsPos + 1;
    if (valueStart < ct.size() && ct[valueStart] == '"') {
        // Quoted value: take everything up to the closing quote (or to the end
        // of the header when the closing quote is missing)
        ++valueStart;
        const size_t closingQuote = ct.find('"', valueStart);
        filename = ct.substr(valueStart, closingQuote == string::npos ? string::npos : closingQuote - valueStart);
    } else {
        // Unquoted value: runs up to the next parameter separator or the end
        const size_t valueEnd = ct.find(';', valueStart);
        filename = ct.substr(valueStart, valueEnd == string::npos ? string::npos : valueEnd - valueStart);
    }
    return filename;
}
#ifndef MYMAILMESSAGE_H
#define MYMAILMESSAGE_H
#include "Poco/Net/POP3ClientSession.h"
#include "Poco/Net/MailMessage.h"
#include "Poco/Net/PartHandler.h"
#include "Poco/Net/MessageHeader.h"
#include "Poco/Net/NameValueCollection.h"
#include "Poco/Net/QuotedPrintableDecoder.h"
#include "Poco/Net/MultipartReader.h"
#include "Poco/Exception.h"
#include "Poco/StreamCopier.h"
#include "Poco/Base64Decoder.h"
#include "utils/String.h"
#include <iostream>
#include <fstream>
using namespace std;
using Poco::Net::POP3ClientSession;
using Poco::Net::MailMessage;
using Poco::Net::PartHandler;
using Poco::Net::MessageHeader;
using Poco::Net::NameValueCollection;
using Poco::Net::MultipartReader;
using Poco::StreamCopier;
using Poco::Exception;
// Mail message type that extends Poco's MailMessage with two header-parsing helpers.
class MyMailMessage: public MailMessage {
public:
// Returns the part of getSender() found between the last '<' and the last '>'.
const string GetCustomSender();
// Returns a file name taken from the Content-Type header value when that value
// mentions both "application" and "name"; otherwise returns an empty string.
const string GetContentTypeProperty();
};
#endif
/**
* @name EmailInbound (Mail Part Handler for POCO)
* @description Receive and sort emails
*/
#include "MyPartHandler.h"
/**
* POCO::Net::PartHandler
* Documentation: http://pocoproject.org/docs/
* Called for every part encountered during the processing of an email message
* For Multipart message, it has the following pattern:
* Content-Transfer-Encoding="quoted-printable"
* Content-Type="text/plain"; charset="us-ascii"
* ....
* Content-Transfer-Encoding="quoted-printable"
* Content-Type="text/html"; charset="us-ascii"
* <html>
* ....
* </html>
*/
// Stores the part's headers, then extracts attachments (file name + content) and
// the text body from the part, descending into multipart content when present.
// @param messageHeader Headers of the part being handled
// @param stream        Content stream of the part being handled
void MyPartHandler::handlePart(const MessageHeader& messageHeader, std::istream& stream)
{
// Keep a serialised copy of this part's headers (exposed through GetHeaders())
stringstream headerSS;
messageHeader.write(headerSS);
_headers.push_back(headerSS.str());
if(messageHeader.has("Content-Disposition")) {
//If there is any file attachment, append the filename and attachment to vectors
string disp;
string filename;
string attachment;
NameValueCollection params;
MessageHeader::splitParameters(messageHeader["Content-Disposition"], disp, params);
// "nil" is used as the 'parameter not present' marker
filename = params.get("filename", "nil");
if(filename != "nil") {
// Filename might be encoded in Base64 or QuotedPrintable
_filenames.push_back(DecodeString(filename));
StreamCopier::copyToString(stream, attachment);
_attachments.push_back(attachment);
}
}
string contentType = messageHeader.get("Content-Type", "nil");
if((String::ToLower(contentType)).find("multipart") == 0) {
// Multipart content: walk the sub-parts manually with a MultipartReader
MultipartReader multipartReader(stream);
while(multipartReader.hasNextPart()) {
MessageHeader subMessageHeader;
multipartReader.nextPart(subMessageHeader);
string subContentType = subMessageHeader.get("Content-Type", "nil");
// Convert to lower case for comparison only
string lc_subctype = String::ToLower(subContentType);
//Priority is text/plain format, else save text/html format
if(lc_subctype == "nil") {
// Sub-part without a Content-Type header: skip it
continue;
} else if(lc_subctype.find("application") != string::npos && lc_subctype.find("name") != string::npos) {
// Save attachment(s) in sub-content part
string disp;
string filename;
string attachment;
NameValueCollection params;
// NOTE(review): the lower-cased header is parsed here, so the extracted file name
// (including any encoded-word markers such as ?B? / ?Q?) is lower-cased too - confirm intended
MessageHeader::splitParameters(lc_subctype, disp, params);
filename = params.get("name", "nil");
if(filename != "nil") {
// Filename and Attachments might be encoded in Base64 or QuotedPrintable
_filenames.push_back(DecodeString(filename));
string encoder = String::ToLower(subMessageHeader.get("Content-Transfer-Encoding", "nil"));
if(encoder == "base64") {
Poco::Base64Decoder base64Decoder(multipartReader.stream());
StreamCopier::copyToString(base64Decoder, attachment);
} else if(encoder == "quoted-printable") {
Poco::Net::QuotedPrintableDecoder qpDecoder(multipartReader.stream());
StreamCopier::copyToString(qpDecoder, attachment);
} else {
StreamCopier::copyToString(multipartReader.stream(), attachment);
}
// NOTE(review): the file name was already stored above; when the attachment is empty
// nothing is stored here, so _filenames and _attachments can end up with different sizes
if (!attachment.empty()) {
_attachments.push_back(attachment);
}
}
} else if(lc_subctype.find("boundary") != string::npos) {
// Sub-part declaring its own boundary: remember it (first one only) in _myboundary
int bStart = 0;
if(_myboundary.empty()) {
// NOTE(review): find() returns string::npos when there is no '_'; that value is narrowed to int here
bStart = subContentType.find('_');
_myboundary = String::FixField(subContentType, bStart, (subContentType.length() - (bStart + 1)));
}
} else if(lc_subctype.find("text/plain") == 0) {
string charset;
// NOTE(review): this check is case sensitive and uses the original (not lower-cased) header
if(subContentType.find("charset") != string::npos) {
//Outlook: Content-Type text/plain charset="us-ascii"
//Yahoo: Content-Type text/plain charset=iso-8859-1
string subct_clean = String::RemoveChar(subContentType, '"');
int charpos = subct_clean.find("charset=") + 8; //+8 to bypass the word "charset="
// Takes everything after "charset=" (any parameters following the charset are included)
charset = String::FixField(subct_clean, charpos, (subContentType.length() - charpos) );
}
//If body variable is not empty, it has the text/plain format of the email body.
string cte = subMessageHeader.get("Content-Transfer-Encoding", "nil");
//For some reasons, emails from outlook (content transfer encoding is specified as quoted-printable in header), it generates nil result in QuotedPrintableDecoder
// Hence the transfer encoding is only decoded when the charset is not exactly "us-ascii"
if(charset.compare("us-ascii") != 0) {
if(cte == "base64") {
Poco::Base64Decoder base64Decoder(multipartReader.stream());
StreamCopier::copyToString(base64Decoder, _body);
} else if(cte == "quoted-printable") {
Poco::Net::QuotedPrintableDecoder qpDecoder(multipartReader.stream());
StreamCopier::copyToString(qpDecoder, _body);
} else {
StreamCopier::copyToString(multipartReader.stream(), _body);
}
} else {
StreamCopier::copyToString(multipartReader.stream(), _body);
}
// When a nested boundary was recorded and differs from the reader's boundary,
// cut the body off just before the recorded boundary text
// NOTE(review): if _body does not contain _myboundary, find() returns string::npos and the length passed is npos - 2
if(!_myboundary.empty() && _myboundary.compare(multipartReader.boundary()) != 0) {
_body = String::Trim(String::FixField(_body, 0, (_body.find(_myboundary) - 2))); //-2 for the boundary heading, e.g. --_000_OD67Eexchau_
}
} else {
// NOTE(review): this condition is true for every string, so any other sub-part type always ends the loop
if(_body.empty() || _body.length() > 0) break;
// Will hit error "Malformed message: Field value too long/no CRLF found" under MessageHeader.read() in MessageHeader.cpp
// if "continue" is used. "text/plain" part will always come before "text/html" part
//Keep this code for reference of retrieving text/html content, ignore text/html part at this moment
/*
else if(subContentType.find("text/html") == 0) {
string cte = subMessageHeader.get("Content-Transfer-Encoding", "nil");
if(cte == "base64") {
Poco::Base64Decoder base64Decoder(multipartReader.stream());
StreamCopier::copyToString(base64Decoder, _body);
} else if(cte == "quoted-printable") {
Poco::Net::QuotedPrintableDecoder qpDecoder(multipartReader.stream());
StreamCopier::copyToString(qpDecoder, _body);
} else
StreamCopier::copyToString(stream, _body);
*/
}
}
} else if((String::ToLower(contentType)).find("application") != string::npos && (String::ToLower(contentType)).find("name") != string::npos) {
// Some oddball emails don't have a Content-Disposition clause even though they have attachments.
// Decoding is not necessary at top level as POCO will do it automatically. weird...need more testing
string disp;
string filename;
string attachment;
NameValueCollection params;
// NOTE(review): as above, the lower-cased header is parsed, so the file name loses its original case
MessageHeader::splitParameters(String::ToLower(contentType), disp, params);
filename = params.get("name", "nil");
if(filename != "nil") {
_filenames.push_back(DecodeString(filename));
/*
string encoder = String::ToLower(messageHeader.get("Content-Transfer-Encoding", "nil"));
if(encoder == "base64") {
Poco::Base64Decoder base64Decoder(stream);
StreamCopier::copyToString(base64Decoder, attachment);
} else if(encoder == "quoted-printable") {
Poco::Net::QuotedPrintableDecoder qpDecoder(stream);
StreamCopier::copyToString(qpDecoder, attachment);
} else
*/
StreamCopier::copyToString(stream, attachment);
// NOTE(review): file name already stored; an empty attachment is not, so the two vectors can differ in size
if(!attachment.empty()) _attachments.push_back(attachment);
}
} else {
//Email body content
//Change request 20101007: Ignore text/html part
// NOTE(review): the second operand is true for every string, so only the text/html test decides
if(contentType.find("text/html") == string::npos && (_body.empty() || _body.length() > 0))
StreamCopier::copyToString(stream, _body);
}
}
/**
 * @return Read-only access to the stored part headers (_headers)
 */
const vector<string>& MyPartHandler::GetHeaders()
{
    return this->_headers;
}
/**
 * @return Read-only access to the stored message body (_body)
 */
const string& MyPartHandler::GetBody()
{
    return this->_body;
}
/**
 * @return Read-only access to the stored attachment file names (_filenames)
 */
const vector<string>& MyPartHandler::GetFilenames()
{
    return this->_filenames;
}
/**
 * @return Read-only access to the stored attachment contents (_attachments)
 */
const vector<string>& MyPartHandler::GetAttachments()
{
    return this->_attachments;
}
/**
 * Decodes a header value made of one or more "encoded words" into UTF-8.
 *
 * An encoded word looks like  =?charset?X?text?=  where X is 'B' (base64) or
 * 'Q' (quoted-printable), in either case. Several words, possibly in different
 * encodings, may follow each other, e.g. (mixed English and Ukrainian):
 * =?windows-1251?Q?outlook:_testing_with_english_text....and_ukrainian_=EA?= =?windows-1251?B?7u3q8/DxIOTw4Oru7bPi8fzq6PUg9+7i7bPi?=
 *
 * Each word is located by walking its three '?' separators and then its "?="
 * terminator, so neither a Q text that starts with an escaped byte ("?Q?=E4...")
 * nor base64 padding in front of the terminator ("==?=") is mistaken for a
 * word boundary.
 *
 * @param phrase The raw header value
 * @return The phrase unchanged when it is blank or is not wrapped in =? ... ?=;
 *         otherwise the concatenated decoded words (text between words is dropped).
 *         An empty string is returned when a charset conversion reports an error.
 **/
string MyPartHandler::DecodeString(string phrase) {
    if (String::Trim(phrase).length() == 0) {
        return phrase;
    }
    if (!String::IsBeginWith(phrase, "=?") || !String::IsEndWith(phrase, "?=")) {
        return phrase;
    }

    string utf8Phrase = "";
    size_t wordStart = phrase.find("=?");
    while (wordStart != string::npos) {
        // Layout: =?<charset>?<type>?<text>?=
        const size_t charsetEnd = phrase.find('?', wordStart + 2);
        const size_t typeEnd = (charsetEnd == string::npos) ? string::npos : phrase.find('?', charsetEnd + 1);
        const size_t textEnd = (typeEnd == string::npos) ? string::npos : phrase.find("?=", typeEnd + 1);
        if (textEnd == string::npos) {
            // Not a complete encoded word: keep the remainder as it is (safety measure)
            utf8Phrase += phrase.substr(wordStart);
            break;
        }

        const string quotedString = phrase.substr(wordStart, textEnd + 2 - wordStart); //+2 is for the ending ?=
        const string textEncoding = phrase.substr(wordStart + 2, charsetEnd - (wordStart + 2));
        const string encodedType = String::ToLower(phrase.substr(charsetEnd + 1, typeEnd - (charsetEnd + 1)));
        string encodedString = phrase.substr(typeEnd + 1, textEnd - (typeEnd + 1));

        string decodedPhrase = "";
        if (encodedType == "b") {
            istringstream iss(encodedString);
            Poco::Base64Decoder base64Decoder(iss);
            StreamCopier::copyToString(base64Decoder, decodedPhrase);
        } else if (encodedType == "q") {
            // In Q encoding '_' stands for a space. Translate it BEFORE decoding so that
            // an escaped underscore (=5F), or a decoded byte that happens to equal '_'
            // inside a multi-byte character, is left untouched.
            replace(encodedString.begin(), encodedString.end(), '_', ' ');
            istringstream iss(encodedString);
            Poco::Net::QuotedPrintableDecoder qpDecoder(iss);
            StreamCopier::copyToString(qpDecoder, decodedPhrase);
        } else {
            decodedPhrase = quotedString; //safety measure
        }

        if (String::ToLower(textEncoding) != "utf-8") {
            string errorMessage = "";
            string convertedPhrase = "";
            //Microsoft Outlook 2007 cannot differentiate between simplified and traditional chinese for email subject.
            //It will only list the content type as GB2312, thus we need to do a conversion.
            if (String::ToLower(textEncoding) == "gb2312") {
                String::ConvertTextEncoding("GBK", "UTF-8", decodedPhrase, convertedPhrase, errorMessage);
            } else {
                String::ConvertTextEncoding(textEncoding, "UTF-8", decodedPhrase, convertedPhrase, errorMessage);
            }
            if (errorMessage.length() > 0) {
                return "";
            }
            utf8Phrase += convertedPhrase;
        } else {
            utf8Phrase += decodedPhrase;
        }

        // Continue after the terminator of the word just handled
        wordStart = phrase.find("=?", textEnd + 2);
    }
    return utf8Phrase;
}
/**
 * Resets the handler: empties the stored body, boundary, headers,
 * file names and attachments.
 */
void MyPartHandler::CleanUp()
{
    _headers.clear();
    _filenames.clear();
    _attachments.clear();
    _myboundary.clear();
    _body.clear();
}
#ifndef MYPARTHANDLER_H
#define MYPARTHANDLER_H
#include "Poco/Net/POP3ClientSession.h"
#include "Poco/Net/MailMessage.h"
#include "Poco/Net/PartHandler.h"
#include "Poco/Net/MessageHeader.h"
#include "Poco/Net/NameValueCollection.h"
#include "Poco/Net/QuotedPrintableDecoder.h"
#include "Poco/Net/MultipartReader.h"
#include "Poco/Exception.h"
#include "Poco/StreamCopier.h"
#include "Poco/Base64Decoder.h"
#include "utils/Directory.h"
#include "utils/String.h"
#include <iostream>
#include <fstream>
using namespace std;
using Poco::Net::POP3ClientSession;
using Poco::Net::MailMessage;
using Poco::Net::PartHandler;
using Poco::Net::MessageHeader;
using Poco::Net::NameValueCollection;
using Poco::Net::MultipartReader;
using Poco::StreamCopier;
using Poco::Exception;
// Part handler that collects headers, the text body and attachments of a mail
// while its parts are being processed.
class MyPartHandler: public PartHandler
{
public:
// Invoked with the headers and content stream of a message part
void handlePart(const MessageHeader& header, std::istream& stream);
// Accessors for the data gathered by handlePart()
const string& GetBody();
const vector<string>& GetHeaders();
const vector<string>& GetFilenames();
const vector<string>& GetAttachments();
// Decodes =?charset?B|Q?text?= encoded words (subjects, file names) to UTF-8
string DecodeString(string phrase);
// Empties all collected data
void CleanUp();
private:
// Text body of the message
string _body;
// Boundary text taken from a sub-part's Content-Type, used to cut the body
string _myboundary;
// One serialised header block per handled part
vector<string> _headers;
// Attachment file names
vector<string> _filenames;
// Attachment contents
vector<string> _attachments;
};
#endif
/**
* @name EmailInbound
* @description Receive and Sort emails
*/
#include "Service.h"
/**
* Constructor. The body is empty: no work is performed here.
*/
Service::Service()
{
}
/**
* Destructor. The body is empty: no work is performed here.
*/
Service::~Service()
{
}
/**
* Initialises this, the service implementation class.
* The body is currently empty.
*/
void Service::Init()
{
}
/**
* Determines which mode the service is running in and then executes the appropriate 'Run' method
* (called automatically by the ServiceBase class)
*/
void Service::Run()
{
EmailInboundConfig *emailInboundConfig = GetEmailInboundConfig(localDatabase, serviceConfigID);
if (_IsError) {
if (_Verbose) {
VerboseLog(">>> EMAIL CLIENT CONFIG ERROR");
}
HandleSystemError();
delete emailInboundConfig;
continue;
}
//Connect to account and see if there is any new email
try {
POP3ClientSession ppcs(MAIL_SERVER);
MyPartHandler partHandler;
MyMailMessage mailMessage;
POP3ClientSession::MessageInfoVec messageInfoVec;
ppcs.login(emailInboundConfig->GetUsername(), emailInboundConfig->GetPassword());
ppcs.listMessages(messageInfoVec);
for (unsigned int i = 1; i <= messageInfoVec.size(); i++) {
ppcs.retrieveMessage(i, mailMessage, partHandler);
//Gather details of the email received and log them
stringstream headersSS;
mailMessage.write(headersSS);
vector<string> partHeaders = partHandler.GetHeaders();
for (unsigned int j=0; j < partHeaders.size(); j++) {
headersSS << partHeaders[j];
}
if (mailMessage.isMultipart()) {
InsertEmailInboundLog(localDatabase, serviceConfigID, headersSS.str(), partHandler.GetBody());
} else {
// Save body content only if [name] property doesn't exist in ContentType
string ct_filename = mailMessage.GetContentTypeProperty();
if (ct_filename.size() == 0) {
InsertEmailInboundLog(localDatabase, serviceConfigID, headersSS.str(), mailMessage.getContent());
}
}
// Process the email
ProcessEmail(emailInboundConfig, mailMessage, partHandler);
if (_IsError) {
HandleSystemError();
}
mailMessage.clear();
ppcs.deleteMessage(i);
}
ppcs.close();
} catch(Exception& e) {
RaiseSystemError("Exception raised by POCO while processing. " + e.displayText());
HandleSystemError();
delete emailInboundConfig;
continue;
}
delete emailInboundConfig;
}
/**
* Performs any additional steps required to stop
* (called automatically by the ServiceBase class Stop method).
* The body is currently empty.
*/
void Service::Stop()
{
}
/**
 * Processes the email, as required:
 *  Step 1 - when body saving is configured, writes the body to a temp file and
 *           copies it to a uniquely named file under "creator/in/"
 *  Step 2 - when attachment saving is configured, gathers file names and attachments
 *  Step 3 - clears the part handler so it can be reused for the next message
 *
 * The temp body file is closed and released, and the part handler is cleared,
 * on every exit path (including the error returns).
 *
 * @param emailInboundConfig EmailInbound Config
 * @param mailMessage Email to process
 * @param partHandler Part handler holding the body/attachments collected for this email
 */
void Service::ProcessEmail(EmailInboundConfig* emailInboundConfig, MyMailMessage& mailMessage, MyPartHandler& partHandler)
{
    string creatorFilePath;
    string outputBodyPath = _PhoenixSettings->GetBaseTempPath() + "body.txt";
    string bodyFilename = File::ExtractFilename(outputBodyPath);
    if (_Verbose) {
        VerboseLog("Body file path= " + outputBodyPath + "; bodyFilename = " + bodyFilename);
    }

    //Step 1: Check whether to save body content
    if (emailInboundConfig->GetSaveBody() == "Yes") {
        File* contentFile = new File(outputBodyPath, File::MODE_WRITE);
        string content;
        try {
            if(mailMessage.isMultipart()) {
                content.append(partHandler.GetBody());
            } else {
                // Skip saving body content if ContentType header has [name] property
                string ct_filename = mailMessage.GetContentTypeProperty();
                if (ct_filename.size() == 0) {
                    content = mailMessage.getContent();
                }
            }
        } catch(Exception& e) {
            RaiseSystemError("POCO error:" + e.displayText());
            // Release the temp file and the handler state before bailing out
            contentFile->Close();
            delete contentFile;
            partHandler.CleanUp();
            return;
        }

        const bool hasContent = content.length() > 0;
        if (hasContent) {
            contentFile->Write(content);
        }
        // The file object is released whether or not anything was written
        contentFile->Close();
        delete contentFile;

        if (hasContent) {
            stringstream tempFilePath;
            tempFilePath << _PhoenixSettings->GetBaseGlobalPath() << "creator/in/" << DateTime::NowString("%Y%m%d%H%M%S","GMT") << "_" << _ServiceInstance->GetID();
            File *creatorFile = new File(tempFilePath.str(), File::MODE_CREATEUNIQUE);
            if (creatorFile->IsError()) {
                RaiseSystemError("Unable to create unique file for creator - " + creatorFile->GetErrorMessage());
                delete creatorFile;
                partHandler.CleanUp();
                return;
            }
            creatorFilePath = creatorFile->GetFilename();
            delete creatorFile;

            if (!File::Copy(outputBodyPath,creatorFilePath)) {
                RaiseSystemError("Unable to copy file attachment \"" + outputBodyPath + "\" to creator in \"" + creatorFilePath + "\"");
                partHandler.CleanUp();
                return;
            }

            // Do some file processing here
            // ...

            if(!File::Delete(outputBodyPath)) {
                RaiseSystemError("Unable to remove body file after put into creator - " + outputBodyPath);
            }
        }
    }

    //Step 2: Check whether to save file attachments
    if (emailInboundConfig->GetSaveAttachments() == "Yes") {
        vector<string> filenames;
        vector<string> attachments;
        // Check if message is multipart
        if (mailMessage.isMultipart()) {
            //Retrieve filenames & attachments
            filenames = partHandler.GetFilenames();
            attachments = partHandler.GetAttachments();
        } else {
            // For some oddball reason, a mail, which has attachment as the body might have no MIME boundary defined
            // Example Message Header with no boundary and body content defined (blank) but attachment is located in body content:
            // Content-Description: PDF Conversion
            // Content-Disposition: attachment; filename="Invoice.pdf"
            // Content-Transfer-Encoding: base64
            // Content-Type: application/pdf; name="Invoice.pdf" <-- Poco library can only retrieve this field without going through MessageHeader
            // Step 1: Check if "Content-Type" clause exists in header. if so, extract filename.
            // Check list of MIME types
            string ct_filename = mailMessage.GetContentTypeProperty();
            if(ct_filename.size() > 0) {
                filenames.push_back(ct_filename);
                // Step 2: Retrieve the body content (attachment)
                attachments.push_back(mailMessage.getContent());
            }
        }
    }

    //Step 3: Clean up
    partHandler.CleanUp();
}
#ifndef SERVICE_IMPL_H
#define SERVICE_IMPL_H
#include "Poco/Net/POP3ClientSession.h"
#include "Poco/Net/MailMessage.h"
#include "Poco/Net/PartHandler.h"
#include "Poco/Net/MessageHeader.h"
#include "Poco/Net/NameValueCollection.h"
#include "Poco/Net/QuotedPrintableDecoder.h"
#include "Poco/Net/MultipartReader.h"
#include "Poco/Exception.h"
#include "Poco/StreamCopier.h"
#include "Poco/Base64Decoder.h"
#include "utils/String.h"
#include "include/MyMailMessage.h"
#include "include/MyPartHandler.h"
#include <iostream>
#include <fstream>
using namespace std;
using Poco::Net::POP3ClientSession;
using Poco::Net::MailMessage;
using Poco::Net::PartHandler;
using Poco::Net::MessageHeader;
using Poco::Net::NameValueCollection;
using Poco::Net::MultipartReader;
using Poco::StreamCopier;
using Poco::Exception;
#define MAIL_SERVER "mail.yourcompanyserver.com"
/**
* Service Implementation: receives emails over POP3 and processes them
*/
class Service
{
public:
Service();
virtual ~Service();
// Initialisation hook (empty in the implementation)
void Init();
// Retrieves, logs, processes and deletes the messages of the mailbox
void Run();
// Stop hook (empty in the implementation)
void Stop();
private:
// Saves the body and gathers the attachments of one email according to the config
void ProcessEmail(EmailInboundConfig* emailInboundConfig, MyMailMessage& mailMessage, MyPartHandler& handler);
};
#endif
#include "String.h"
using namespace custom::utils;
/**
 * Returns the n-th delimiter-separated field of a string.
 *
 * @param source The source string to take the field data from
 * @param fieldNumber The number (from 1-n) of the field to take data from
 * @param delimiter The character which delimits the fields in the source
 * @return The data found in the field, or an empty string when there is no such field
 */
string String::TakeField(const string &source, int fieldNumber, char delimiter) {
    if (fieldNumber < 1) {
        return "";
    }
    string::size_type fieldStart = 0;
    for (int current = 1; ; ++current) {
        const string::size_type fieldEnd = source.find(delimiter, fieldStart);
        if (current == fieldNumber) {
            // Last field runs to the end of the source, others up to the delimiter
            return (fieldEnd == string::npos)
                ? source.substr(fieldStart)
                : source.substr(fieldStart, fieldEnd - fieldStart);
        }
        if (fieldEnd == string::npos) {
            break; //Ran out of fields
        }
        fieldStart = fieldEnd + 1;
    }
    return "";
}
/**
 * Search for the field at the specified field number and returns its data.
 *
 * Note: with a multi-character delimiter the last field (the one not followed by
 * a delimiter) is only returned when the delimiter occurs at least once in the
 * source; a source without any delimiter yields an empty string.
 *
 * @param source The source string to take the field data from
 * @param fieldNumber The number (from 1-n) of the field you wish to take data from
 * @param delimiter The string (1-n characters) which delimits the fields in the source
 * @return The data found in the field, or an empty string when the field cannot be
 *         found or the delimiter is empty
 */
string String::TakeField(const string &source, int fieldNumber, string delimiter) {
    if (delimiter.length() == 1) //if the delimiter is a char use other TakeField - more efficient
        return TakeField(source, fieldNumber, delimiter[0]);
    if (delimiter.empty()) //an empty delimiter matches everywhere and would never advance the scan
        return "";

    string::size_type start = 0;
    int fieldNum = 1;
    // Jump from delimiter to delimiter instead of testing every position
    for (string::size_type pos = source.find(delimiter, start);
         pos != string::npos;
         pos = source.find(delimiter, start)) {
        if (fieldNum == fieldNumber) { //Found field
            return source.substr(start, pos - start);
        }
        fieldNum++;
        start = pos + delimiter.length();
    }

    // Text after the last delimiter; 'start' is only non-zero once a delimiter was seen
    if (start != 0 && fieldNumber == fieldNum)
        return source.substr(start);
    return ""; //Couldn't find field
}
/**
 * substr-like helper that counts utf8 characters (code points) instead of bytes.
 *
 * @param source The source string to take the sub-string from
 * @param start The start position (0-n), in characters, to take the sub-string from
 * @param length The number of characters to take from the start position
 * @return Sub-string, or empty string if start lies beyond the number of characters in source
 */
string String::FixField(const string &source, int start, int length) {
    string taken = "";
    if (utf8::unchecked::distance(source.begin(), source.end()) < start) {
        return taken;
    }

    string::const_iterator cursor = source.begin();

    // Step over the characters in front of the requested start position
    int index = 0;
    while (cursor != source.end() && index < start) {
        utf8::unchecked::next(cursor);
        ++index;
    }

    // Copy characters until 'length' of them were taken or the source ends.
    // The counter is compared for equality only, so a length of zero or less
    // never stops the copy early.
    int copied = 0;
    while (cursor != source.end()) {
        string encoded;
        utf8::unchecked::append(utf8::unchecked::next(cursor), back_inserter(encoded));
        taken.append(encoded);
        if (++copied == length) {
            break;
        }
    }
    return taken;
}
/**
 * Trims all white space (space, tab, carriage return, line feed) from the
 * start and end of a string.
 *
 * @param source The input string to be trimmed.
 * @return Output string containing the trimmed string; empty when the source is
 *         empty or consists of white space only.
 */
string String::Trim (const string &source) {
    const string::size_type first = source.find_first_not_of(" \t\r\n");
    if (first == string::npos) {
        // Empty, or nothing but white space (this includes strings that start
        // with a tab or a line break)
        return "";
    }
    const string::size_type last = source.find_last_not_of(" \t\r\n");
    return source.substr(first, last - first + 1);
}
/**
 * Uses the iconv library to perform conversion between text encodings.
 *
 * The input may be of any length: the output is produced through a fixed-size
 * work buffer that is emptied into the result whenever it fills up.
 *
 * Example usage:
 * string input = "text to convert, possibly with encoding specific characters";
 * string newLine = "";
 * string errMsg = "";
 *
 * if (false == convertTextEncoding("ISO-8859-1","UTF-8",line,newLine,errMsg)) {
 * cout << "ERROR: " << errMsg << endl;
 * } else {
 * cout << newLine << endl;
 * }
 *
 * @param fromEncoding The name of the character encoding you want to convert from
 * @param toEncoding The name of the character encoding you want to convert to
 * @param inputStr The string to convert
 * @param outputStr The string which will be populated with the converted output (left empty on failure)
 * @param errMsg The variable which will be populated with the error message data if the conversion fails
 * @return True if the conversion was successful, otherwise false
 */
bool String::ConvertTextEncoding(string fromEncoding, string toEncoding, const string &inputStr, string &outputStr, string &errMsg) {
    outputStr = "";

    //setup the conversion descriptor
    errno = 0;
    iconv_t icDescriptor = iconv_open(toEncoding.c_str(),fromEncoding.c_str());
    if (icDescriptor == (iconv_t)(-1)) { //iconv_open signals failure with (iconv_t)-1
        errMsg = "iconv_open failed with ";
        if (errno == EMFILE)
            errMsg.append("EMFILE: max file descriptors open in calling process");
        else if (errno == ENFILE)
            errMsg.append("ENFILE: too many files are currently open in the system");
        else if (errno == ENOMEM)
            errMsg.append("ENOMEM: insufficent memory available");
        else if (errno == EINVAL)
            errMsg.append("EINVAL: encoding specified for conversion is not supported");
        else
            errMsg.append("UNKNOWN ERROR: most likely invalid text encoding specified");
        return false;
    }

    //iconv wants a writable input pointer, so work on a private copy sized to the input
    //(length taken from the string itself, so embedded NUL bytes are converted too)
    vector<char> inBuf(inputStr.begin(), inputStr.end());
    char* inptr = inBuf.empty() ? 0 : &inBuf[0];
    size_t inSize = inBuf.size();

    char out[TEXTENC_BUFFER];
    string converted;
    //Once the input is used up, one more call with a null input pointer makes
    //iconv emit any pending shift sequence of a stateful encoding
    bool flushing = inBuf.empty();
    for (;;) {
        char* outptr = out;
        size_t outSize = sizeof(out);
        errno = 0;
        const size_t rc = iconv(icDescriptor,&inptr,&inSize,&outptr,&outSize);
        const size_t produced = sizeof(out) - outSize;
        converted.append(out, produced); //exact byte count, no reliance on a terminating NUL

        if ((size_t)(-1) == rc) {
            if (errno == E2BIG && produced > 0) {
                continue; //work buffer was full and has been emptied - carry on
            }
            errMsg = "iconv failed with ";
            if (errno == E2BIG)
                errMsg.append("E2BIG: insufficent space in output buffer");
            else if (errno == EILSEQ)
                errMsg.append("EILSEQ: input byte does not belong to specified encoding");
            else if (errno == EINVAL)
                errMsg.append("EINVAL: incomplete character at end of input buffer");
            else
                errMsg.append("UNKNOWN ERROR: most likely invalid text encoding specified");
            iconv_close(icDescriptor); //do not leak the descriptor on failure
            return false;
        }

        if (flushing) {
            break;
        }
        flushing = true;
        inptr = 0;
        inSize = 0;
    }

    iconv_close(icDescriptor);
    outputStr.append(converted);
    errMsg = "";
    return true;
}
/**
* Split a string depending on the delimiter
* ref: http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c
*
* @param s The source of string to be split
* @param delim The string delimiter
* @param elems A pre-defined string vector, split result is pushed to this vector
* @return void
**/
void String::Split(const string &s, char delim, vector<string> &elems) {
stringstream ss(s);
string item;
while (getline(ss, item, delim)) {
item = String::Trim(item);
if(item.size() > 0) {
elems.push_back(String::Trim(item));
}
}
}
/**
* Checks if a string begins with another string.
*
* @param source The input string to be checked.
* @param prefix The string to look for at the start of the source string.
* @return True if source starts with prefix, false otherwise.
*/
bool String::IsBeginWith(const string &source, string prefix) {
if (source.length() < prefix.length())
return false;
if (source.substr(0, prefix.length()) == prefix) {
return true;
} else {
return false;
}
}
/**
* Checks if a string ends with another string.
*
* @param source The input string to be checked.
* @param suffix The string to look for at the end of the source string.
* @return True if source ends with suffix, false otherwise.
*/
bool String::IsEndWith(const string &source, string suffix) {
if (source.length() < suffix.length())
return false;
if (source.substr((source.length() - suffix.length())) == suffix) {
return true;
} else {
return false;
}
}
/**
* Checks if a string contains only numeric characters.
* Allows for certin characters to be ignored by listing them in the second paramter.
*
* @param number String to check
* @param ignoreChars String containing one or more characters to ignore when performing check
*/
bool String::IsNumeric(const string &number, string ignoreChars) {
unsigned int numericCharCount = 0;
unsigned int len = number.length();
if(len == 0)
return false;
for (unsigned int i = 0; i < len; i++) {
if (number[i] < '0' || number[i] > '9') {
bool is_ignore = false;
for (unsigned int j = 0; j < ignoreChars.length() && !is_ignore; j++) {
if (ignoreChars[j] == number[i]) {
is_ignore = true;
}
}
if(!is_ignore) return false;
} else {
numericCharCount++;
}
}
if(numericCharCount == 0)
return false;
return true;
}
/**
 * Returns a copy of the source without any occurrence of the given character.
 * A NUL character as 'remove' leaves the source unchanged.
 *
 * @param source The source string to remove the character from
 * @param remove The character to remove from the source string
 * @return The string after the remove has been performed
 */
string String::RemoveChar(const string &source, char remove) {
    if (remove == 0) {
        return source;
    }
    string kept = "";
    for (string::const_iterator ch = source.begin(); ch != source.end(); ++ch) {
        if (*ch != remove) {
            kept.push_back(*ch);
        }
    }
    return kept;
}
/**
 * Returns a copy of the source without any of the characters listed in 'remove'.
 *
 * @param source The source string to remove the characters from
 * @param remove The characters to remove from the source string
 * @return The string after the remove has been performed
 */
string String::RemoveChars(const string &source, const string &remove) {
    if (remove.length() == 0) {
        return source;
    }
    string kept = "";
    for (string::const_iterator ch = source.begin(); ch != source.end(); ++ch) {
        // Keep the character only when it is not part of the removal set
        if (remove.find(*ch) == string::npos) {
            kept.push_back(*ch);
        }
    }
    return kept;
}
#ifndef String_H
#define String_H
#include <string>
#include <sstream>
#include <errno.h>
#include <iconv.h>
#include <vector>
using namespace std;
namespace custom
{
namespace utils
{
#define TEXTENC_BUFFER 10000 //buffer size for iconv text encoding conversion - currently limited to 10KB which should be plenty for individual string or line processing
/**
* A collection of useful static string utility methods
* NOTE(review): other code in this file calls String::ToLower, which is not declared here - confirm where it is defined
*/
class String
{
public:
// Returns the n-th (1-based) field of source, fields being separated by a single character
static string TakeField(const string &source, int fieldNumber, char delimiter);
// Same as above for a delimiter of one or more characters
static string TakeField(const string &source, int fieldNumber, string delimiter);
// Sub-string by character position and character count for utf8 text
static string FixField(const string &source, int start, int length);
// Removes leading and trailing white space
static string Trim (const string &source);
// Converts inputStr between encodings with iconv; returns false and fills errMsg on failure
static bool ConvertTextEncoding(string fromEncoding, string toEncoding, const string &inputStr, string &outputStr, string &errMsg);
// Splits s on delim and appends the trimmed, non-empty pieces to elems
static void Split(const string &s, char delim, vector<string> &elems);
// Prefix / suffix tests
static bool IsBeginWith(const string &source, string prefix);
static bool IsEndWith(const string &source, string suffix);
// True when number holds at least one digit and otherwise only digits or ignoreChars
static bool IsNumeric(const string &number, string ignoreChars = "");
// Remove one character / a set of characters from source
static string RemoveChar(const string &source, char remove);
static string RemoveChars(const string &source, const string &remove);
//template function which takes any basic type and converts it to a string using the stringstream which has already overloaded the << operator for basic types
template <class T>
static string ToString(T a){stringstream s; s << a; return s.str();}
private:
};
}
}
#endif
@the-gorth
Copy link

So, where's String.h???

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment