Skip to content

Instantly share code, notes, and snippets.

@rane-hs
Created November 17, 2011 06:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rane-hs/1372517 to your computer and use it in GitHub Desktop.
Save rane-hs/1372517 to your computer and use it in GitHub Desktop.
ベタ書きなC++ mime parser(工事中)
#include <string>
#include <sstream>
#include <mbstring.h>
//Based on: http://7ujm.net/C++/JIStoSJIS.html (with bugs in that code fixed).
//Converts JIS (ISO-2022-JP) encoded text to Shift-JIS.
namespace iso2022jp
{
//True when the seqLen bytes at seq occur in text starting at offset pos.
inline bool startsWithAt(const std::string &text, const std::string::size_type pos,
    const char *seq, const std::string::size_type seqLen)
{
    return text.compare(pos, seqLen, seq, seqLen) == 0;
}

//Decodes the NUL-terminated ISO-2022-JP string src and returns it as Shift-JIS.
// src : conversion source (a null pointer is treated as an empty string)
//Recognised escape sequences switch the mode and are not copied to the output:
//  ESC $ @ , ESC $ B , ESC $ ( D , ESC & @ ESC $ B  -> 2-byte characters follow
//  ESC ( B (ASCII) , ESC ( J (JIS Roman)            -> 1-byte characters follow
//Any other ESC is treated as ordinary data, as before.
//2-byte characters are converted with _mbcjistojms(); pairs it rejects (result
//without a lead byte) produce no output.
inline const std::string decode(const char *src)
{
    if(src == 0) return std::string();
    const std::string in(src);
    const std::string::size_type len = in.length();
    std::string out;
    out.reserve(len);
    bool doubleByte = false;
    std::string::size_type i = 0;
    //"i < len" (instead of "i <= length()-1") keeps the empty string from underflowing
    while(i < len){
        if(in[i] == 0x1B){
            //Exactly one sequence is consumed per ESC and the loop restarts, so the
            //bytes that follow an escape are never re-tested as an escape themselves.
            //Matching is bounds-checked, so a sequence at the very end is recognised too.
            if(startsWithAt(in, i, "\x1B&@\x1B$B", 6)){ i += 6; doubleByte = true;  continue; }
            if(startsWithAt(in, i, "\x1B$(D", 4))     { i += 4; doubleByte = true;  continue; }
            if(startsWithAt(in, i, "\x1B$@", 3)
            || startsWithAt(in, i, "\x1B$B", 3))      { i += 3; doubleByte = true;  continue; }
            if(startsWithAt(in, i, "\x1B(B", 3)
            || startsWithAt(in, i, "\x1B(J", 3))      { i += 3; doubleByte = false; continue; }
        }
        if(doubleByte){
            //a lone trailing byte cannot form a 2-byte character: stop here
            if(i + 1 >= len) break;
            //Build the JIS code with shifts; no union, so no uninitialised upper bytes
            //and no dependence on byte order.
            const unsigned int lead  = static_cast<unsigned char>(in[i]);
            const unsigned int trail = static_cast<unsigned char>(in[i + 1]);
            const unsigned int sjis  = _mbcjistojms((lead << 8) | trail);
            if((sjis & 0xFF00u) != 0){
                out.push_back(static_cast<char>((sjis >> 8) & 0xFFu));
                out.push_back(static_cast<char>(sjis & 0xFFu));
            }
            i += 2;
        }else{
            //everything that is not a 2-byte character is copied unchanged
            out.push_back(in[i]);
            ++i;
        }
    }
    return out;
}
};
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <bcl/bclstr.h>
//encoder, decoders
#include <clx/base64.h>
#include "utf8.hpp"
#include "iso2022jp.hpp"
#include "quoted_printable.hpp"
namespace mime{
typedef std::pair<std::string, std::string > StringPair;
typedef std::vector<StringPair > mimeData_;
//Renders a (key, value) pair as an XML element: <key>value</key>
//TODO: neither part is escaped or validated (double quotes, < >, embedded NUL);
//      the result is only well-formed when the caller passes clean text.
inline const std::string XMLstring(const StringPair &src_){
    const std::string &tag = src_.first;
    std::string element;
    element += "<";
    element += tag;
    element += ">";
    element += src_.second;
    element += "</";
    element += tag;
    element += ">";
    return element;
}
//Scope holder for the kind of content a part carries.
struct DataType{
    enum Type_{
        text  = 0,
        html  = 1,
        other = 2,
        error = 3
    };
};
//Scope holder for the overall structure of a mail.
struct MailType{
    enum Type_{
        plain     = 0,
        multipart = 1,
        error     = 2
    };
};
//Encodings a body or an encoded header word can be in, plus the decoder dispatch.
class Encoding{
public:
    enum Type_{
        utf8, utf16, utf32,
        base64, string_7bit,
        quoted_printable,
        other, error
    };
    //Decodes src according to typ and returns the result.
    //  base64           -> clx::base64::decode
    //  utf8             -> bcl::utf8::decode
    //  quoted_printable -> quoted_printable::decode
    //  utf16 / utf32 / string_7bit / other -> src is returned unchanged
    //  error            -> throws std::runtime_error
    //std::runtime_error replaces the former std::exception("...") throw: that
    //constructor is a vendor extension, and handlers written for std::runtime_error
    //did not catch it. Handlers for std::exception keep working.
    static const std::string Decode(const Type_ typ, const std::string &src){
        switch(typ){
        case base64:
            return clx::base64::decode(src);
        case utf8:
            return bcl::utf8::decode(src.begin(), src.end());
        case quoted_printable:
            return quoted_printable::decode(src.begin(), src.end());
        case error:
            throw std::runtime_error("can't decode string");
        case utf16:
        case utf32:
        case string_7bit:
        case other:
            break;
        }
        return src;
    }
};
//Character sets the parser knows about, plus name lookup and conversion.
class Charset{
public:
    enum Type_{
        utf8, utf7, euc, shift_jis, iso2022_jis, iso8859_latin, ascii, error
    };
    //Maps text containing a charset name (e.g. the value of "charset=") to a Type_.
    //Matching is by substring and ignores ASCII case, so "Utf-8" or "Iso-2022-JP"
    //are recognised; both "shift_jis" (the registered spelling) and "shift-jis"
    //are accepted. Returns Charset::error for an empty or unknown name.
    static const Type_ GetType(const std::string &charsetString){
        if(charsetString.empty()) return Charset::error;
        //ASCII-only lower-casing, independent of the current locale
        std::string lowered(charsetString);
        for(std::string::size_type i = 0; i < lowered.length(); ++i){
            const char ch = lowered[i];
            if(ch >= 'A' && ch <= 'Z') lowered[i] = static_cast<char>(ch - 'A' + 'a');
        }
        if(lowered.find("iso-2022-jp") != std::string::npos){ return Charset::iso2022_jis; }
        if(lowered.find("shift_jis") != std::string::npos
        || lowered.find("shift-jis") != std::string::npos){ return Charset::shift_jis; }
        //was reported as shift_jis; Decode() treats both identically
        if(lowered.find("ascii") != std::string::npos){ return Charset::ascii; }
        if(lowered.find("utf-8") != std::string::npos){ return Charset::utf8; }
        return Charset::error;
    }
    //Converts src from charset typ and returns the result.
    //  ascii / shift_jis / euc / iso8859_latin / utf7 -> src is returned unchanged
    //  iso2022_jis -> iso2022jp::decode
    //  utf8        -> bcl::utf8::decodeA
    //  error (or any other value) -> throws std::runtime_error
    static const std::string Decode(Type_ typ, const std::string &src){
        switch(typ){
        case Charset::ascii:
        case Charset::shift_jis:
        case Charset::euc:
        case Charset::iso8859_latin:
        case Charset::utf7:
            return src;
        case Charset::iso2022_jis:
            return iso2022jp::decode(src.c_str());
        case Charset::utf8:
            return bcl::utf8::decodeA(src.begin(), src.end());
        case Charset::error:
        default:
            break;
        }
        //the message used to be empty, which made the failure impossible to identify
        throw std::runtime_error("unknown or unsupported charset");
    }
};
//Splits an encoded header word of the form "=?<charset>?<B|Q>?<data>?=".
// src_          : the encoded word
// charsetToken  : [out] text before the first '?' (the charset name); left
//                 untouched when src_ contains no '?' after trimming
// encodingType  : [out] base64 for "B", quoted_printable for "Q" (either case),
//                 Encoding::error otherwise - it is now set on every path
//Returns the <data> portion, or "" when the word is malformed.
const std::string GetEncodeValue(const std::string &src_, std::string &charsetToken, Encoding::Type_ &encodingType){
    //give the out-parameter a defined value before any early return
    encodingType = mime::Encoding::error;
    const std::string trimedSrc = bcl::trim(src_, std::string("=?"), std::string ("?="));
    const std::string::size_type charsetEnd = trimedSrc.find('?');
    if(charsetEnd == std::string::npos)
        return "";
    charsetToken = trimedSrc.substr(0, charsetEnd);
    //An encoding letter and its closing '?' must follow the charset; without this
    //check the final substr() would throw std::out_of_range on short input.
    const std::string::size_type dataPos = charsetEnd + 3;
    if(dataPos > trimedSrc.length() || trimedSrc[charsetEnd + 2] != '?')
        return "";
    const char marker = trimedSrc[charsetEnd + 1];
    if(marker == 'B' || marker == 'b')
        encodingType = mime::Encoding::base64;
    else if(marker == 'Q' || marker == 'q')
        encodingType = mime::Encoding::quoted_printable;
    return trimedSrc.substr(dataPos);
}
//mime dataum
class mimeData : protected mimeData_
{
public:
private:
std::string body_;
MailType::Type_ type_;
DataType::Type_ dataType_;
Encoding::Type_ encodingType_;
std::string boundary_;
public:
using mimeData_::push_back;
using mimeData_::const_iterator;
using mimeData_::cend;
using mimeData_::cbegin;
using mimeData_::end;
using mimeData_::begin;
using mimeData_::iterator;
using mimeData_::clear;
using mimeData_::operator[];
//getter
std::string const GetBody() const { return body_;}
std::string const GetBoundary() const { return "--" + boundary_;}
MailType::Type_ const MailType() const { return type_;}
Encoding::Type_ const EncodingType() const { return encodingType_;}
//parser
std::string GetMimeValue(const std::string &key_) const {
for(std::vector<StringPair>::const_iterator datIt = cbegin();
datIt != cend() ; ++datIt)
{
if(datIt->first == key_){ return datIt->second; }
}
return "";
}
// mail type (multi-part/plain/mixed)
MailType::Type_ const HeaderType()
{
//init
boundary_ = "";
//get
const std::string mimeTypeString = GetMimeValue("Content-Type");
if(mimeTypeString.length() < 1) return MailType::error;
MailType::Type_ dest = MailType::error;
if(mimeTypeString.find("Multipart") != std::string::npos){
dest = MailType::multipart;
}else if(mimeTypeString.find("multipart") != std::string::npos){
dest = MailType::multipart;
}else if(mimeTypeString.find("plain") != std::string::npos){
dest = MailType::plain;
}
if(dest != MailType::multipart){
return dest;
}
//get boundary
const std::string boundaryChecker("boundary=");
const size_t boundaryPos = mimeTypeString.find(boundaryChecker);
if(boundaryPos == std::string::npos)
return MailType::error; //it's multipart but not multi-parted
//hit boundary
boundary_ = bcl::trim(
mimeTypeString.substr(boundaryPos + boundaryChecker.length(),
mimeTypeString.length() - boundaryPos - boundaryChecker.length()
), "\"");
return dest;
}
// data type (text/html/etc...)
DataType::Type_ const GetDataType() const
{
//get
const std::string dataTypeString = GetMimeValue("Content-Type");
if(dataTypeString.length() < 1) return DataType::error;
if(dataTypeString.find("text") != std::string::npos){ return DataType::text; }
else if(dataTypeString.find("html") != std::string::npos){ return DataType::html; }
//not hit
return DataType::other;
}
// encoding type (text/html/etc...)
Encoding::Type_ const GetEncodingType() const
{
//get
const std::string dataTypeString = GetMimeValue("Content-Transfer-Encoding");
if(dataTypeString.length() < 1) return Encoding::error;
if(dataTypeString.find("Base64") != std::string::npos){ return Encoding::base64; }
if(dataTypeString.find("base64") != std::string::npos){ return Encoding::base64; }
if(dataTypeString.find("Quoted-printable") != std::string::npos){ return Encoding::quoted_printable; }
if(dataTypeString.find("quoted-printable") != std::string::npos){ return Encoding::quoted_printable; }
//not hit
return Encoding::other;
}
Charset::Type_ const GetCharset() const
{
//get
const std::string dataTypeString = GetMimeValue("Content-Type");
//get charset
const std::string charsetChecker("charset=");
const size_t offsetPos = dataTypeString.find(charsetChecker);
if(offsetPos == std::string::npos)
return Charset::error; //it's multipart but not multi-parted
//hit boundary
const std::string charsetString = bcl::trim(
dataTypeString.substr(offsetPos + charsetChecker.length(),
dataTypeString.length() - offsetPos - charsetChecker.length()
), "\"");
//not hit
return Charset::GetType(charsetString);
}
void Parse(const std::string &src_){
//init
clear();
std::string headerBuf = "";
size_t headerEndPos = std::string::npos;
size_t bodyStartPos = std::string::npos;
body_ = "";
// \n\n search
if(src_.find("\n\n") != std::string::npos){
headerEndPos = src_.find("\n\n");
bodyStartPos = headerEndPos + std::string("\n\n").length();
}else if(src_.find("\r\r") != std::string::npos) {
headerEndPos = src_.find("\r\r");
bodyStartPos = headerEndPos + std::string("\r\r").length();
}else if(src_.find("\r\n\r\n") != std::string::npos) {
headerEndPos = src_.find("\r\n\r\n");
bodyStartPos = headerEndPos + std::string("\r\n\r\n").length();
}else { return ;}
//header parse
headerBuf = src_.substr(0, headerEndPos);
ParseHeader(headerBuf);
type_ = HeaderType();
dataType_ = GetDataType();
encodingType_ = GetEncodingType();
//parse success
body_ = src_.substr(bodyStartPos, src_.length() - bodyStartPos);
}
void ParseHeader(const std::string &src_)
{
std::string hdBuf = src_;
//replacer
bcl::replace(hdBuf, std::string("\r\n\t"), std::string(" "));
bcl::replace(hdBuf, std::string("\r\t"), std::string(" "));
bcl::replace(hdBuf, std::string("\n\t"), std::string(" "));
bcl::replace(hdBuf, std::string("\r\n "), std::string(""));
bcl::replace(hdBuf, std::string("\r "), std::string(""));
bcl::replace(hdBuf, std::string("\n "), std::string(""));
std::istringstream iss(hdBuf);
size_t separater_pos;
while(!iss.eof()){
std::string lineStr;
std::getline(iss, lineStr);
if((separater_pos = lineStr.find(":")) == std::string::npos)
continue;
std::string key_ = bcl::trim(lineStr.substr(0, separater_pos), " ");
std::string value_ = bcl::trim(lineStr.substr(separater_pos+1 , lineStr.length() - separater_pos - 1), " ");
push_back(StringPair(key_, value_));
}
}
const bool IsAttachment() const {
const std::string contentDispositionValue = GetMimeValue("Content-Disposition");
return (contentDispositionValue.find("attachment")!=std::string::npos)?true:false;
}
const std::string GetAttachmentByteArray() const{
if(!IsAttachment()) throw std::exception("don't attachment!");
switch(EncodingType()){
case Encoding::base64:
return clx::base64::decode(OneLinerBody());
break;
case Encoding::quoted_printable:
case Encoding::error:
case Encoding::other:
case Encoding::utf16:
case Encoding::utf32:
case Encoding::utf8:
break;
}
return body_;
};
const std::string DecodedBody() const {
if(encodingType_ != Encoding::error)
return Charset::Decode(GetCharset(), Encoding::Decode(encodingType_, body_));
//return Encoding::Decode(encodingType_, Charset::Decode(GetCharset(), body_));
else
return Charset::Decode(GetCharset(), body_);
}
const std::string OneLinerBody() const
{
std::istringstream iss(body_);
std::ostringstream oss_dest("");
while(!iss.eof()){
std::string lineStr; std::getline(iss, lineStr); oss_dest << lineStr;
}
return oss_dest.str();
}
const std::string getAttachName() const
{
//エンコード種類を取得
const std::string contentTypeValue = GetMimeValue("Content-Type");
const std::string fileNameChecker("name=");
const size_t boundaryPos = contentTypeValue.find(fileNameChecker);
if(boundaryPos == std::string::npos)
return ""; //it's multipart but not multi-parted
//hit
const std::string encodedNameBuf = bcl::trim(
contentTypeValue.substr(boundaryPos + fileNameChecker.length(),
contentTypeValue.length() - boundaryPos - fileNameChecker.length()
), "\"");
//decode and return
std::string encodeToken;
Encoding::Type_ encodingType;
const std::string encodeName = GetEncodeValue(encodedNameBuf, encodeToken, encodingType);
//デコードを2回して、返す
const std::string decodedBuf = Encoding::Decode(encodingType, encodeName);
return Charset::Decode(Charset::GetType(encodeToken), decodedBuf);
}
};
typedef std::vector<mimeData> mimeDataList;
//One mail: the top-level header/body and, for multipart mail, its parsed sections.
struct MailItem
{
    mimeData header_;
    mimeDataList multiPartData_;
    void clear(){
        header_.clear();
        multiPartData_.clear();
    }
    //Splits s at every occurrence of separator and stores the pieces in dest
    //(dest is cleared first). Pieces between two separators are kept even when
    //empty; an empty piece after the last separator is dropped. When s is empty,
    //separator is empty, or separator does not occur, dest receives s exactly once
    //(it used to be stored twice when the separator was absent, and an empty
    //separator never terminated).
    static void Tokenize(const std::string &s, const std::string &separator, std::vector<std::string > &dest){
        dest.clear();
        if(s.empty() || separator.empty()){ dest.push_back(s); return ; }
        std::string::size_type from = 0;
        std::string::size_type hit = s.find(separator, from);
        if(hit == std::string::npos){ dest.push_back(s); return ; }
        //search from an offset instead of re-copying the remainder on every step
        while(hit != std::string::npos){
            dest.push_back(s.substr(from, hit - from));
            from = hit + separator.length();
            hit = s.find(separator, from);
        }
        if(from < s.length()){
            dest.push_back(s.substr(from));
        }
    }
    //Parses src_ into header_ and, when the mail is multipart, one mimeData per
    //section in multiPartData_ (sections of an earlier call are discarded).
    //A delimiter is "--boundary" at the start of a line, optionally followed by
    //"--" (closing delimiter) and blanks, then the end of the line. LF and CRLF
    //line endings are both accepted. Text before the first delimiter (preamble)
    //and after the closing one is ignored; if the closing delimiter is missing,
    //the text after the last delimiter is still taken as a section.
    void Parse(const std::string src_){
        multiPartData_.clear();
        header_.Parse(src_);
        if(header_.MailType() != mime::MailType::multipart)
            return;
        //read boundary
        const std::string delimiter = header_.GetBoundary();
        if(delimiter.length() <= 2) return; //only the "--" prefix: nothing to match
        const std::string body = header_.GetBody();
        const std::string::size_type npos = std::string::npos;
        std::string::size_type searchFrom = 0;
        std::string::size_type partStart = npos; //npos while still in the preamble
        bool closed = false;
        while(!closed){
            const std::string::size_type hit = body.find(delimiter, searchFrom);
            if(hit == npos) break;
            std::string::size_type tail = hit + delimiter.length();
            const bool closing = (body.compare(tail, 2, "--") == 0);
            if(closing) tail += 2;
            while(tail < body.length() && (body[tail] == ' ' || body[tail] == '\t')) ++tail;
            const bool atLineStart = (hit == 0) || body[hit - 1] == '\n' || body[hit - 1] == '\r';
            const bool atLineEnd = (tail >= body.length()) || body[tail] == '\r' || body[tail] == '\n';
            if(!atLineStart || !atLineEnd){
                //the boundary text merely occurs inside other content
                searchFrom = hit + 1;
                continue;
            }
            if(partStart != npos) AppendPart(body.substr(partStart, hit - partStart));
            if(closing){
                closed = true;
                break;
            }
            //the section begins on the line after the delimiter line
            const std::string::size_type eol = body.find('\n', tail);
            partStart = (eol == npos) ? body.length() : eol + 1;
            searchFrom = partStart;
        }
        if(!closed && partStart != npos) AppendPart(body.substr(partStart));
    }
private:
    //Parses partText as one section and stores it; empty text is skipped.
    void AppendPart(const std::string &partText){
        if(partText.empty()) return;
        mimeData wkData;
        wkData.Parse(partText);
        multiPartData_.push_back(wkData);
    }
};
};
#ifndef BCL_QUOTED_PRINTABLE_HPP
#define BCL_QUOTED_PRINTABLE_HPP
#include <string>
#include <sstream>
//Quoted-printable encoder/decoder.
//Every non-template function is inline because this header may be included from
//more than one translation unit.
namespace quoted_printable{
//Value (0-15) of one hexadecimal digit, or -1 when digit is not a hex digit.
//Returns int so the -1 marker survives on platforms where char is unsigned.
inline int hexDigitValue(const char digit){
    if( (digit >= '0') && (digit <= '9') ) return digit - '0';
    if( (digit >= 'A') && (digit <= 'F') ) return 10 + (digit - 'A');
    if( (digit >= 'a') && (digit <= 'f') ) return 10 + (digit - 'a');
    return -1;
}
//Value of one hexadecimal digit as char; -1 (converted to char) when invalid.
inline const char asciiToBinary(const char src1){
    return static_cast<char>(hexDigitValue(src1));
}
//Byte described by two hexadecimal digits, src1 being the high nibble.
//The result is meaningless when either character is not a hex digit.
inline const char asciiToBinary(const char src1, const char src2){
    //multiplication instead of a shift: shifting the -1 marker is undefined behaviour
    return static_cast<char>(hexDigitValue(src1) * 16 + hexDigitValue(src2));
}
//Decodes the quoted-printable text in [begin, end).
//  "=XY" (two hex digits)          -> the byte 0xXY, NUL included
//  "=" + line break (CR, LF, CRLF) -> soft line break, removed entirely
//  "=" as the very last character  -> removed (soft break whose newline was stripped)
//  "=" followed by anything else   -> copied literally
//Only forward iteration is used, and no element at or past end is ever read.
template <typename Iter_>
const std::string decode(Iter_ begin, Iter_ end)
{
    std::string out;
    Iter_ nowPos = begin;
    while(nowPos != end)
    {
        if((*nowPos) != '=')
        {
            out.push_back(*nowPos);
            ++nowPos;
            continue;
        }
        //found "="
        Iter_ firstPos = nowPos; ++firstPos;
        if(firstPos == end) break;
        if( ((*firstPos) == '\n') || ((*firstPos) == '\r') )
        {
            //skip "=" and the whole line break; CRLF counts as one break
            const bool wasCR = ((*firstPos) == '\r');
            nowPos = firstPos; ++nowPos;
            if(wasCR && nowPos != end && (*nowPos) == '\n') ++nowPos;
            continue;
        }
        Iter_ secondPos = firstPos; ++secondPos;
        const int hi = hexDigitValue(*firstPos);
        const int lo = (secondPos != end) ? hexDigitValue(*secondPos) : -1;
        if(hi < 0 || lo < 0)
        {
            //malformed escape: keep the "=" and carry on with the next character
            out.push_back('=');
            nowPos = firstPos;
            continue;
        }
        //push_back (not operator<< on a C string) so that a decoded NUL is kept
        out.push_back(static_cast<char>(hi * 16 + lo));
        nowPos = secondPos; ++nowPos;
    }
    return out;
}
//Decodes a NUL-terminated quoted-printable string (null pointer -> "").
inline const std::string decode(const char *src)
{
    if(src == 0) return std::string();
    const std::string::size_type len = std::string(src).length();
    //the end iterator is one past the last character, so nothing is dropped and
    //the empty string is handled
    return decode<const char *> (src, src + len);
}
//True when the character at src has to be written as "=XY": everything outside
//0x20 (space) .. 0x7E, plus "=" itself, which introduces an escape.
template <typename Iter_>
const bool IsEnc(Iter_ src){
    const char ch = *src;
    if(ch == '=') return true;
    return !( (ch >= 0x20) && (ch <= 0x7E) );
}
//Encodes the bytes in [begin, end) as quoted-printable.
//  - bytes for which IsEnc() is true become "=XY" with upper-case hex digits;
//    this includes TAB, CR and LF, so the input is treated as raw data
//  - a space that is the final byte becomes "=20" (no trailing blank)
//  - a soft line break "=\r\n" is inserted so that no output line is longer
//    than 76 characters, the "=" included
template <typename Iter_>
const std::string encode(Iter_ begin, Iter_ end)
{
    static const char hexDigits[] = "0123456789ABCDEF";
    const std::string::size_type maxContentPerLine = 75; //76 minus the "=" of a soft break
    std::string out;
    std::string::size_type lineLen = 0;
    while(begin != end)
    {
        Iter_ nextPos = begin; ++nextPos;
        const unsigned char byte = static_cast<unsigned char>(*begin);
        const bool escape = IsEnc(begin) || (byte == ' ' && nextPos == end);
        const std::string::size_type tokenLen = escape ? 3 : 1;
        if(lineLen + tokenLen > maxContentPerLine){
            out += "=\r\n";
            lineLen = 0;
        }
        if(escape){
            out.push_back('=');
            out.push_back(hexDigits[byte >> 4]);
            out.push_back(hexDigits[byte & 0x0F]);
        }else{
            out.push_back(static_cast<char>(byte));
        }
        lineLen += tokenLen;
        begin = nextPos;
    }
    return out;
}
//Encodes a NUL-terminated string (null pointer -> "").
inline const std::string encode(const char *src)
{
    if(src == 0) return std::string();
    const std::string::size_type len = std::string(src).length();
    return encode<const char *> (src, src + len);
}
};
#endif // BCL_QUOTED_PRINTABLE_HPP
#include <clx/pop3.h>
#include <iostream>
#include <string>
#include <fstream>
#include "mime_parser.hpp"
//Writes dat_ to the file path_ in text mode, replacing any previous content.
//Returns 0 on success, -1 when the file cannot be opened, -2 when writing fails.
//The stream never has exceptions enabled, so the former try/catch could not
//observe a failed write; the stream state is checked after flushing instead.
//The stream is closed by its destructor on every path.
char Put(const std::string &path_, const std::string &dat_){
    std::ofstream ofs(path_);
    if(!ofs.is_open()) return -1;
    ofs << dat_;
    ofs.flush();
    if(!ofs) return -2;
    return 0;
}
//Writes the bytes of dat_ unchanged (binary mode) to the file path_, replacing
//any previous content.
//Returns 0 on success, -1 when the file cannot be opened, -2 when writing fails.
//The data is written with one unformatted write() instead of byte by byte, and
//the stream state is checked afterwards; the former try/catch could not observe
//a failed write because the stream never has exceptions enabled.
char PutBinary(const std::string &path_, const std::string &dat_){
    std::ofstream ofs(path_, std::ios::binary);
    if(!ofs.is_open()) return -1;
    ofs.write(dat_.data(), static_cast<std::streamsize>(dat_.length()));
    ofs.flush();
    if(!ofs) return -2;
    return 0;
}
int POP3Recieve(const size_t pos_, const char *srv, const char *userName, const char *pass,
std::string &receiveString)
{
try {
clx::pop3 session(srv, 110);
// APOP が使用できるなら APOP 認証を用いる.
session.login(userName, pass, session.apop_support());
std::cout << "status: " << session.status() << std::endl;
// LIST コマンド
clx::pop3::list_type list = session.list();
std::cout << "mail list" << std::endl;
std::cout << "-----" << std::endl;
for (size_t i = 0; i < list.size(); ++i) {
std::cout << list[i] << std::endl;
}
std::cout << "-----" << std::endl;
// TOP コマンド
std::cout << "top [1]" << std::endl;
std::cout << "-----" << std::endl;
std::cout << session.top(1, 0) << std::endl;
std::cout << "-----" << std::endl;
// RETR コマンド
std::cout << "retr [1]" << std::endl;
receiveString = session.retrieve(1);
std::cout << "-----" << std::endl;
std::cout << "length : " << receiveString.length() << std::endl;
std::cout << "-----" << std::endl;
//quit
session.finish();
}
catch (clx::pop3_error& e) {
std::cerr << e.what() << std::endl;
return -2;
}
catch (clx::socket_error& e) {
std::cerr << e.what() << std::endl;
return -3;
}
catch (std::runtime_error& e) {
std::cerr << e.what() << std::endl;
return -4;
}
return 0;
}
int main(int argc, char* argv[]) {
std::string retr;
setlocale(LC_CTYPE, "");
if (argc < 4) {
std::cout << "usage : " << std::endl;
std::cout << "code_testing [server-ip] [user-id] [password]" << std::endl;
std::exit(-1);
}
const char ret = POP3Recieve(1, argv[1], argv[2], argv[3], retr);
if(ret < 0){
std::cout << "connection error! " << ret << std::endl;
std::exit(-1);
}
Put("retr.txt", retr);
mime::MailItem mailItemData;
mailItemData.Parse(retr);
const std::string putDir = ("./");
if(mailItemData.header_.MailType() == mime::MailType::multipart)
{
int idx = 1;
for(mime::mimeDataList::const_iterator mimeIt = mailItemData.multiPartData_.begin();
mimeIt != mailItemData.multiPartData_.cend(); ++ mimeIt)
{
std::cout << "--body[" << idx++ << "]---" << std::endl;
if(mimeIt->EncodingType() == mime::Encoding::error){
}else if(mimeIt->IsAttachment()){
//添付ファイル
std::string putByteArray;
try{
putByteArray = mimeIt->GetAttachmentByteArray();
std::cout << putByteArray.length() << std::endl;
}catch (std::runtime_error& e) {
std::cerr << e.what() << std::endl;
continue;
}
try{
PutBinary(putDir + std::string("\\") + mimeIt->getAttachName() , putByteArray);
}catch (std::runtime_error& e) {
std::cerr << e.what() << std::endl;
continue;
}
}else{
if(mimeIt->GetCharset() != mime::Charset::error)
std::cout << mimeIt->DecodedBody() << std::endl;
}
std::cout << "-----" << std::endl;
}
}else{
std::cout << "--body---" << std::endl;
try{
std::cout << clx::base64::decode(mailItemData.header_.GetBody()) << std::endl;
}catch (std::runtime_error& e) {
std::cerr << e.what() << std::endl;
std::exit(-1);
}
std::cout << "-----" << std::endl;
}
return 0;
}
#include <clx/utf8.h>
#include <bcl/bclstr.h>
namespace bcl { namespace utf8{
//Reads [begin, end) one UTF-8 character at a time with clx::utf8::get and returns
//the concatenation of what it delivered.
//NOTE(review): relies on clx::utf8::get advancing begin - confirm against clx.
template <typename Iter_>
const std::string decode(Iter_ begin, Iter_ end){
    std::string result;
    while (begin != end) {
        std::string oneChar;
        clx::utf8::get(begin, end, std::insert_iterator<std::string>(oneChar, oneChar.end()));
        result += oneChar;
    }
    return result;
}
//Same for a NUL-terminated string (null pointer -> "").
//The end iterator is src + len: the former &src[len-1] dropped the last byte and
//pointed before the buffer for an empty string. inline because this header may
//be included from more than one translation unit.
inline const std::string decode(const char *src)
{
    if(src == 0) return std::string();
    const size_t len = std::string(src).length();
    return decode<const char *> (src, src + len);
}
//Converts the UTF-8 text in [begin, end) to a wide string: each character read by
//clx::utf8::get is turned into one wchar_t with clx::utf8::get_unicode.
template <typename Iter_>
const std::wstring decodeW(Iter_ begin, Iter_ end){
    std::wstring result;
    while (begin != end) {
        std::string oneChar;
        clx::utf8::get(begin, end, std::insert_iterator<std::string>(oneChar, oneChar.end()));
        const wchar_t putCh = clx::utf8::get_unicode(oneChar.begin(), oneChar.end());
        result.push_back(putCh);
    }
    return result;
}
//Converts the UTF-8 text in [begin, end) to a narrow string by passing the
//result of decodeW through bcl::narrow.
template <typename Iter_>
const std::string decodeA(Iter_ begin, Iter_ end){
    return bcl::narrow(decodeW<Iter_>(begin,end));
}
};};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment