Skip to content

Instantly share code, notes, and snippets.

@CaptainJH
Last active August 29, 2015 13:59
Show Gist options
  • Save CaptainJH/10847192 to your computer and use it in GitHub Desktop.
Save CaptainJH/10847192 to your computer and use it in GitHub Desktop.
Read all the files in a given folder (recursively) and parse them
/// Reads the next non-empty line from `f` into `line`.
///
/// Both '\n' and '\r' end a line and are never stored. Runs of terminators
/// (blank lines, or the "\r\n" pair) are skipped, so an empty line is never
/// reported to the caller.
///
/// @param f    Stream opened for reading; a null pointer is treated as end of input.
/// @param line Receives the characters of the line; always cleared first.
/// @return true if a non-empty line was stored in `line`, false once the
///         input is exhausted (or unreadable) with nothing left to return.
bool getline(FILE* f, std::string& line)
{
    line.clear();
    if (f == nullptr)
        return false;

    for (;;)
    {
        // fgetc returns int so that EOF stays distinct from every byte value.
        // Narrowing to char first would confuse byte 0xFF with EOF (signed
        // char) or make EOF undetectable (unsigned char).
        const int c = fgetc(f);
        if (c == EOF)
            return !line.empty();

        if (c == '\n' || c == '\r')
        {
            // A terminator only ends the line once something was collected;
            // otherwise it belongs to a blank line and is skipped.
            if (!line.empty())
                return true;
            continue;
        }

        line.push_back(static_cast<char>(c));
    }
}
/// Usage:
//std::string line;
//auto ifile = fopen(dir.c_str(), "rb");
//if (ifile != nullptr)
//{
//while (getline(ifile, line))
//{
//}
//fclose(ifile);
//}
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <regex>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost\filesystem.hpp>
#include "pystring.h"
void CollectFiles(const std::string& path, std::vector<std::string>& v)
{
boost::filesystem::path dir(path);
boost::filesystem::directory_iterator it_end;
for (boost::filesystem::directory_iterator it_dir(dir); it_dir != it_end; ++it_dir)
{
if (boost::filesystem::is_directory(it_dir->path()))
{
CollectFiles(it_dir->path().string(), v);
}
else if (it_dir->path().extension() == L".csv")
{
if (it_dir->path().filename() != L"miss.csv")
v.push_back(it_dir->path().string());
}
}
}
/// Entry point: collects the CSV reports below the report root and counts
/// the distinct numeric ids found in the second comma-separated field of
/// every line.
///
/// For each line, the first run of decimal digits inside the second field is
/// taken as the id. Lines with fewer than two fields, or without digits in
/// the second field, are ignored.
int main()
{
    std::cout << "Device Report Check " << std::endl;
    const std::string DeviceReportRoot = "F:\\UserReport";
    std::vector<std::string> v;
    CollectFiles(DeviceReportRoot, v);

    // Compiled once up front: constructing a std::regex is expensive and the
    // pattern never changes.
    const std::regex digits("[0-9]+");

    std::set<unsigned long long> QQ_Set;
    for (const std::string& p : v)
    {
        // A named stream is required: istreambuf_iterator takes the stream by
        // non-const reference, which a temporary cannot bind to in standard C++.
        std::ifstream in(p);
        if (!in)
        {
            std::cerr << "Cannot open " << p << std::endl;
            continue;
        }
        const std::string text((std::istreambuf_iterator<char>(in)),
                               std::istreambuf_iterator<char>());

        std::vector<std::string> lines;
        pystring::splitlines(text, lines);
        for (const std::string& s : lines)
        {
            std::vector<std::string> blocks;
            pystring::split(s, blocks, ",");
            if (blocks.size() < 2)
                continue;

            const std::string& qq = blocks[1];
            std::smatch m;
            if (!std::regex_search(qq, m, digits))
                continue;

            try
            {
                // set::insert already ignores duplicates, no lookup needed.
                QQ_Set.insert(std::stoull(m.str()));
            }
            catch (const std::out_of_range&)
            {
                // A digit run too long for unsigned long long is not a usable
                // id; report it instead of aborting the whole scan.
                std::cerr << "Id out of range in " << p << ": " << m.str() << std::endl;
            }
        }
    }
    std::cout << "Found " << QQ_Set.size() << " unique ids" << std::endl;
}
@CaptainJH
Copy link
Author

之前基于stream 的文件读法,会忽略 空格 和 换行,如果要完整读入,应该这样:

    std::stringstream ss;
    ss << std::ifstream(filename).rdbuf();
    std::string text = ss.str();

@CaptainJH
Copy link
Author

使用之前的方法读文件,如果文件中出现 '\0',则会立即中止。对于一些自己无法控制内容的文件,会有潜在的风险,这时只能使用最原始的 C 的读文件的方法

        auto f = fopen(dir.string().c_str(), "r");
        fseek(f, 0, SEEK_END);
        auto size = ftell(f);
        rewind(f);
        char *buf = new char[size];
        fread(buf, 1, size, f);
        fclose(f);

        std::string text(buf);
        delete[] buf;

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment