Skip to content

Instantly share code, notes, and snippets.

@Answeror
Created July 3, 2012 02:23
Show Gist options
  • Save Answeror/3037132 to your computer and use it in GitHub Desktop.
Save Answeror/3037132 to your computer and use it in GitHub Desktop.
交叉引用
#include <algorithm>
#include <cctype>
#include <fstream>
#include <functional>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <vector>
// The definitions in this file use standard-library names unqualified.
using namespace std;
// Read-only iterator over the characters of a string; the URL-scanning
// functions in this file take and return positions of this type.
typedef string::const_iterator iter;
/// Tells whether a character may appear inside a URL.
///
/// A character qualifies when it is alphanumeric or one of the punctuation
/// characters listed in `urlcs`.
///
/// @param ch  character to classify
/// @return    true if `ch` is accepted as part of a URL
bool isurlch(char ch)
{
    static const std::string urlcs = "~;/?:@=&$-_.+!*'{},";
    // The <cctype> classifiers require a value representable as unsigned char
    // (or EOF); handing them a negative plain char is undefined behaviour.
    const unsigned char uch = static_cast<unsigned char>(ch);
    return std::isalnum(uch) != 0 || urlcs.find(ch) != std::string::npos;
}
// Returns the position of the first character in [begin, end) that is not a
// URL character according to isurlch(), or `end` when every character is one.
iter url_end(iter begin, iter end)
{
    iter stop = begin;
    while (stop != end && isurlch(*stop))
    {
        ++stop;
    }
    return stop;
}
/// Locates the start of the first URL in [begin, end).
///
/// A URL is recognised by the separator "://" when it is
///   - preceded by at least one alphabetic character (the protocol name), and
///   - followed by at least one character accepted by isurlch().
///
/// @param begin  start of the range to scan
/// @param end    one past the last character of the range
/// @return       position of the first letter of the protocol name, or `end`
///               when the range contains no URL
iter url_begin(iter begin, iter end)
{
    static const std::string sep = "://";
    const std::string::difference_type sep_len =
        static_cast<std::string::difference_type>(sep.size());

    iter i = begin;
    while ((i = std::search(i, end, sep.begin(), sep.end())) != end)
    {
        // The separator must not sit at the very start of the range and must
        // be followed by at least one URL character.
        if (i != begin && i + sep_len != end && isurlch(i[sep_len]))
        {
            // Walk backwards over the letters of the protocol name. The cast
            // keeps std::isalpha well defined for negative char values and
            // avoids passing the overloaded name `isalpha` as a predicate.
            iter start = i;
            while (start != begin &&
                   std::isalpha(static_cast<unsigned char>(start[-1])) != 0)
            {
                --start;
            }
            // At least one letter has to precede the separator.
            if (start != i) return start;
        }
        // Not a URL here: continue the search after this separator.
        i += sep_len;
    }
    return end;
}
vector<string> extract_url(const string &text)
{
vector<string> urls;
iter begin = text.begin();
while (begin != text.end())
{
begin = url_begin(begin, text.end());
if (begin != text.end())
{
iter end = url_end(begin, text.end());
urls.push_back(string(begin, end));
begin = end;
}
}
return urls;
}
// Cross-reference table: maps a word to the list of line numbers recorded for it.
typedef map<string, vector<int> > dict;
// Breaks `s` into whitespace-separated tokens and returns them in order.
// An empty or all-whitespace string yields an empty vector.
vector<string> split(const string &s)
{
    istringstream source(s);
    vector<string> tokens;
    string token;
    // Formatted extraction skips whitespace and reads one token at a time.
    while (source >> token)
    {
        tokens.push_back(token);
    }
    return tokens;
}
// Builds a cross-reference table from the text read from `is`.
//
// Every line is handed to `find_words`; each word it returns is mapped to the
// 1-based number of that line. A line number appears at most once per word,
// even when the word occurs several times on the same line.
//
//   is          input stream, consumed line by line until getline() fails
//   find_words  function that extracts the words of one line (defaults to split)
//
// Returns the table mapping each word to its line numbers in increasing order.
dict xref(istream &is,
vector<string> find_words(const string&) = split)
{
    dict table;
    int lineno = 0;
    for (string text; getline(is, text); )
    {
        ++lineno;
        const vector<string> tokens = find_words(text);
        for (const string &token : tokens)
        {
            vector<int> &hits = table[token];
            // Line numbers only grow, so a repeat on the same line can only
            // match the most recent entry.
            if (hits.empty() || hits.back() != lineno)
            {
                hits.push_back(lineno);
            }
        }
    }
    return table;
}
// Reads "in.txt" from the current working directory, cross-references the
// URLs it contains, and prints one line per URL in the form
//   <url>: <line> <line> ...
// Returns 0 on success and 1 when the input file cannot be opened.
int main()
{
    std::ifstream ifs("in.txt");
    if (!ifs)
    {
        // Report the failure instead of silently printing nothing.
        std::cerr << "cannot open in.txt" << '\n';
        return 1;
    }

    const auto d = xref(ifs, extract_url);
    for (const auto &entry : d)
    {
        std::cout << entry.first << ": ";
        std::copy(entry.second.begin(), entry.second.end(),
                  std::ostream_iterator<int>(std::cout, " "));
        // '\n' rather than endl: no need to flush after every entry.
        std::cout << '\n';
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment