Created
August 17, 2014 13:38
-
-
Save bsdelf/3e9ed789c433ac2f8802 to your computer and use it in GitHub Desktop.
Remove the it-ebooks.info watermark from "pdf.th7.cn/down/files/1407/Real%20World%20OCaml.pdf"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* clang++ -I/usr/local/include -pipe -std=c++11 -stdlib=libc++ -o main.cc.o -c main.cc | |
* clang++ -L/usr/local/lib -lpodofo -stdlib=libc++ -o b.out main.cc.o | |
* | |
*/ | |
#include <iostream> | |
#include <string> | |
#include <list> | |
using namespace std; | |
#include <podofo/podofo.h> | |
/**
 * Debug helper: dumps the result of each PoDoFo type predicate for `obj`
 * to stdout, one value per line, in this fixed order:
 * Bool, Number, Real, String, HexString, Name, Array, Dictionary,
 * RawData, Null, Reference.
 *
 * Nothing is printed when the object reports itself as null, so the
 * "Null" line is always false whenever output is produced.
 *
 * @param obj object to inspect; it is dereferenced unconditionally.
 */
void ShowObjType(const PoDoFo::PdfObject* obj) {
    // Null objects are skipped entirely.
    if (obj->IsNull()) {
        return;
    }

    // Braced initializers are evaluated left to right, so the predicates
    // are queried in the same order in which they are printed.
    const bool type_flags[] = {
        obj->IsBool(),
        obj->IsNumber(),
        obj->IsReal(),
        obj->IsString(),
        obj->IsHexString(),
        obj->IsName(),
        obj->IsArray(),
        obj->IsDictionary(),
        obj->IsRawData(),
        obj->IsNull(),
        obj->IsReference(),
    };

    for (const bool flag : type_flags) {
        cout << flag << endl;
    }
}
int main() { | |
PoDoFo::PdfVecObjects objs; | |
PoDoFo::PdfParser parser(&objs); | |
parser.ParseFile("/tmp/rwo.pdf", false); | |
int idx = 0; | |
list<int> badidx; | |
for (const auto obj: objs) { | |
bool bdel = false; | |
// annotate | |
if (obj->IsDictionary()) { | |
const auto& d0 = obj->GetDictionary(); | |
const auto& k0 = d0.GetKeys(); | |
if (k0.size() == 2 && | |
d0.HasKey(PoDoFo::PdfName("Length")) && | |
d0.HasKey(PoDoFo::PdfName("LC"))) { | |
const auto stream = (PoDoFo::PdfMemStream*)obj->GetStream(); | |
if (stream->GetLength() > 0 && | |
string(stream->Get()).find("www.it-ebooks.info") != string::npos) { | |
bdel = true; | |
} | |
} | |
} | |
// URI link | |
if (obj->IsDictionary()) { | |
const auto& d0 = obj->GetDictionary(); | |
if (d0.HasKey(PoDoFo::PdfName("A"))) { | |
const auto& k0 = d0.GetKey(PoDoFo::PdfName("A")); | |
if (k0->IsDictionary()) { | |
const auto& d1 = k0->GetDictionary(); | |
if (d1.HasKey(PoDoFo::PdfName("URI"))) { | |
auto str = d1.GetKey(PoDoFo::PdfName("URI"))->GetString().GetString(); | |
if (str == string("http://www.it-ebooks.info/")) { | |
bdel = true; | |
} | |
} | |
} | |
} | |
} | |
if (bdel) { | |
badidx.insert(badidx.begin(), idx); | |
} | |
idx++; | |
} | |
cout << "obj count:" << badidx.size() << endl; | |
for (auto offset: badidx) { | |
auto obj = objs.begin() + offset; | |
delete objs.RemoveObject(obj); | |
} | |
// dangle refernce | |
for (const auto obj: objs) { | |
auto prune_array = [&badidx](PoDoFo::PdfArray& arr) { | |
for (int i = arr.size()-1; i >= 0; --i) { | |
const auto& item = arr[i]; | |
if (item.IsReference()) { | |
bool orphan = false; | |
const auto& num = item.GetReference().ObjectNumber(); | |
for (auto offset: badidx) { | |
if (offset+1 == num) { | |
orphan = true; | |
break; | |
} | |
} | |
if (orphan) { | |
arr.erase(arr.begin()+i); | |
cout << "erased" << endl; | |
} | |
} | |
} | |
}; | |
if (obj->IsArray()) { | |
auto& arr = obj->GetArray(); | |
prune_array(arr); | |
} | |
if (obj->IsDictionary()) { | |
auto& d0 = obj->GetDictionary(); | |
const auto& k0 = d0.GetKeys(); | |
auto prune_key = [&](const string& key) { | |
if (d0.HasKey(PoDoFo::PdfName(key))) { | |
auto k1 = d0.GetKey(PoDoFo::PdfName(key)); | |
if (k1->IsArray()) { | |
auto& arr = k1->GetArray(); | |
prune_array(arr); | |
if (arr.empty()) { | |
d0.RemoveKey(PoDoFo::PdfName(key)); | |
cout << "removed" << endl; | |
} | |
} | |
} | |
}; | |
prune_key("Annots"); | |
prune_key("Contents"); | |
} | |
} | |
objs.Finish(); | |
// save | |
{ | |
auto ptrailer = parser.GetTrailer(); | |
PoDoFo::PdfWriter writer(&objs, ptrailer); | |
writer.SetPdfVersion(PoDoFo::ePdfVersion_1_6); | |
writer.Write("out.pdf"); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment