Skip to content

Instantly share code, notes, and snippets.

@zikani03
Created July 18, 2017 11:12
Show Gist options
  • Save zikani03/afd28b6c6b80432ab69c6b1a11c637f4 to your computer and use it in GitHub Desktop.
Save zikani03/afd28b6c6b80432ab69c6b1a11c637f4 to your computer and use it in GitHub Desktop.
Extract SQL queries from pentaho files with Rust
extern crate zip;
extern crate quick_xml;
extern crate html_entities;
use std::io::BufReader;
use std::fs::File;
use std::io::Read;
use std::ops::Deref;
use std::collections::BTreeMap;
use zip::read::ZipArchive;
use zip::read::ZipFile;
use quick_xml::reader::Reader;
use quick_xml::events::Event;
use quick_xml::events::attributes::Attribute;
use html_entities::decode_html_entities;
fn main() {
let arg = std::env::args().nth(1);
let arg_dest = std::env::args().nth(2);
read_pentaho_file(arg.unwrap());
}
fn read_pentaho_file(filename: String) -> zip::result::ZipResult<()> {
let mut reader = BufReader::new(File::open(filename).unwrap());
let mut zip = try!(ZipArchive::new(reader));
// let mut files: Vec<String> = vec![];
let mut data: String = String::new();
for i in 0..zip.len() {
let mut file = zip.by_index(i).unwrap();
if file.name().ends_with("sql-ds.xml") {
file.read_to_string (&mut data);
extract_sql_queries(data.clone());
}
}
Ok(())
}
/// Extracts the static SQL queries from the contents of a Pentaho
/// `sql-ds.xml` file and prints each one to stdout.
///
/// The SQL queries in Pentaho report files are kept in a file named
/// `sql-ds.xml`. The structure of the XML file contents is as follows:
///
/// ```text
/// data:sql-datasource
///   > data:query-definitions
///     > data:query name="QUERY_NAME_HERE"
///       > data:static-query > TEXT
/// ```
///
/// For every text node found inside a `data:static-query` element, the text
/// is HTML-entity decoded and printed together with the `name` attribute of
/// the most recently opened `data:query` element (empty if it had none).
/// Text that is not valid UTF-8 is decoded lossily, and text whose entities
/// cannot be decoded is printed as-is rather than dropped.
///
/// # Panics
///
/// Panics if the XML reader reports an error while parsing `contents`.
fn extract_sql_queries(contents: String) {
    let mut reader = Reader::from_str(contents.as_str());
    reader.trim_text(true);
    let mut buf = Vec::new();
    // Collected `(query name, decoded SQL text)` pairs, in document order.
    let mut queries: Vec<(String, String)> = Vec::new();
    let mut query_name = String::new();
    let mut is_query_data = false;
    loop {
        match reader.read_namespaced_event(&mut buf) {
            Ok((_, Event::Start(ref e))) => {
                match e.name() {
                    b"data:query" => {
                        // Look the attribute up by its *key*; malformed
                        // attributes are skipped.
                        let name = e.attributes()
                            .filter_map(|attr| attr.ok())
                            .find(|attr| attr.key == &b"name"[..])
                            .map(|attr| String::from_utf8_lossy(attr.value).into_owned());
                        // Reset to empty when absent so a previous query's
                        // name is never attributed to this one.
                        query_name = name.unwrap_or_default();
                        is_query_data = false;
                    },
                    b"data:static-query" => is_query_data = true,
                    // Any other tag does not contain actual query content.
                    _ => is_query_data = false,
                }
            },
            Ok((_, Event::Text(ref e))) => {
                if is_query_data {
                    let raw = String::from_utf8_lossy(e.deref());
                    // Keep the raw text if entity decoding fails instead of
                    // silently losing the query.
                    let sql = decode_html_entities(&raw)
                        .unwrap_or_else(|_| raw.to_string());
                    queries.push((query_name.clone(), sql));
                }
            },
            // Text following a closing tag is never query content.
            Ok((_, Event::End(_))) => is_query_data = false,
            Ok((_, Event::Eof)) => break,
            Err(e) => panic!("Error {:?}", e),
            _ => (),
        }
        // From the docs: "if we don't keep a borrow elsewhere, we can clear
        // the buffer to keep memory usage low".
        buf.clear();
    }
    for &(ref name, ref sql) in &queries {
        println!("Query sql={:?} name={:?}", sql, name);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment