Skip to content

Instantly share code, notes, and snippets.

@Glamdring
Created May 21, 2016 21:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Glamdring/e81461090a24f3b15177a02ba63a34ad to your computer and use it in GitHub Desktop.
Save Glamdring/e81461090a24f3b15177a02ba63a34ad to your computer and use it in GitHub Desktop.
Legislation effort of the Bulgarian executive branch and legislature
package legislation;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.jsoup.Jsoup;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import com.google.common.io.CharStreams;
public class LegislationStats {
/** Base URL to which a bill ID is appended to form the address of that bill's XML export. */
private static final String ROOT = "http://parliament.bg/export.php/bg/xml/bills/";
/**
 * Downloads the law listing from parliament.bg, fetches the XML export of every
 * law linked from it, and tallies the number of acts and the total text length
 * (in characters, with HTML markup stripped) separately for acts imported by the
 * Council of Ministers and for all other acts. Both tallies are printed to
 * standard output.
 *
 * <p>Acts without a {@code LawBody} element, without an {@code Importer} element,
 * or whose {@code Importer} has no {@code value} attribute are skipped.
 *
 * @param args ignored
 * @throws Exception if the listing or any XML export cannot be fetched or parsed
 */
public static void main(String[] args) throws Exception {
    String body = fetchLawListing();
    Pattern p = Pattern.compile("<a href=\"/bg/laws/ID/([0-9]+)/\">");
    Matcher m = p.matcher(body);
    DocumentBuilder builder = newHardenedDocumentBuilder();
    LegislativeEffort executiveBranch = new LegislativeEffort();
    LegislativeEffort legislature = new LegislativeEffort();
    while (m.find()) {
        String url = ROOT + m.group(1);
        try (InputStream in = new URL(url).openStream()) {
            Document doc = builder.parse(in);
            NodeList bodyNodes = doc.getElementsByTagName("LawBody");
            if (bodyNodes.getLength() == 0) {
                continue;
            }
            // The law body carries HTML; Jsoup reduces it to plain text before counting.
            int symbols = Jsoup.parse(bodyNodes.item(0).getTextContent()).text().length();
            String importer = importerOf(doc);
            if (importer == null) {
                // No importer information: the act cannot be attributed to either side.
                continue;
            }
            // "Министерски съвет" is Bulgarian for "Council of Ministers".
            LegislativeEffort target =
                    importer.equals("Министерски съвет") ? executiveBranch : legislature;
            target.actsPassed++;
            target.symbols += symbols;
        }
    }
    System.out.println("Executive branch: " + executiveBranch);
    System.out.println("Legislature: " + legislature);
}

/**
 * Submits the (empty) law search form and returns the resulting HTML page.
 * The request writer and response reader are closed, and the connection is
 * released, even when the request fails.
 */
private static String fetchLawListing() throws java.io.IOException {
    URL searchURL = new URL("http://parliament.bg/bg/laws");
    HttpURLConnection conn = (HttpURLConnection) searchURL.openConnection();
    try {
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        conn.setDoOutput(true);
        try (Writer writer = new OutputStreamWriter(conn.getOutputStream(), "UTF-8")) {
            // Empty search criteria; the submit value is the URL-encoded button label "Търси" ("Search").
            writer.write("from=&to=&L_ActL_title=&submit=%D0%A2%D1%8A%D1%80%D1%81%D0%B8");
        }
        // Asking for the input stream sends the request; no explicit connect() is needed.
        try (InputStreamReader reader = new InputStreamReader(conn.getInputStream(), "UTF-8")) {
            return CharStreams.toString(reader);
        }
    } finally {
        conn.disconnect();
    }
}

/**
 * Creates an XML parser that does not resolve external entities or external DTDs,
 * since the documents it parses are fetched from the network over plain HTTP.
 */
private static DocumentBuilder newHardenedDocumentBuilder()
        throws javax.xml.parsers.ParserConfigurationException {
    DocumentBuilderFactory dmf = DocumentBuilderFactory.newInstance();
    dmf.setFeature("http://xml.org/sax/features/external-general-entities", false);
    dmf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    dmf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    dmf.setXIncludeAware(false);
    return dmf.newDocumentBuilder();
}

/**
 * Returns the {@code value} attribute of the first {@code Importer} element in the
 * document, or {@code null} if the element or the attribute is absent.
 */
private static String importerOf(Document doc) {
    NodeList nodes = doc.getElementsByTagName("Importer");
    if (nodes.getLength() == 0) {
        return null;
    }
    org.w3c.dom.Node value = nodes.item(0).getAttributes().getNamedItem("value");
    return value == null ? null : value.getTextContent();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment