Legislative effort of the Bulgarian executive branch and legislature
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package legislation; | |
import java.io.InputStream; | |
import java.io.InputStreamReader; | |
import java.io.OutputStreamWriter; | |
import java.io.Writer; | |
import java.net.HttpURLConnection; | |
import java.net.URL; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
import javax.xml.parsers.DocumentBuilder; | |
import javax.xml.parsers.DocumentBuilderFactory; | |
import org.jsoup.Jsoup; | |
import org.w3c.dom.Document; | |
import org.w3c.dom.NodeList; | |
import com.google.common.io.CharStreams; | |
public class LegislationStats { | |
private static final String ROOT = "http://parliament.bg/export.php/bg/xml/bills/"; | |
public static void main(String[] args) throws Exception { | |
URL searchURL = new URL("http://parliament.bg/bg/laws"); | |
HttpURLConnection conn = (HttpURLConnection) searchURL.openConnection(); | |
conn.setRequestMethod("POST"); | |
conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); | |
conn.setDoOutput(true); | |
Writer writer = new OutputStreamWriter(conn.getOutputStream(), "UTF-8"); | |
writer.write("from=&to=&L_ActL_title=&submit=%D0%A2%D1%8A%D1%80%D1%81%D0%B8"); | |
writer.flush(); | |
conn.connect(); | |
String body = CharStreams.toString(new InputStreamReader(conn.getInputStream(), "UTF-8")); | |
conn.disconnect(); | |
Pattern p = Pattern.compile("<a href=\"/bg/laws/ID/([0-9]+)/\">"); | |
Matcher m = p.matcher(body); | |
DocumentBuilderFactory dmf = DocumentBuilderFactory.newInstance(); | |
DocumentBuilder builder = dmf.newDocumentBuilder(); | |
LegislativeEffort executiveBranch = new LegislativeEffort(); | |
LegislativeEffort legislature = new LegislativeEffort(); | |
while (m.find()) { | |
String url = ROOT + m.group(1); | |
try (InputStream in = new URL(url).openStream()) { | |
Document doc = builder.parse(in); | |
NodeList bodyNodes = doc.getElementsByTagName("LawBody"); | |
if (bodyNodes.getLength() == 0) { | |
continue; | |
} | |
// strip HTML contents | |
int symbols = Jsoup.parse(bodyNodes.item(0).getTextContent()).text().length(); | |
NodeList nodes = doc.getElementsByTagName("Importer"); | |
if (nodes.getLength() > 0) { | |
if (nodes.item(0).getAttributes().getNamedItem("value").getTextContent().equals("Министерски съвет")) { | |
executiveBranch.actsPassed ++; | |
executiveBranch.symbols += symbols; | |
} else { | |
legislature.actsPassed ++; | |
legislature.symbols += symbols; | |
} | |
} | |
} | |
} | |
System.out.println("Executive branch: " + executiveBranch); | |
System.out.println("Legislature: " + legislature); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment