Skip to content

Instantly share code, notes, and snippets.

@InsulaVentus
Last active June 18, 2018 08:15
Show Gist options
  • Save InsulaVentus/deb983b7b24d905b9bdfbfc192682bf1 to your computer and use it in GitHub Desktop.
Save InsulaVentus/deb983b7b24d905b9bdfbfc192682bf1 to your computer and use it in GitHub Desktop.
package no.statnett.ois;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
/**
 * Counts how often a topic's name occurs in section 0 of its English Wikipedia article.
 *
 * <p>The section is requested as JSON from the MediaWiki {@code action=parse} endpoint and the raw
 * response text is searched, so the count is not limited to visible prose. Matching is
 * case-sensitive: {@code "pizza"} and {@code "Pizza"} are counted separately.
 */
public class WikipediaArticle {

    /** Request URL template; the single {@code %s} receives the URL-encoded page title. */
    private static final String API_URL_TEMPLATE =
            "https://en.wikipedia.org/w/api.php?action=parse&section=0&prop=text&format=json&utf8&page=%s";

    /** JSON fragment after which counting starts in the response. */
    private static final String ARTICLE_TEXT_MARKER = "\"text\":{\"*\":";

    /** Connect and read timeout for the HTTP request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;

    public static void main(String[] args) {
        System.out.println(topicCount("article"));
        System.out.println(topicCount("pizza"));
        System.out.println(topicCount("Pizza"));
        System.out.println(topicCount("MØ"));
        System.out.println(topicCount(""));
    }

    /**
     * Returns the number of case-sensitive, non-overlapping occurrences of {@code topic} in the
     * response for the Wikipedia page of the same name.
     *
     * @param topic page title to look up and to search for; may be {@code null}
     * @return the occurrence count; {@code 0} when {@code topic} is {@code null} or empty, when the
     *     server does not answer with HTTP 200, or when the response contains no article text marker
     * @throws RuntimeException if the URL cannot be built or the request fails with an I/O error
     */
    static int topicCount(String topic) {
        // Short-circuit before any network access.
        if (topic == null || topic.isEmpty()) {
            return 0;
        }
        URL wikiUrl = createWikiUrl(topic);
        String wikiContent = getContent(wikiUrl);
        int beginningOfArticle = findBeginningOfArticle(wikiContent);
        return countOccurrencesOfString(wikiContent, topic, beginningOfArticle);
    }

    /**
     * Builds the API URL for the given page title, URL-encoding the title as UTF-8.
     *
     * @throws RuntimeException wrapping the cause if the URL is malformed or UTF-8 is unsupported
     */
    private static URL createWikiUrl(final String topic) {
        try {
            return new URL(String.format(API_URL_TEMPLATE, URLEncoder.encode(topic, "UTF-8")));
        } catch (MalformedURLException | UnsupportedEncodingException e) {
            throw new RuntimeException("Error: Could not create url", e);
        }
    }

    /**
     * Performs a GET request and returns the response body decoded as UTF-8, with line terminators
     * removed.
     *
     * @return the body, or an empty string when the response code is not HTTP 200
     * @throws RuntimeException wrapping the {@link IOException} if connecting, reading or closing fails
     */
    private static String getContent(final URL url) {
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) url.openConnection();
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            connection.connect();
            if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return "";
            }
            StringBuilder contentBuilder = new StringBuilder();
            // try-with-resources closes the reader (and the underlying stream) before the
            // connection is disconnected in the finally block below.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    contentBuilder.append(line);
                }
            }
            return contentBuilder.toString();
        } catch (IOException e) {
            throw new RuntimeException("Error: Unable to get content", e);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Locates the position right after the article text marker in the response.
     *
     * @return the index of the first character following the marker, or {@code -1} if the marker
     *     is absent
     */
    private static int findBeginningOfArticle(final String wikiContent) {
        int markerIndex = wikiContent.indexOf(ARTICLE_TEXT_MARKER);
        return markerIndex == -1 ? -1 : markerIndex + ARTICLE_TEXT_MARKER.length();
    }

    /**
     * Counts non-overlapping occurrences of {@code findStr} in {@code str}, scanning from
     * {@code startingFromIndex} to the end.
     *
     * @param startingFromIndex index to start at; a negative value means "nothing to search"
     * @return the number of matches; {@code 0} for a negative start index or an empty search string
     */
    private static int countOccurrencesOfString(final String str, final String findStr, final int startingFromIndex) {
        // An empty needle matches at every index without advancing the cursor, which would
        // never terminate; a negative start is the "marker not found" signal.
        if (startingFromIndex < 0 || findStr.isEmpty()) {
            return 0;
        }
        int count = 0;
        int matchIndex = str.indexOf(findStr, startingFromIndex);
        while (matchIndex != -1) {
            count++;
            // Resume after the whole match so overlapping hits are not counted twice.
            matchIndex = str.indexOf(findStr, matchIndex + findStr.length());
        }
        return count;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment