Last active
June 18, 2018 08:15
-
-
Save InsulaVentus/deb983b7b24d905b9bdfbfc192682bf1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package no.statnett.ois; | |
import java.io.*; | |
import java.net.HttpURLConnection; | |
import java.net.MalformedURLException; | |
import java.net.URL; | |
import java.net.URLEncoder; | |
import java.nio.charset.StandardCharsets; | |
/**
 * Counts how often a topic string occurs in the lead section of the English
 * Wikipedia page of the same name.
 *
 * <p>The page is fetched through the MediaWiki {@code action=parse} API as JSON.
 * The count is taken over the raw response text that follows the
 * {@code "text":{"*":} marker, so it is case-sensitive and includes matches
 * inside whatever markup the response carries, not only visible prose.
 */
public class WikipediaArticle {

    /** API request for section 0 of a page; the URL-encoded page title is appended. */
    private static final String WIKI_API_URL_PREFIX =
            "https://en.wikipedia.org/w/api.php?action=parse&section=0&prop=text&format=json&utf8&page=";

    /** JSON fragment that immediately precedes the article text in the response. */
    private static final String ARTICLE_TEXT_MARKER = "\"text\":{\"*\":";

    private static final int CONNECT_TIMEOUT_MILLIS = 5000;
    private static final int READ_TIMEOUT_MILLIS = 5000;

    /**
     * Prints the topic count for a handful of sample topics.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println(topicCount("article"));
        System.out.println(topicCount("pizza"));
        System.out.println(topicCount("Pizza"));
        // Latin capital O with stroke: exercises URL encoding of a non-ASCII title.
        System.out.println(topicCount("M\u00D8"));
        System.out.println(topicCount(""));
    }

    /**
     * Fetches the page named {@code topic} and counts the occurrences of
     * {@code topic} in the returned article text.
     *
     * @param topic page title and search string; may be {@code null}
     * @return the number of non-overlapping, case-sensitive occurrences;
     *         {@code 0} when {@code topic} is {@code null} or empty, when the
     *         server does not answer with HTTP 200, or when the response does
     *         not contain the article-text marker
     * @throws RuntimeException if the URL cannot be built or the request fails
     */
    static int topicCount(String topic) {
        if (topic == null || topic.isEmpty()) {
            // Nothing to look up; also avoids a pointless network round trip.
            return 0;
        }
        URL wikiUrl = createWikiUrl(topic);
        String wikiContent = getContent(wikiUrl);
        int beginningOfArticle = findBeginningOfArticle(wikiContent);
        return countOccurrencesOfString(wikiContent, topic, beginningOfArticle);
    }

    /**
     * Builds the API URL for the given page title.
     *
     * @param topic page title; URL-encoded as UTF-8 before being appended
     * @return the request URL
     * @throws RuntimeException if the URL is malformed or UTF-8 is unsupported
     */
    private static URL createWikiUrl(final String topic) {
        try {
            return new URL(WIKI_API_URL_PREFIX + URLEncoder.encode(topic, "UTF-8"));
        } catch (MalformedURLException | UnsupportedEncodingException e) {
            throw new RuntimeException("Error: Could not create url", e);
        }
    }

    /**
     * Performs a GET request and returns the response body decoded as UTF-8.
     * Line terminators are dropped because the body is read line by line.
     *
     * @param url the URL to fetch
     * @return the response body, or an empty string when the status is not HTTP 200
     * @throws RuntimeException if connecting, reading or closing the stream fails
     */
    private static String getContent(final URL url) {
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) url.openConnection();
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            connection.setReadTimeout(READ_TIMEOUT_MILLIS);
            connection.connect();

            if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return "";
            }

            // try-with-resources closes the reader before the finally block disconnects.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                StringBuilder contentBuilder = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    contentBuilder.append(line);
                }
                return contentBuilder.toString();
            }
        } catch (IOException e) {
            throw new RuntimeException("Error: Unable to get content", e);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Locates the position just past the article-text marker.
     *
     * @param wikiContent the raw API response
     * @return the index of the first character after the marker, or {@code -1}
     *         when the marker is absent
     */
    private static int findBeginningOfArticle(final String wikiContent) {
        int markerIndex = wikiContent.indexOf(ARTICLE_TEXT_MARKER);
        if (markerIndex == -1) {
            return -1;
        }
        return markerIndex + ARTICLE_TEXT_MARKER.length();
    }

    /**
     * Counts non-overlapping occurrences of {@code findStr} in {@code str},
     * scanning from {@code startingFromIndex}.
     *
     * @param str the text to search
     * @param findStr the string to look for
     * @param startingFromIndex index to start scanning at; a negative value
     *        means "no article found" and yields {@code 0}
     * @return the number of matches; {@code 0} when {@code findStr} is empty
     */
    private static int countOccurrencesOfString(final String str, final String findStr, final int startingFromIndex) {
        // An empty needle matches at every position without advancing the scan,
        // which would never terminate; treat it as "no occurrences".
        if (startingFromIndex < 0 || findStr.isEmpty()) {
            return 0;
        }
        int count = 0;
        int matchIndex = str.indexOf(findStr, startingFromIndex);
        while (matchIndex != -1) {
            count++;
            // Resume after the whole match so overlapping hits are not counted twice.
            matchIndex = str.indexOf(findStr, matchIndex + findStr.length());
        }
        return count;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment