Skip to content

Instantly share code, notes, and snippets.

@JohnKim
Last active December 15, 2015 15:49
Show Gist options
  • Save JohnKim/5284616 to your computer and use it in GitHub Desktop.
Save JohnKim/5284616 to your computer and use it in GitHub Desktop.
Get the title of an HTML page (for Google App Engine)
package io.stalk.gae.html;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.servlet.http.*;
@SuppressWarnings("serial")
public class GetPageTitleServlet extends HttpServlet {
private String startTag = "<title>";
private String endTag = "</title>";
private int startTagLength = startTag.length();
public void doGet(HttpServletRequest req, HttpServletResponse resp)
throws IOException {
String url = req.getParameter("q");
String charset = req.getParameter("c");
String title = getPageTitle(url, charset);
resp.setContentType("application/json");
resp.setCharacterEncoding("UTF-8");
if(title != null){
resp.getWriter().println(String.format("{\"status\":\"ok\",\"url\":\"%s\"}", title));
}else{
resp.getWriter().println("{\"status\":\"error\",\"url\":\"\"}");
}
}
private String getPageTitle(String urlString, String cset){
if(cset != null && cset.length() > 0 && Charset.isSupported(cset)){
Charset charset = Charset.forName(cset);
return getPageTitle2(urlString, charset);
}else{
return getPageTitle2(urlString, null);
}
}
private String getPageTitle2(String urlString, Charset c){
if(urlString == null || urlString.length() == 0) return "";
if(urlString.indexOf("http://") != 0 && urlString.indexOf("https://") != 0 )
urlString = "http://"+urlString;
BufferedReader bufReader;
String line;
boolean foundStartTag = false;
boolean foundEndTag = false;
int startIndex, endIndex;
String title = "";
try
{
URL theURL = new URL(urlString);
if(c !=null){
bufReader = new BufferedReader( new InputStreamReader(theURL.openStream(), c) );
}else{
bufReader = new BufferedReader( new InputStreamReader(theURL.openStream()) );
}
while( (line = bufReader.readLine()) != null && !foundEndTag)
{
if( !foundStartTag && (startIndex = line.toLowerCase().indexOf(startTag)) != -1 ){
foundStartTag = true;
}else{
startIndex = -startTagLength;
}
if( foundStartTag && (endIndex = line.toLowerCase().indexOf(endTag)) != -1 ){
foundEndTag = true;
}else{
endIndex = line.length();
}
if( foundStartTag || foundEndTag ){
title += line.substring( startIndex + startTagLength, endIndex );
}
}
bufReader.close();
if( title.length() > 0 ){
return title;
}else{
return "";
}
}
catch( IOException e )
{
e.printStackTrace();
}
return "";
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment