Last active
December 15, 2015 15:49
-
-
Save JohnKim/5284616 to your computer and use it in GitHub Desktop.
Get the title of an HTML page (for GAE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package io.stalk.gae.html; | |
import java.io.BufferedReader; | |
import java.io.IOException; | |
import java.io.InputStreamReader; | |
import java.net.URL; | |
import java.nio.charset.Charset; | |
import javax.servlet.http.*; | |
@SuppressWarnings("serial") | |
public class GetPageTitleServlet extends HttpServlet { | |
private String startTag = "<title>"; | |
private String endTag = "</title>"; | |
private int startTagLength = startTag.length(); | |
public void doGet(HttpServletRequest req, HttpServletResponse resp) | |
throws IOException { | |
String url = req.getParameter("q"); | |
String charset = req.getParameter("c"); | |
String title = getPageTitle(url, charset); | |
resp.setContentType("application/json"); | |
resp.setCharacterEncoding("UTF-8"); | |
if(title != null){ | |
resp.getWriter().println(String.format("{\"status\":\"ok\",\"url\":\"%s\"}", title)); | |
}else{ | |
resp.getWriter().println("{\"status\":\"error\",\"url\":\"\"}"); | |
} | |
} | |
private String getPageTitle(String urlString, String cset){ | |
if(cset != null && cset.length() > 0 && Charset.isSupported(cset)){ | |
Charset charset = Charset.forName(cset); | |
return getPageTitle2(urlString, charset); | |
}else{ | |
return getPageTitle2(urlString, null); | |
} | |
} | |
private String getPageTitle2(String urlString, Charset c){ | |
if(urlString == null || urlString.length() == 0) return ""; | |
if(urlString.indexOf("http://") != 0 && urlString.indexOf("https://") != 0 ) | |
urlString = "http://"+urlString; | |
BufferedReader bufReader; | |
String line; | |
boolean foundStartTag = false; | |
boolean foundEndTag = false; | |
int startIndex, endIndex; | |
String title = ""; | |
try | |
{ | |
URL theURL = new URL(urlString); | |
if(c !=null){ | |
bufReader = new BufferedReader( new InputStreamReader(theURL.openStream(), c) ); | |
}else{ | |
bufReader = new BufferedReader( new InputStreamReader(theURL.openStream()) ); | |
} | |
while( (line = bufReader.readLine()) != null && !foundEndTag) | |
{ | |
if( !foundStartTag && (startIndex = line.toLowerCase().indexOf(startTag)) != -1 ){ | |
foundStartTag = true; | |
}else{ | |
startIndex = -startTagLength; | |
} | |
if( foundStartTag && (endIndex = line.toLowerCase().indexOf(endTag)) != -1 ){ | |
foundEndTag = true; | |
}else{ | |
endIndex = line.length(); | |
} | |
if( foundStartTag || foundEndTag ){ | |
title += line.substring( startIndex + startTagLength, endIndex ); | |
} | |
} | |
bufReader.close(); | |
if( title.length() > 0 ){ | |
return title; | |
}else{ | |
return ""; | |
} | |
} | |
catch( IOException e ) | |
{ | |
e.printStackTrace(); | |
} | |
return ""; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment