Skip to content

Instantly share code, notes, and snippets.

@liangzai-cool
Created September 28, 2016 07:35
Show Gist options
  • Save liangzai-cool/2eda2c59558d6daa2c660fabeafa2091 to your computer and use it in GitHub Desktop.
Save liangzai-cool/2eda2c59558d6daa2c660fabeafa2091 to your computer and use it in GitHub Desktop.
/**
 * Jsoup utility class.
 * founder
 *
 * @author XueLiang
 * @date 2016-09-23 15:16:36
 * @version 1.0
 */
public class JsoupUtils {

    /**
     * Maximum number of consecutive meta-refresh hops that will be followed.
     * Guards against pages that refresh to each other in a cycle.
     */
    private static final int MAX_META_REFRESH_HOPS = 10;

    /**
     * Matches a meta-refresh content value that carries a target, e.g.
     * {@code "0; url=http://example.com/"}, {@code "0;URL='/next'"} or
     * {@code "5 ; url = next.html"}. Group 1 is the (possibly quoted) target.
     * A bare delay such as {@code "5"} does not match.
     */
    private static final Pattern META_REFRESH = Pattern.compile(
            "\\s*\\d+(?:\\.\\d*)?\\s*[;,]\\s*url\\s*=\\s*(.+?)\\s*",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /**
     * Follows {@code <meta http-equiv="refresh">} style redirects.
     *
     * <p>Starting from {@code document}, reads its "refresh" meta value; while that
     * value names a target URL, the target is resolved against the current
     * document's base URI, fetched, and inspected again. The first document
     * without a refresh target is returned.
     *
     * <p>NOTE(review): {@code getMetaValue} is not defined in this class as shown;
     * it is presumably expected to return the {@code content} attribute of the
     * matching meta tag, or {@code null} when absent - confirm against its definition.
     *
     * @param document the document to inspect
     * @return {@code document} itself when it has no refresh target, otherwise the
     *         final document reached by following the refresh chain
     * @throws IOException if fetching a target fails, or if more than
     *         {@value #MAX_META_REFRESH_HOPS} refreshes are chained
     * @throws URISyntaxException if the base URI or a refresh target is not a valid URI
     */
    public Document supportMetaRefresh(Document document) throws IOException, URISyntaxException {
        Document current = document;
        // Iterate instead of recursing so a refresh cycle cannot overflow the stack.
        for (int hops = 0; ; hops++) {
            String target = extractRefreshUrl(getMetaValue(current, "refresh"));
            if (target == null) {
                return current;
            }
            if (hops >= MAX_META_REFRESH_HOPS) {
                throw new IOException("Too many meta refresh redirects (more than "
                        + MAX_META_REFRESH_HOPS + "), last URL: " + current.baseUri());
            }
            URI base = new URI(current.baseUri());
            // new URI(...) rather than resolve(String): a malformed target then raises the
            // declared URISyntaxException instead of an unchecked IllegalArgumentException.
            URI resolved = base.resolve(new URI(target));
            current = Jsoup.connect(resolved.toString())
                    .userAgent("Mozilla")
                    .followRedirects(true)
                    .get();
        }
    }

    /**
     * Extracts the target URL from a meta-refresh content value.
     *
     * @param refresh the raw content value, may be {@code null}
     * @return the target with surrounding quotes removed, or {@code null} when the
     *         value is {@code null}, has no {@code url=} part, or the target is empty
     */
    private static String extractRefreshUrl(String refresh) {
        if (refresh == null) {
            return null;
        }
        Matcher m = META_REFRESH.matcher(refresh);
        if (!m.matches()) {
            return null;
        }
        String url = m.group(1);
        // Strip one pair of matching single or double quotes around the target.
        if (url.length() >= 2) {
            char first = url.charAt(0);
            char last = url.charAt(url.length() - 1);
            if ((first == '\'' || first == '"') && first == last) {
                url = url.substring(1, url.length() - 1).trim();
            }
        }
        return url.isEmpty() ? null : url;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment