Skip to content

Instantly share code, notes, and snippets.

@Terryhung
Last active October 13, 2017 09:27
Show Gist options
  • Save Terryhung/fc7e0d606e227d975106e23718c9fde2 to your computer and use it in GitHub Desktop.
Save Terryhung/fc7e0d606e227d975106e23718c9fde2 to your computer and use it in GitHub Desktop.
//JAVA
/**
 * Prepends the article stylesheet to the first element of the given selection.
 *
 * <p>The stylesheet is injected as an inline {@code <style>} element at the very
 * start of the first matched element's children. It styles the
 * {@code .article-header}, {@code .main-image} and {@code .article-content}
 * classes.
 *
 * @param doc the selection whose first element receives the stylesheet; when it
 *            is {@code null} or empty there is nothing to decorate and the call
 *            is a no-op
 * @throws Exception declared for compatibility with existing callers
 */
public static void AddDocHead(Elements doc) throws Exception {
    // Elements.first() returns null for an empty selection; bail out instead of
    // failing with a NullPointerException on prepend().
    if (doc == null || doc.isEmpty()) {
        return;
    }
    Element head = doc.first();

    // One string per CSS rule so each rule can be read and edited on its own.
    final String styleSheet = "<style>"
            + ".article-header{content: '';background: linear-gradient(to top, #000 1rem,#fff 40rem,#fff 100%);height: 35rem;display: block;}"
            // "opacity" was misspelled "opacty", which made browsers ignore the declaration.
            + ".main-image{width: 100%;max-height: 700px;opacity: 0.9}"
            + ".article-content{background-color: #fff;position: absolute;width: 90%;z-index: 1;margin-left: 4%; margin-right: 5%;margin-top: -2rem;font-size: 25px; padding-left: 5px; padding-right: 5px}"
            + ".article-header:after {content: '';background: #000;background: -moz-linear-gradient(top, #000 28%, #fff 58%, #fff 100%);background: -webkit-linear-gradient(top, #000 22rem,#fff 40rem,#fff 100%);background: linear-gradient(to bottom, #000 22rem,#fff 40rem,#fff 100%);height: 40rem;display: block;}"
            + " .article-content img{width: 100%}"
            + "</style>";

    head.prepend(styleSheet);
}
/**
 * Downloads a news article, restyles its body and hands the cleaned HTML to
 * {@code CreateHTML}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>fetch the page with a mobile browser User-Agent;</li>
 *   <li>select the article container ({@code div.news_article});</li>
 *   <li>remove every {@code script} and {@code button} element from the document;</li>
 *   <li>insert the title as an {@code h2} at the top of the first container;</li>
 *   <li>wrap the content in {@code div.article-content} and put a
 *       {@code div.article-header} holding the main image in front of it;</li>
 *   <li>prepend the stylesheet via {@code AddDocHead};</li>
 *   <li>clean the markup against a relaxed whitelist extended with the tags and
 *       {@code class} attributes used above, then pass it to {@code CreateHTML}.</li>
 * </ol>
 *
 * <p>The URL, image URL and title are hard-coded sample values.
 *
 * @throws IllegalStateException if the page contains no {@code div.news_article}
 * @throws Exception if fetching the page fails, or if {@code AddDocHead} or
 *                   {@code CreateHTML} fails
 */
public static void Parsing() throws Exception {
    String url = "http://www.cna.com.tw/news/aloc/201710030325-1.aspx";
    String image_url = "http://img.appledaily.com.tw/images/twapple/640pix/20170925/BN02/BN02_005.jpg";
    String news_title = "苗栗縣鼓勵青年創業苗栗縣鼓勵青年創業苗栗縣鼓勵青年創業";
    String target = "div.news_article";

    Document doc = Jsoup.connect(url)
            .header("User-Agent", "Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G950U Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/5.2 Chrome/51.0.2704.106 Mobile Safari/537.36")
            .get();

    Elements divs = doc.select(target);
    // Fail with context instead of a bare NullPointerException on first() when
    // the page layout changed and the selector no longer matches.
    if (divs.isEmpty()) {
        throw new IllegalStateException("No element matching '" + target + "' found at " + url);
    }

    // Remove scripts and buttons from the whole document (and therefore from the
    // selected containers as well).
    doc.select("script, button").remove();

    /* Add the title at the first position */
    divs.first().prependElement("h2").text(news_title);

    /* Wrap the existing content in a new container */
    // NOTE(review): html()/html(String) act on every matched element; if the
    // selector ever matches more than one container, each one receives the
    // combined content - confirm a single match is expected.
    String child_html = divs.html();
    // The wrapper was previously terminated with "<div>" instead of "</div>",
    // leaving a stray empty nested div in the parsed tree.
    divs.html(String.format("<div class='article-content'>%s</div>", child_html));

    // Debug dump of the intermediate markup (kept from the original flow).
    System.out.println(divs);

    // Header block goes before the content wrapper; the surplus closing
    // "</div>" that used to trail this fragment has been dropped.
    divs.prepend(String.format("<div class='article-header'><img src='%s' class='main-image'></div>", image_url));

    // Start from the relaxed whitelist and additionally allow the tags and
    // class attributes used above, so they are not stripped by the cleaner.
    Whitelist wl = Whitelist.relaxed();
    wl.addTags("div", "span", "p", "br", "article", "section", "style");
    wl.addAttributes("div", "class");
    wl.addAttributes("img", "class");

    AddDocHead(divs);

    String processedHtml = Jsoup.clean(divs.outerHtml(), wl);
    CreateHTML(Arrays.asList(processedHtml));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment