-
-
Save Daenyth/4742267 to your computer and use it in GitHub Desktop.
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
/** | |
* Given a url to a web page, extract a suitable image from that page. This will | |
* attempt to follow a method similar to Google+, as described <a href= | |
* "http://webmasters.stackexchange.com/questions/25581/how-does-google-plus-select-an-image-from-a-shared-link" | |
* >here</a> | |
* | |
*/ | |
public class ImageExtractor { | |
// TODO: Add junit test case for this. (Construct Document from string, extract, check) | |
public static String extractImageUrl(String url) throws IOException { | |
String contentType = new URL(url).openConnection().getContentType(); | |
if (contentType != null) { | |
if (contentType.startsWith("image/")) { | |
return url; | |
} | |
} | |
Document document = Jsoup.connect(url).get(); | |
String imageUrl = null; | |
imageUrl = getImageFromSchema(document); | |
if (imageUrl != null) { | |
return imageUrl; | |
} | |
imageUrl = getImageFromOpenGraph(document); | |
if (imageUrl != null) { | |
return imageUrl; | |
} | |
imageUrl = getImageFromTwitterCard(document); | |
if (imageUrl != null) { | |
return imageUrl; | |
} | |
imageUrl = getImageFromTwitterShared(document); | |
if (imageUrl != null) { | |
return imageUrl; | |
} | |
imageUrl = getImageFromLinkRel(document); | |
if (imageUrl != null) { | |
return imageUrl; | |
} | |
imageUrl = getImageFromGuess(document); | |
if (imageUrl != null) { | |
return imageUrl; | |
} | |
return imageUrl; | |
} | |
private static String getImageFromTwitterShared(Document document) { | |
Element div = document.select("div.media-gallery-image-wrapper").first(); | |
if (div == null) { | |
return null; | |
} | |
Element img = div.select("img.media-slideshow-image").first(); | |
if (img != null) { | |
return img.absUrl("src"); | |
} | |
return null; | |
} | |
private static String getImageFromGuess(Document document) { | |
// TODO | |
return null; | |
} | |
private static String getImageFromLinkRel(Document document) { | |
Element link = document.select("link[rel=image_src]").first(); | |
if (link != null) { | |
return link.attr("abs:href"); | |
} | |
return null; | |
} | |
private static String getImageFromTwitterCard(Document document) { | |
Element meta = document.select("meta[name=twitter:card][content=photo]").first(); | |
if (meta == null) { | |
return null; | |
} | |
Element image = document.select("meta[name=twitter:image]").first(); | |
return image.attr("abs:content"); | |
} | |
private static String getImageFromOpenGraph(Document document) { | |
Element image = document.select("meta[property=og:image]").first(); | |
if (image != null) { | |
return image.attr("abs:content"); | |
} | |
Element secureImage = document.select("meta[property=og:image:secure]").first(); | |
if (secureImage != null) { | |
return secureImage.attr("abs:content"); | |
} | |
return null; | |
} | |
private static String getImageFromSchema(Document document) { | |
Element container = | |
document.select("*[itemscope][itemtype=http://schema.org/ImageObject]").first(); | |
if (container == null) { | |
return null; | |
} | |
Element image = container.select("img[itemprop=contentUrl]").first(); | |
if (image == null) { | |
return null; | |
} | |
return image.absUrl("src"); | |
} | |
} |
Thanks very much
Hello,
I have tried scraping a data table from a web page and saving it to an Excel sheet using the Jsoup library. But when I use the line Jsoup.connect(url).get(); in my code, I get the error "Could not generate secret".
How can I get rid of that error? Please help me resolve it.
:WARN:oejs.ServletHandler:qtp159413332-17:
javax.servlet.ServletException: org.glassfish.jersey.server.ContainerException: javax.net.ssl.SSLHandshakeException: Could not generate secret
at org.glassfish.jersey.servlet.WebComponent.serviceImpl(WebComponent.java:408)
at org.glassfish.jersey.servlet.WebComponent.service(WebComponent.java:346)
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:365)
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:318)
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:205)
at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:840)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:585)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:513)
at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134)
at org.eclipse.jetty.server.Server.handle(Server.java:539)
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:333)
at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251)
at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:283)
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:108)
at org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93)
at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303)
at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148)
at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136)
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:671)
at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:589)
at java.lang.Thread.run(Thread.java:748)
Thanks in advance..
this is great thank you for sharing!
Thanks very much. This solved my problem.
For those using Gradle: