Created
May 16, 2018 16:36
-
-
Save pagetronic/7264adfd217ebae2f86215936118164b to your computer and use it in GitHub Desktop.
Import Tropicos (MOBOT) data from the Tropicos web service API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package gaia.mobot; | |
import com.google.gson.JsonArray; | |
import com.google.gson.JsonElement; | |
import com.google.gson.JsonObject; | |
import com.google.gson.JsonParser; | |
import com.mongodb.client.MongoCursor; | |
import com.mongodb.client.model.Accumulators; | |
import com.mongodb.client.model.Aggregates; | |
import com.mongodb.client.model.Filters; | |
import com.mongodb.client.model.IndexOptions; | |
import com.mongodb.client.model.Sorts; | |
import com.mongodb.client.model.UpdateOptions; | |
import live.page.base.blobstore.utils.BlobsUtils; | |
import live.page.base.db.Db; | |
import live.page.base.utils.Fx; | |
import live.page.base.utils.Json; | |
import org.apache.commons.lang3.StringUtils; | |
import org.apache.http.HttpEntity; | |
import org.apache.http.client.config.RequestConfig; | |
import org.apache.http.client.methods.CloseableHttpResponse; | |
import org.apache.http.client.methods.HttpGet; | |
import org.apache.http.conn.ConnectTimeoutException; | |
import org.apache.http.impl.client.CloseableHttpClient; | |
import org.apache.http.impl.client.HttpClientBuilder; | |
import org.apache.http.util.EntityUtils; | |
import org.bson.conversions.Bson; | |
import javax.servlet.ServletContextEvent; | |
import javax.servlet.ServletContextListener; | |
import javax.servlet.annotation.WebListener; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
import java.util.Calendar; | |
import java.util.Date; | |
import java.util.HashMap; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.Map.Entry; | |
import java.util.concurrent.ExecutorService; | |
import java.util.concurrent.Executors; | |
import java.util.concurrent.Future; | |
import java.util.concurrent.ScheduledExecutorService; | |
import java.util.concurrent.TimeUnit; | |
import java.util.regex.Pattern; | |
/* | |
db.getCollection('Species').count() + '/' + db.getCollection('Specimens').count() | |
*/ | |
@WebListener | |
public class TropicosApi implements ServletContextListener { | |
private String tropicosApiKey = "XXXXXXXXXXXXXXXXXXXXXXXXXXX"; | |
private final int pause = 600; | |
private final int timeout = 20 * 1000; | |
private final ExecutorService service = Executors.newSingleThreadExecutor(); | |
private final ScheduledExecutorService control = Executors.newSingleThreadScheduledExecutor(); | |
private Future<?> ft = null; | |
@Override | |
public void contextInitialized(ServletContextEvent sce) { | |
if (Fx.IS_DEBUG) { | |
return; | |
} | |
control.scheduleAtFixedRate(() -> run(), Fx.IS_DEBUG ? 0 : 1, 5, TimeUnit.MINUTES); | |
} | |
private void run() { | |
Thread.currentThread().setName("tropicos-shuddle"); | |
if (ft == null || ft.isCancelled() || ft.isDone()) { | |
ft = service.submit(() -> { | |
Fx.log("Tropicos Api started"); | |
try { | |
updateNames(); | |
} catch (Exception e) { | |
Fx.log("Tropicos error " + e.getMessage()); | |
} | |
Fx.log("Tropicos Api stopped"); | |
}); | |
} | |
} | |
@Override | |
public void contextDestroyed(ServletContextEvent sce) { | |
control.shutdown(); | |
service.shutdown(); | |
try { | |
control.awaitTermination(2, TimeUnit.SECONDS); | |
service.awaitTermination(2, TimeUnit.SECONDS); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
service.shutdownNow(); | |
control.shutdownNow(); | |
} | |
private void updateNames() throws Exception { | |
Json lastspecies = Db.find("Species").sort(Sorts.descending("tId")).first(); | |
int startid = 0; | |
if (lastspecies != null) { | |
startid = lastspecies.getInteger("tId") + 1; | |
} | |
JsonArray listNameid = getTropicos("Name/list", "startid=" + startid, "pagesize=100").getAsJsonArray(); | |
if (listNameid == null) { | |
Fx.log("Tropicos error null listNameid startid=" + startid); | |
return; | |
} | |
if (!listNameid.get(0).getAsJsonObject().keySet().contains("Error")) { | |
for (JsonElement nameIdObj : listNameid) { | |
int nameid = nameIdObj.getAsJsonObject().get("NameId").getAsInt(); | |
Json species = getSpecies(nameid); | |
if (species == null) { | |
return; | |
} | |
//!! = nom. cons., ! = Legitimate, ** = Invalid, *** = nom. rej., * = Illegitimate | |
String url = getSpeciesUrl(species.getString("name", "specy"), species.getString("sym", "")); | |
species.put("_id", url); | |
List<Json> specimens = new ArrayList<>(); | |
if (!species.getString("rank", "").equals("") && | |
!species.getString("rank", "").equals("genus") && | |
!species.getString("rank", "").equals("family") && | |
!species.getString("rank", "").equals("division")) { | |
List<Json> images = getImages(nameid); | |
if (images == null) { | |
return; | |
} | |
specimens = getSpecimens(nameid, species, images); | |
if (specimens == null) { | |
return; | |
} | |
} | |
if (service.isTerminated() || service.isShutdown()) { | |
return; | |
} | |
species.put("specimens", specimens.size()); | |
Db.getDb("Species").insertOne(species); | |
if (!species.getString("family", "").equals("")) { | |
Db.updateOne("Families", Filters.eq("_id", species.getString("family", "").toLowerCase()), | |
new Json("$inc", | |
new Json("specimens", specimens.size())) | |
.put("$setOnInsert", | |
new Json("_id", species.getString("family", "").toLowerCase()) | |
.put("name", species.getString("family", "")) | |
) | |
, new UpdateOptions().upsert(true) | |
); | |
} | |
if (specimens.size() > 0) { | |
Db.save("Specimens", specimens); | |
Fx.log("Tropicos : " + nameid + " / " + specimens.size() + " specimens"); | |
} else if (Fx.IS_DEBUG) { | |
System.out.print("."); | |
} | |
} | |
updateNames(); | |
} else { | |
Fx.log("Tropicos error listNameid startid=" + startid); | |
} | |
} | |
private String getSpeciesUrl(String name, String sym) { | |
String url = Fx.cleanURL(name).toLowerCase(); | |
long count = 0; | |
if (!sym.equals("") && !sym.equals("!") && !sym.equals("!!")) { | |
count += 1; | |
} | |
while (Db.exists("Species", Filters.eq("_id", (count > 0) ? url + "-" + count : url))) { | |
count++; | |
} | |
if (count > 0) { | |
url += "-" + count; | |
} | |
return url; | |
} | |
private Json getSpecies(int nameId) throws Exception { | |
Json species = new Json(); | |
JsonObject speciestp = getTropicos("Name/" + nameId).getAsJsonObject(); | |
if (speciestp == null) { | |
Fx.log("Species error " + nameId); | |
return null; | |
} | |
if (speciestp.getAsJsonObject().keySet().contains("Error")) { | |
Fx.log("Species error " + nameId); | |
return null; | |
} | |
species.put("tId", speciestp.get("NameId").getAsInt()); | |
species.put("name", speciestp.get("ScientificName").getAsString()); | |
if (speciestp.get("Family") != null) { | |
species.put("family", speciestp.get("Family").getAsString()); | |
} | |
if (speciestp.get("SynonymCount") != null) { | |
int synonym = Integer.valueOf(speciestp.get("SynonymCount").getAsString()); | |
if (synonym > 0) { | |
List<String> synonyms = getSynonym(nameId); | |
if (synonyms == null) { | |
Fx.log("Synonym error " + nameId); | |
return null; | |
} | |
species.put("synonym", synonyms); | |
} | |
} | |
if (speciestp.get("AcceptedNameCount") != null) { | |
int accepted = Integer.valueOf(speciestp.get("AcceptedNameCount").getAsString()); | |
if (accepted > 0) { | |
List<String> accepteds = getAccepted(nameId); | |
if (accepteds == null) { | |
Fx.log("AcceptedName error " + nameId); | |
return null; | |
} | |
species.put("accepted", accepteds); | |
} | |
} | |
if (speciestp.get("Rank") != null) { | |
species.put("rank", speciestp.get("Rank").getAsString()); | |
} | |
if (speciestp.get("Author") != null) { | |
species.put("author", speciestp.get("Author").getAsString()); | |
} | |
if (speciestp.get("Symbol") != null) { | |
species.put("sym", speciestp.get("Symbol").getAsString()); | |
} | |
return species; | |
} | |
private List<Json> getImages(int nameId) throws Exception { | |
List<Json> images = new ArrayList<>(); | |
JsonArray listImages = getTropicos("Name/" + nameId + "/Images").getAsJsonArray(); | |
if (listImages == null) { | |
Fx.log("Tropicos error Images " + nameId); | |
return null; | |
} | |
if (!listImages.get(0).getAsJsonObject().keySet().contains("Error")) { | |
for (JsonElement imageObj : listImages) { | |
JsonObject imagetp = imageObj.getAsJsonObject(); | |
if (imagetp.get("SpecimenId") != null && | |
imagetp.get("ImageKindText") != null && | |
StringUtils.containsIgnoreCase(imagetp.get("ImageKindText").getAsString(), "photo") && | |
imagetp.get("DetailJpgUrl") != null && | |
!StringUtils.containsIgnoreCase(imagetp.get("DetailJpgUrl").getAsString(), "imageprotected")) { | |
Json image = new Json(); | |
image.put("tId", imagetp.get("ImageId").getAsInt()); | |
image.put("stId", imagetp.get("SpecimenId").getAsInt()); | |
image.put("copyright", imagetp.get("Copyright").getAsString() + " / Tropicos"); | |
image.put("url", imagetp.get("DetailJpgUrl").getAsString()); | |
image.put("caption", imagetp.get("Caption").getAsString()); | |
image.put("kind", imagetp.get("ImageKindText").getAsString()); | |
images.add(image); | |
} | |
} | |
} else if (!listImages.get(0).getAsJsonObject().get("Error").getAsString().equals("No records were found")) { | |
Fx.log("Tropicos error Images " + nameId); | |
return null; | |
} | |
return images; | |
} | |
private List<Json> getSpecimens(int nameId, Json species, List<Json> images) throws Exception { | |
List<Json> specimens = new ArrayList<>(); | |
Map<Integer, List<Json>> specimens_images = new HashMap<>(); | |
for (Json image : images) { | |
int specimenId = image.getInteger("stId"); | |
List<Json> specimen_images = new ArrayList<>(); | |
if (specimens_images.containsKey(specimenId)) { | |
specimen_images = specimens_images.get(specimenId); | |
} | |
specimen_images.add(image); | |
specimens_images.put(specimenId, specimen_images); | |
} | |
for (Entry<Integer, List<Json>> images_group : specimens_images.entrySet()) { | |
int specimenId = images_group.getKey(); | |
List<Json> images_specimen = images_group.getValue(); | |
JsonObject data_specimen = getTropicos("Specimen/" + specimenId).getAsJsonObject(); | |
if (data_specimen == null) { | |
Fx.log("Tropicos error Specimen " + specimenId); | |
return null; | |
} | |
if (data_specimen.get("LatitudeDecDeg") != null && data_specimen.get("LongitudeDecDeg") != null && data_specimen.get("CollectionYear") != null) { | |
Json specimen = new Json("update", new Date()); | |
for (Json image : images_specimen) { | |
String imageid = BlobsUtils.downloadBlob( | |
image.getString("url"), | |
new Json() | |
.put("text", image.getString("caption") + " © " + image.getString("copyright")) | |
.put("mobot", image.getInteger("tId")) | |
, 2048); | |
if (imageid == null) { | |
Fx.log("Error download Tropicos Image " + image.getString("url") + " for " + nameId); | |
return null; | |
} | |
specimen.add("images", imageid); | |
} | |
specimen.put("species_name", species.getString("name")); | |
specimen.put("species", species.getId()); | |
specimen.put("family", species.getString("family")); | |
specimen.put("tId", specimenId); | |
specimen.put("author", data_specimen.get("CollectorString").getAsString()); | |
specimen.put("location", new Json("type", "Point").put("coordinates", Arrays.asList(data_specimen.get("LongitudeDecDeg").getAsDouble(), data_specimen.get("LatitudeDecDeg").getAsDouble()))); | |
Calendar cal = Calendar.getInstance(); | |
cal.set(Calendar.YEAR, data_specimen.get("CollectionYear").getAsInt()); | |
if (data_specimen.get("CollectionMonth") != null) { | |
cal.set(Calendar.MONTH, data_specimen.get("CollectionMonth").getAsInt() - 1); | |
} else { | |
cal.set(Calendar.MONTH, 0); | |
} | |
if (data_specimen.get("CollectionDay") != null) { | |
cal.set(Calendar.DAY_OF_MONTH, data_specimen.get("CollectionDay").getAsInt()); | |
} else { | |
cal.set(Calendar.DAY_OF_MONTH, 1); | |
} | |
specimen.put("date", cal.getTime()); | |
if (data_specimen.get("NoteDescription") != null) { | |
specimen.put("desc", Fx.normalize(data_specimen.get("NoteDescription").getAsString())); | |
} | |
specimens.add(specimen); | |
} | |
} | |
return specimens; | |
} | |
private List<String> getSynonym(int nameId) throws Exception { | |
List<String> synonyms = new ArrayList<>(); | |
JsonArray synonymstp = getTropicos("Name/" + nameId + "/Synonyms").getAsJsonArray(); | |
if (synonymstp == null) { | |
Fx.log("Tropicos error Synonyms " + nameId); | |
return null; | |
} | |
for (JsonElement nameIdObj : synonymstp) { | |
String name = nameIdObj.getAsJsonObject().get("SynonymName").getAsJsonObject().get("ScientificName").getAsString(); | |
if (!synonyms.contains(name)) { | |
synonyms.add(name); | |
} | |
} | |
return synonyms; | |
} | |
private List<String> getAccepted(int nameId) throws Exception { | |
List<String> accepted = new ArrayList<>(); | |
JsonArray acceptedstp = getTropicos("Name/" + nameId + "/AcceptedNames").getAsJsonArray(); | |
if (acceptedstp == null) { | |
Fx.log("Tropicos error AcceptedNames " + nameId); | |
return null; | |
} | |
for (JsonElement nameIdObj : acceptedstp) { | |
String name = nameIdObj.getAsJsonObject().get("AcceptedName").getAsJsonObject().get("ScientificName").getAsString(); | |
if (!accepted.contains(name)) { | |
accepted.add(name); | |
} | |
} | |
return accepted; | |
} | |
private JsonElement getTropicos(String type, String... parameters) { | |
try { | |
Thread.sleep(pause); | |
} catch (Exception e) { | |
} | |
if (service.isTerminated() || service.isShutdown()) { | |
return null; | |
} | |
CloseableHttpClient httpclient = null; | |
String url = "http://services.tropicos.org/" + type + "?"; | |
for (String parameter : parameters) { | |
url += parameter + "&"; | |
} | |
url += "apikey=" + tropicosApiKey + "&format=json"; | |
try { | |
httpclient = HttpClientBuilder.create().setDefaultRequestConfig(RequestConfig.custom() | |
.setConnectTimeout(timeout).build()).build(); | |
HttpGet request = new HttpGet(url); | |
CloseableHttpResponse response = httpclient.execute(request); | |
HttpEntity entity = response.getEntity(); | |
String res = EntityUtils.toString(entity); | |
response.close(); | |
return new JsonParser().parse(res); | |
} catch (ConnectTimeoutException e) { | |
Fx.log("Tropicos Timeout " + url); | |
return null; | |
} catch (Exception e) { | |
Fx.log("Tropicos error Url " + url); | |
e.printStackTrace(); | |
return null; | |
} finally { | |
try { | |
httpclient.close(); | |
} catch (Exception e) { | |
} | |
} | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment