Skip to content

Instantly share code, notes, and snippets.

@pagetronic
Created May 16, 2018 16:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pagetronic/7264adfd217ebae2f86215936118164b to your computer and use it in GitHub Desktop.
Save pagetronic/7264adfd217ebae2f86215936118164b to your computer and use it in GitHub Desktop.
Import Tropicos Mobot data from Tropicos service Api
package gaia.mobot;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.model.Accumulators;
import com.mongodb.client.model.Aggregates;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.IndexOptions;
import com.mongodb.client.model.Sorts;
import com.mongodb.client.model.UpdateOptions;
import live.page.base.blobstore.utils.BlobsUtils;
import live.page.base.db.Db;
import live.page.base.utils.Fx;
import live.page.base.utils.Json;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.ConnectTimeoutException;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.bson.conversions.Bson;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
import javax.servlet.annotation.WebListener;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
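/*
Mongo shell snippet that prints the current document counts of two of the
collections this importer writes to ("Species" / "Specimens"):
db.getCollection('Species').count() + '/' + db.getCollection('Specimens').count()
*/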
@WebListener
public class TropicosApi implements ServletContextListener {
// Tropicos API key appended to every request URL (the value shown here looks like a redacted placeholder).
private String tropicosApiKey = "XXXXXXXXXXXXXXXXXXXXXXXXXXX";
// Delay, in milliseconds, slept before each Tropicos request.
private final int pause = 600;
// Timeout, in milliseconds (20 s), applied to Tropicos HTTP requests.
private final int timeout = 20 * 1000;
// Single worker thread on which the import job itself runs.
private final ExecutorService service = Executors.newSingleThreadExecutor();
// Scheduler thread that periodically checks whether a new import job must be submitted.
private final ScheduledExecutorService control = Executors.newSingleThreadScheduledExecutor();
// Handle on the most recently submitted import job; null until the first submission.
private Future<?> ft = null;
/**
 * Schedules the periodic import when the web application starts.
 * <p>
 * Nothing is scheduled in debug mode. Otherwise {@link #run()} is triggered
 * on the scheduler thread after one minute and then every five minutes.
 *
 * @param sce servlet context event (unused)
 */
@Override
public void contextInitialized(ServletContextEvent sce) {
    if (!Fx.IS_DEBUG) {
        // The debug-only initial delay of 0 can never apply on this branch, so the delay is 1.
        control.scheduleAtFixedRate(this::run, 1, 5, TimeUnit.MINUTES);
    }
}
/**
 * Scheduler tick: submits a new import job to the worker executor unless the
 * previously submitted job is still running.
 * <p>
 * Any exception escaping {@link #updateNames()} is logged and ends the current
 * job; the next tick submits a fresh one.
 */
private void run() {
    Thread.currentThread().setName("tropicos-shuddle");
    boolean previousJobFinished = ft == null || ft.isCancelled() || ft.isDone();
    if (!previousJobFinished) {
        return;
    }
    ft = service.submit(() -> {
        Fx.log("Tropicos Api started");
        try {
            updateNames();
        } catch (Exception e) {
            // Log the exception itself (class name + message): getMessage() alone is
            // null for e.g. NullPointerException and would hide what went wrong.
            Fx.log("Tropicos error " + e);
            e.printStackTrace();
        }
        Fx.log("Tropicos Api stopped");
    });
}
/**
 * Stops the scheduler and the import worker when the web application shuts down.
 * <p>
 * Both executors are first asked to shut down gracefully and given up to two
 * seconds each; whatever is still running afterwards is stopped with
 * {@code shutdownNow()}.
 *
 * @param sce servlet context event (unused)
 */
@Override
public void contextDestroyed(ServletContextEvent sce) {
    control.shutdown();
    service.shutdown();
    try {
        control.awaitTermination(2, TimeUnit.SECONDS);
        service.awaitTermination(2, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
        // Keep the interrupt visible to the container thread instead of swallowing it.
        Thread.currentThread().interrupt();
    } finally {
        // Always force the shutdown, even if waiting was interrupted or failed.
        service.shutdownNow();
        control.shutdownNow();
    }
}
/**
 * Imports Tropicos names page by page (100 name ids per request), resuming
 * after the highest {@code tId} already stored in the "Species" collection.
 * <p>
 * For each name id the species record is fetched and given a unique
 * {@code _id}. Unless its rank is empty, "genus", "family" or "division", its
 * images and specimens are fetched as well. The species is then inserted, the
 * matching "Families" document is upserted with the specimen count, and the
 * specimens are saved.
 * <p>
 * The method returns when a request fails or reports an error, when a page is
 * empty, or when the worker executor has been shut down. Pages are processed
 * in a loop rather than by self-recursion so that a long import cannot
 * overflow the stack.
 *
 * @throws Exception propagated from the fetch, parsing or database calls
 */
private void updateNames() throws Exception {
    while (true) {
        Json lastspecies = Db.find("Species").sort(Sorts.descending("tId")).first();
        int startid = 0;
        if (lastspecies != null) {
            startid = lastspecies.getInteger("tId") + 1;
        }

        // getTropicos returns null on failure/shutdown: check before converting.
        JsonElement page = getTropicos("Name/list", "startid=" + startid, "pagesize=100");
        if (page == null || !page.isJsonArray()) {
            Fx.log("Tropicos error null listNameid startid=" + startid);
            return;
        }
        JsonArray listNameid = page.getAsJsonArray();
        if (listNameid.size() == 0) {
            // Empty page: nothing left to import.
            return;
        }
        if (listNameid.get(0).getAsJsonObject().has("Error")) {
            Fx.log("Tropicos error listNameid startid=" + startid);
            return;
        }

        for (JsonElement nameIdObj : listNameid) {
            int nameid = nameIdObj.getAsJsonObject().get("NameId").getAsInt();
            Json species = getSpecies(nameid);
            if (species == null) {
                return;
            }
            //!! = nom. cons., ! = Legitimate, ** = Invalid, *** = nom. rej., * = Illegitimate
            String url = getSpeciesUrl(species.getString("name", "specy"), species.getString("sym", ""));
            species.put("_id", url);

            List<Json> specimens = new ArrayList<>();
            String rank = species.getString("rank", "");
            boolean fetchSpecimens = !rank.equals("")
                    && !rank.equals("genus")
                    && !rank.equals("family")
                    && !rank.equals("division");
            if (fetchSpecimens) {
                List<Json> images = getImages(nameid);
                if (images == null) {
                    return;
                }
                specimens = getSpecimens(nameid, species, images);
                if (specimens == null) {
                    return;
                }
            }

            // Do not write anything once shutdown has been requested.
            if (service.isTerminated() || service.isShutdown()) {
                return;
            }

            species.put("specimens", specimens.size());
            Db.getDb("Species").insertOne(species);

            String family = species.getString("family", "");
            if (!family.equals("")) {
                Db.updateOne("Families", Filters.eq("_id", family.toLowerCase()),
                        new Json("$inc",
                                new Json("specimens", specimens.size()))
                                .put("$setOnInsert",
                                        new Json("_id", family.toLowerCase())
                                                .put("name", family)
                                )
                        , new UpdateOptions().upsert(true)
                );
            }

            if (specimens.size() > 0) {
                Db.save("Specimens", specimens);
                Fx.log("Tropicos : " + nameid + " / " + specimens.size() + " specimens");
            } else if (Fx.IS_DEBUG) {
                System.out.print(".");
            }
        }
        // Next iteration re-reads the highest stored tId and requests the following page.
    }
}
/**
 * Builds a unique "Species" {@code _id} from a scientific name.
 * <p>
 * The base is the cleaned, lower-cased name. A numeric suffix ("-1", "-2", ...)
 * is appended when the base is already taken. Names whose symbol is anything
 * other than "", "!" or "!!" always get a suffix, starting at "-1".
 *
 * @param name scientific name to derive the id from
 * @param sym  Tropicos status symbol of the name
 * @return an id for which no "Species" document exists yet
 */
private String getSpeciesUrl(String name, String sym) {
    String base = Fx.cleanURL(name).toLowerCase();
    boolean plainIdAllowed = sym.equals("") || sym.equals("!") || sym.equals("!!");
    long suffix = plainIdAllowed ? 0 : 1;
    String candidate = (suffix > 0) ? base + "-" + suffix : base;
    while (Db.exists("Species", Filters.eq("_id", candidate))) {
        suffix++;
        candidate = base + "-" + suffix;
    }
    return candidate;
}
/**
 * Fetches one Tropicos name record and converts it to a species document.
 * <p>
 * The document always carries {@code tId} and {@code name}. The fields
 * {@code family}, {@code synonym}, {@code accepted}, {@code rank},
 * {@code author} and {@code sym} are added only when the corresponding
 * Tropicos field is present. Synonyms and accepted names are fetched only
 * when their count is greater than zero.
 *
 * @param nameId Tropicos name id
 * @return the species document, or {@code null} (after logging) when the
 *         request failed, returned something other than a JSON object,
 *         reported an error, or a synonym/accepted-name lookup failed
 * @throws Exception propagated from the nested lookups or from parsing
 */
private Json getSpecies(int nameId) throws Exception {
    // getTropicos returns null on failure/shutdown: check before converting.
    JsonElement response = getTropicos("Name/" + nameId);
    if (response == null || !response.isJsonObject()) {
        Fx.log("Species error " + nameId);
        return null;
    }
    JsonObject speciestp = response.getAsJsonObject();
    if (speciestp.has("Error")) {
        Fx.log("Species error " + nameId);
        return null;
    }

    Json species = new Json();
    species.put("tId", speciestp.get("NameId").getAsInt());
    species.put("name", speciestp.get("ScientificName").getAsString());

    JsonElement family = speciesField(speciestp, "Family");
    if (family != null) {
        species.put("family", family.getAsString());
    }

    JsonElement synonymCount = speciesField(speciestp, "SynonymCount");
    if (synonymCount != null && synonymCount.getAsInt() > 0) {
        List<String> synonyms = getSynonym(nameId);
        if (synonyms == null) {
            Fx.log("Synonym error " + nameId);
            return null;
        }
        species.put("synonym", synonyms);
    }

    JsonElement acceptedCount = speciesField(speciestp, "AcceptedNameCount");
    if (acceptedCount != null && acceptedCount.getAsInt() > 0) {
        List<String> accepteds = getAccepted(nameId);
        if (accepteds == null) {
            Fx.log("AcceptedName error " + nameId);
            return null;
        }
        species.put("accepted", accepteds);
    }

    JsonElement rank = speciesField(speciestp, "Rank");
    if (rank != null) {
        species.put("rank", rank.getAsString());
    }
    JsonElement author = speciesField(speciestp, "Author");
    if (author != null) {
        species.put("author", author.getAsString());
    }
    JsonElement symbol = speciesField(speciestp, "Symbol");
    if (symbol != null) {
        species.put("sym", symbol.getAsString());
    }
    return species;
}

/** Returns the member {@code key} of {@code source}, or null when it is absent or a JSON null. */
private JsonElement speciesField(JsonObject source, String key) {
    JsonElement value = source.get(key);
    return (value == null || value.isJsonNull()) ? null : value;
}
/**
 * Fetches the images attached to a Tropicos name and keeps the usable ones.
 * <p>
 * An image is kept when it has an image id and a specimen id, its kind
 * contains "photo" (case-insensitive) and its detail URL is present and does
 * not contain "imageprotected". A missing caption or copyright is stored as
 * an empty text rather than aborting the import.
 *
 * @param nameId Tropicos name id
 * @return the kept images (empty when the service reports
 *         "No records were found" or returns an empty list), or {@code null}
 *         (after logging) when the request failed or reported another error
 * @throws Exception propagated from parsing
 */
private List<Json> getImages(int nameId) throws Exception {
    List<Json> images = new ArrayList<>();

    // getTropicos returns null on failure/shutdown: check before converting.
    JsonElement response = getTropicos("Name/" + nameId + "/Images");
    if (response == null || !response.isJsonArray()) {
        Fx.log("Tropicos error Images " + nameId);
        return null;
    }
    JsonArray listImages = response.getAsJsonArray();
    if (listImages.size() == 0) {
        return images;
    }

    JsonObject first = listImages.get(0).getAsJsonObject();
    if (first.has("Error")) {
        if (first.get("Error").getAsString().equals("No records were found")) {
            return images;
        }
        Fx.log("Tropicos error Images " + nameId);
        return null;
    }

    for (JsonElement imageObj : listImages) {
        JsonObject imagetp = imageObj.getAsJsonObject();
        JsonElement imageId = imageField(imagetp, "ImageId");
        JsonElement specimenId = imageField(imagetp, "SpecimenId");
        JsonElement kind = imageField(imagetp, "ImageKindText");
        JsonElement detailUrl = imageField(imagetp, "DetailJpgUrl");
        if (imageId == null || specimenId == null || kind == null || detailUrl == null) {
            continue;
        }
        if (!StringUtils.containsIgnoreCase(kind.getAsString(), "photo")
                || StringUtils.containsIgnoreCase(detailUrl.getAsString(), "imageprotected")) {
            continue;
        }
        JsonElement copyright = imageField(imagetp, "Copyright");
        JsonElement caption = imageField(imagetp, "Caption");

        Json image = new Json();
        image.put("tId", imageId.getAsInt());
        image.put("stId", specimenId.getAsInt());
        image.put("copyright", (copyright == null ? "" : copyright.getAsString()) + " / Tropicos");
        image.put("url", detailUrl.getAsString());
        image.put("caption", caption == null ? "" : caption.getAsString());
        image.put("kind", kind.getAsString());
        images.add(image);
    }
    return images;
}

/** Returns the member {@code key} of {@code source}, or null when it is absent or a JSON null. */
private JsonElement imageField(JsonObject source, String key) {
    JsonElement value = source.get(key);
    return (value == null || value.isJsonNull()) ? null : value;
}
/**
 * Builds the specimen documents for one name from its images.
 * <p>
 * Images are grouped by specimen id and each specimen is fetched from
 * Tropicos. A specimen is kept only when it has latitude, longitude and a
 * collection year. Its images are then downloaded into the blob store and
 * referenced from the specimen document.
 * <p>
 * The stored collection date is midnight (default time zone) of the
 * collection day. A missing month defaults to January and a missing day to
 * the 1st.
 *
 * @param nameId  Tropicos name id, used in log messages
 * @param species species document the specimens belong to
 * @param images  images as produced by {@code getImages}
 * @return the specimen documents, or {@code null} (after logging) when a
 *         specimen request or an image download failed
 * @throws Exception propagated from the download or parsing calls
 */
private List<Json> getSpecimens(int nameId, Json species, List<Json> images) throws Exception {
    List<Json> specimens = new ArrayList<>();

    // Group the images by the specimen they show.
    Map<Integer, List<Json>> specimens_images = new HashMap<>();
    for (Json image : images) {
        int specimenId = image.getInteger("stId");
        specimens_images.computeIfAbsent(specimenId, id -> new ArrayList<>()).add(image);
    }

    for (Entry<Integer, List<Json>> images_group : specimens_images.entrySet()) {
        int specimenId = images_group.getKey();
        List<Json> images_specimen = images_group.getValue();

        // getTropicos returns null on failure/shutdown: check before converting.
        JsonElement response = getTropicos("Specimen/" + specimenId);
        if (response == null || !response.isJsonObject()) {
            Fx.log("Tropicos error Specimen " + specimenId);
            return null;
        }
        JsonObject data_specimen = response.getAsJsonObject();

        JsonElement latitude = specimenField(data_specimen, "LatitudeDecDeg");
        JsonElement longitude = specimenField(data_specimen, "LongitudeDecDeg");
        JsonElement year = specimenField(data_specimen, "CollectionYear");
        if (latitude == null || longitude == null || year == null) {
            continue;
        }

        Json specimen = new Json("update", new Date());
        for (Json image : images_specimen) {
            String imageid = BlobsUtils.downloadBlob(
                    image.getString("url"),
                    new Json()
                            .put("text", image.getString("caption") + " © " + image.getString("copyright"))
                            .put("mobot", image.getInteger("tId"))
                    , 2048);
            if (imageid == null) {
                Fx.log("Error download Tropicos Image " + image.getString("url") + " for " + nameId);
                return null;
            }
            specimen.add("images", imageid);
        }
        specimen.put("species_name", species.getString("name"));
        specimen.put("species", species.getId());
        specimen.put("family", species.getString("family"));
        specimen.put("tId", specimenId);
        JsonElement collector = specimenField(data_specimen, "CollectorString");
        if (collector != null) {
            specimen.put("author", collector.getAsString());
        }
        // GeoJSON order: longitude first, then latitude.
        specimen.put("location", new Json("type", "Point").put("coordinates", Arrays.asList(longitude.getAsDouble(), latitude.getAsDouble())));

        JsonElement month = specimenField(data_specimen, "CollectionMonth");
        JsonElement day = specimenField(data_specimen, "CollectionDay");
        Calendar cal = Calendar.getInstance();
        // Drop the current time of day so the stored date depends only on the collection date.
        cal.clear();
        cal.set(year.getAsInt(),
                month != null ? month.getAsInt() - 1 : Calendar.JANUARY,
                day != null ? day.getAsInt() : 1);
        specimen.put("date", cal.getTime());

        JsonElement note = specimenField(data_specimen, "NoteDescription");
        if (note != null) {
            specimen.put("desc", Fx.normalize(note.getAsString()));
        }
        specimens.add(specimen);
    }
    return specimens;
}

/** Returns the member {@code key} of {@code source}, or null when it is absent or a JSON null. */
private JsonElement specimenField(JsonObject source, String key) {
    JsonElement value = source.get(key);
    return (value == null || value.isJsonNull()) ? null : value;
}
/**
 * Fetches the distinct synonym names of a Tropicos name, in response order.
 * <p>
 * Entries without a usable {@code SynonymName.ScientificName} are skipped.
 * An error payload is handled as in {@code getImages}: "No records were
 * found" yields an empty list, any other error is a failure.
 *
 * @param nameId Tropicos name id
 * @return the synonym names, or {@code null} (after logging) when the
 *         request failed, did not return a JSON array, or reported an error
 * @throws Exception propagated from parsing
 */
private List<String> getSynonym(int nameId) throws Exception {
    List<String> synonyms = new ArrayList<>();

    // getTropicos returns null on failure/shutdown: check before converting.
    JsonElement response = getTropicos("Name/" + nameId + "/Synonyms");
    if (response == null || !response.isJsonArray()) {
        Fx.log("Tropicos error Synonyms " + nameId);
        return null;
    }
    JsonArray synonymstp = response.getAsJsonArray();

    if (synonymstp.size() > 0 && synonymstp.get(0).isJsonObject()
            && synonymstp.get(0).getAsJsonObject().has("Error")) {
        if (synonymstp.get(0).getAsJsonObject().get("Error").getAsString().equals("No records were found")) {
            return synonyms;
        }
        Fx.log("Tropicos error Synonyms " + nameId);
        return null;
    }

    for (JsonElement entry : synonymstp) {
        JsonElement holder = entry.isJsonObject() ? entry.getAsJsonObject().get("SynonymName") : null;
        JsonElement scientificName = (holder != null && holder.isJsonObject())
                ? holder.getAsJsonObject().get("ScientificName")
                : null;
        if (scientificName == null || scientificName.isJsonNull()) {
            continue;
        }
        String name = scientificName.getAsString();
        if (!synonyms.contains(name)) {
            synonyms.add(name);
        }
    }
    return synonyms;
}
/**
 * Fetches the distinct accepted names of a Tropicos name, in response order.
 * <p>
 * Entries without a usable {@code AcceptedName.ScientificName} are skipped.
 * An error payload is handled as in {@code getImages}: "No records were
 * found" yields an empty list, any other error is a failure.
 *
 * @param nameId Tropicos name id
 * @return the accepted names, or {@code null} (after logging) when the
 *         request failed, did not return a JSON array, or reported an error
 * @throws Exception propagated from parsing
 */
private List<String> getAccepted(int nameId) throws Exception {
    List<String> accepted = new ArrayList<>();

    // getTropicos returns null on failure/shutdown: check before converting.
    JsonElement response = getTropicos("Name/" + nameId + "/AcceptedNames");
    if (response == null || !response.isJsonArray()) {
        Fx.log("Tropicos error AcceptedNames " + nameId);
        return null;
    }
    JsonArray acceptedstp = response.getAsJsonArray();

    if (acceptedstp.size() > 0 && acceptedstp.get(0).isJsonObject()
            && acceptedstp.get(0).getAsJsonObject().has("Error")) {
        if (acceptedstp.get(0).getAsJsonObject().get("Error").getAsString().equals("No records were found")) {
            return accepted;
        }
        Fx.log("Tropicos error AcceptedNames " + nameId);
        return null;
    }

    for (JsonElement entry : acceptedstp) {
        JsonElement holder = entry.isJsonObject() ? entry.getAsJsonObject().get("AcceptedName") : null;
        JsonElement scientificName = (holder != null && holder.isJsonObject())
                ? holder.getAsJsonObject().get("ScientificName")
                : null;
        if (scientificName == null || scientificName.isJsonNull()) {
            continue;
        }
        String name = scientificName.getAsString();
        if (!accepted.contains(name)) {
            accepted.add(name);
        }
    }
    return accepted;
}
/**
 * Performs one throttled GET against the Tropicos web service and parses the
 * JSON response.
 * <p>
 * The call first sleeps for {@code pause} milliseconds. It gives up without
 * sending anything when the thread is interrupted during that sleep or when
 * the worker executor has been shut down. The same timeout is applied to
 * connecting and to reading, so a stalled response cannot block the import
 * forever.
 *
 * @param type       resource path below {@code http://services.tropicos.org/},
 *                   e.g. {@code "Name/123"}
 * @param parameters extra query parameters, each already in {@code key=value} form
 * @return the parsed response, or {@code null} on interruption, shutdown,
 *         timeout or any other failure (failures are logged)
 */
private JsonElement getTropicos(String type, String... parameters) {
    try {
        Thread.sleep(pause);
    } catch (InterruptedException e) {
        // Interruption means the worker is being stopped: keep the flag and bail out.
        Thread.currentThread().interrupt();
        return null;
    }
    if (service.isTerminated() || service.isShutdown()) {
        return null;
    }

    StringBuilder query = new StringBuilder("http://services.tropicos.org/").append(type).append('?');
    for (String parameter : parameters) {
        query.append(parameter).append('&');
    }
    String url = query + "apikey=" + tropicosApiKey + "&format=json";
    // Same URL without the API key, so the key never ends up in the logs.
    String loggableUrl = query + "format=json";

    RequestConfig config = RequestConfig.custom()
            .setConnectTimeout(timeout)
            .setSocketTimeout(timeout)
            .build();

    // try-with-resources closes the response and the client on every path.
    try (CloseableHttpClient httpclient = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
         CloseableHttpResponse response = httpclient.execute(new HttpGet(url))) {
        HttpEntity entity = response.getEntity();
        // Fall back to UTF-8 (instead of ISO-8859-1) when the response declares no charset.
        String res = EntityUtils.toString(entity, "UTF-8");
        return new JsonParser().parse(res);
    } catch (ConnectTimeoutException e) {
        Fx.log("Tropicos Timeout " + loggableUrl);
        return null;
    } catch (Exception e) {
        Fx.log("Tropicos error Url " + loggableUrl);
        e.printStackTrace();
        return null;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment