-
-
Save raymondben/a6eb43b30e1194e40068 to your computer and use it in GitHub Desktop.
Recreating regional soundscapes from GBIF data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Many occurrences have associated media: sound, images, or video. GBIF provides
## access to these media files through its portal as well as the API. Here, we
## reconstruct the "soundscapes" of particular regions by compiling the bird and
## frog sounds from those regions.
## Ben Raymond, Peter Neish
## Last-Modified: <2015-09-04 10:40:47>

## change these paths to suit your needs
setwd("~/your/path/")
cache_directory <- "~/your/data/cache/path/"

## required packages
## library() (not require()) so a missing package errors immediately instead of
## warning and letting the script fail later with a confusing message
library(rgbif)
library(plyr)
library(R.cache)
library(sampling)
library(jsonlite)

## We use the `R.cache` package to cache our web calls to the GBIF API. This just
## means that, having run a certain query once, we can re-run it without needing
## an internet connection. Specify the cache directory and create cached
## (memoized) versions of the `occ_search` and `name_usage` functions:
setCacheRootPath(cache_directory)
cached_occ_search <- addMemoization(occ_search)
cached_name_usage <- addMemoization(name_usage)

## Specify the taxon keys we are interested in:
## 212 (Aves; birds) and 952 (Anura; frogs and toads)
parentKeys <- c(212, 952)
## Our aim is to find the sounds associated with the taxa present in a region of interest.
## Ideally we could just search for occurrences in that region that also have media linked to the occurrence records.
## Unfortunately, though, most occurrence records *do not* have associated sound media.
## Our strategy instead is to find out which taxa are present in our region of interest, and then find sound files for those same taxa (regardless of location).
## These media will almost certainly have been recorded at locations other than our region of interest, but we assume that a given species makes the same sounds worldwide --- this might not necessarily be the case for all species!
## There aren't very many occurrences of birds and frogs with sound media, so the easiest thing is just to grab the lot and filter them later according to other criteria.
## NOTE(review): with a vector taxonKey, occ_search returns one list element per
## key, each with $data and $media components -- confirm against the installed
## rgbif version, as this structure has changed between releases.
temp=cached_occ_search(taxonKey=parentKeys,mediatype='Sound',limit=30000)
## reformat this for convenience into a single data frame
## (ldply row-binds the per-taxon $data data frames into one)
all_sounds=list(data=ldply(temp,function(z)z$data))
## keep the media records as one flat list, assumed parallel to the rows of
## all_sounds$data (they are subset with the same logical index later on)
all_sounds$media=unlist(sapply(temp,function(z)z$media),recursive=FALSE)
## Define the regions of interest: a region name plus a lon/lat bounding box
## (min/max longitude and latitude) for each. Built in one vectorized
## data.frame() call; the resulting frame is identical to rbind-ing the rows
## one at a time.
all_regions <- data.frame(
    region=c("Macquarie Island", "Bavaria", "Bavaria (summer)",
             "Bavaria (winter)", "Costa Rica",
             "Ecuador (Podocarpus National Park)", "Scotland (Loch Lomond)",
             "South Africa (Kruger National Park)",
             "Spain (Doñana National Park)", "Sri Lanka", "USA (Death Valley)",
             "Adelaide Hills", "Yukon Delta National Wildlife Refuge",
             "Philippines", "Sea of Okhotsk"),
    lon_min=c(157.9, 10.3, 10.3, 10.3, -85.7, -79.1, -4.8, 31, -6.6, 79.4,
              -117.6, 138.6, -167.4, 118.7, 140.6),
    lon_max=c(159.9, 13.6, 13.6, 13.6, -83, -78.8, -4.2, 32, -6.2, 82.1,
              -116.3, 138.9, -160.2, 126.8, 156.6),
    lat_min=c(-55.1, 47.6, 47.6, 47.6, 8.3, -4.5, 56, -25.4, 36.8, 5.8, 35.6,
              -35, 59.7, 6.6, 49.2),
    lat_max=c(-54.1, 50.6, 50.6, 50.6, 11.0, -4.0, 56.5, -22.4, 37.2, 10,
              37.0, -34.9, 63.5, 18.7, 59.3),
    stringsAsFactors=FALSE)
## ensure region is plain character (not factor), as the loop below relies on it
all_regions$region <- as.character(all_regions$region)
## Loop through each region in turn. For each region we:
##  1. find the taxa recorded there (via a bounded occurrence search),
##  2. match those taxa against our pool of sound media,
##  3. pick one sound and one photo per taxon,
##  4. look up English vernacular names,
##  5. write a per-region JSON file.
## seq_len() (not 1:nrow()) so an empty region table gives zero iterations.
for (roi in seq_len(nrow(all_regions))) {
    this_region <- all_regions$region[roi]
    cat(sprintf("%s ...\n", this_region))
    this_lon <- with(all_regions[roi,], c(lon_min, lon_max))
    this_lat <- with(all_regions[roi,], c(lat_min, lat_max))
    ## Grab occurrence records (up to 5000 each of birds and frogs) in our region and find the list of unique taxa
    ## It would be nice to be able to request just the list of taxa that meet certain search criteria but the GBIF API does not support this.
    ## There is a risk that we won't get all the taxa present doing it this way (unless we download *all* matching occurrences, which seems excessive)
    do_by_season <- grepl("(winter|summer)", this_region)
    if (!do_by_season) {
        this <- ldply(cached_occ_search(taxonKey=parentKeys, decimalLongitude=paste(this_lon, collapse=","), decimalLatitude=paste(this_lat, collapse=","), limit=5000, return="data"), function(z) if (is.data.frame(z)) z else data.frame())
    } else {
        if (grepl("winter", this_region)) {
            ## assume northern-hemisphere seasons
            months_of_interest <- c(12, 1, 2)
        } else {
            months_of_interest <- c(6, 7, 8)
        }
        this <- ldply(months_of_interest, function(m) ldply(cached_occ_search(taxonKey=parentKeys, decimalLongitude=paste(this_lon, collapse=","), decimalLatitude=paste(this_lat, collapse=","), month=m, limit=1500, return="data"), function(z) if (is.data.frame(z)) z else data.frame()))
        ## can check distribution of records by month and taxonomic class with
        ## table(this[,c("month","class")])
    }
    ## Now we intersect the list of taxa present in our region with those for which we have sound media
    taxa_with_media <- intersect(this$taxonKey, all_sounds$data$taxonKey)
    this_data <- subset(all_sounds$data, taxonKey %in% taxa_with_media)
    ## the media list is parallel to all_sounds$data, so subset it with the same mask
    temp <- all_sounds$media[all_sounds$data$taxonKey %in% taxa_with_media]
    this_data$audio_url <- sapply(temp, function(z) as.character(z[[1]][[1]]$identifier))
    this_data$audio_reference <- sapply(temp, function(z) { zz <- z[[1]][[1]]$references; if (is.null(zz)) "" else zz })
    cat(sprintf("Found %d media items for %d species in parentKeys %s and region %s\n", nrow(this_data), length(unique(this_data$taxonKey)), paste(parentKeys, collapse=","), this_region))
    ## filter out some files that we don't want to use:
    ## many of the files from http://web.corral.tacc.utexas.edu have human voices audible, so avoid these
    idx <- !grepl("tacc\\.utexas", this_data$audio_url)
    this_data <- subset(this_data, idx)
    ## prions apparently make horrible noises that rather detract from the final result, so avoid these too
    this_data <- subset(this_data, !grepl("Pachyptila", this_data$name))
    if (nrow(this_data) < 1) {
        ## no usable sound media for this region: skip it rather than erroring
        ## in the strata()/occ_search() calls below, which require >=1 row
        warning(sprintf("no usable sound media for region %s; skipping", this_region))
        next
    }
    ## select one media item per taxon: simple random sampling without
    ## replacement (srswor) of one row within each taxonKey stratum
    this_data <- arrange(this_data, taxonKey)
    this_data <- this_data[strata(this_data, "taxonKey", rep(1, length(unique(this_data$taxonKey))), "srswor")$ID_unit,]
    ## retrieve photos for each taxon
    temp2 <- cached_occ_search(taxonKey=this_data$taxonKey, mediatype='StillImage', return="media", limit=1) ## max of one per taxon
    temp2 <- ldply(temp2, function(z) if (is.list(z)) as.data.frame(z[[1]][[1]], stringsAsFactors=FALSE) else data.frame())
    temp2 <- subset(temp2, select=c(".id", "identifier", "references"))
    names(temp2) <- c("taxonKey", "image_url", "image_reference")
    temp2$image_reference[is.na(temp2$image_reference)] <- ""
    this_data <- merge(this_data, temp2, by="taxonKey", all.x=TRUE)
    this_data <- arrange(this_data, class, name) ## order by name within class
    ## add vernacular names: the most frequent English name, if any
    ## seq_len() so a zero-row frame yields no iterations (1:0 would give c(1,0)
    ## and fire an API call with an NA taxonKey)
    this_data$vernacularName <- ""
    for (k in seq_len(nrow(this_data))) {
        temp <- cached_name_usage(key=this_data$taxonKey[k], data="vernacularNames")
        if (!is.null(temp$data)) {
            counts <- table(subset(temp$data, language=="eng")$vernacularName)
            if (length(counts) > 0) {
                this_data$vernacularName[k] <- names(which.max(counts))
            }
        }
    }
    ## write json file: one object per taxon, split into birds and frogs
    outfile <- paste0(gsub(" ", "_", this_region), ".json")
    out_content <- list(locality=this_region, lon=this_lon, lat=this_lat)
    temp <- subset(this_data, class=="Aves")
    out_content$birds <- list()
    for (k in seq_len(nrow(temp))) {
        out_content$birds[[length(out_content$birds)+1]] <- list(name=temp$name[k], vernacularName=temp$vernacularName[k], audio=temp$audio_url[k], audio_reference=temp$audio_reference[k], image=temp$image_url[k], image_reference=temp$image_reference[k])
    }
    temp <- subset(this_data, class=="Amphibia")
    out_content$frogs <- list()
    for (k in seq_len(nrow(temp))) {
        out_content$frogs[[length(out_content$frogs)+1]] <- list(name=temp$name[k], vernacularName=temp$vernacularName[k], audio=temp$audio_url[k], audio_reference=temp$audio_reference[k], image=temp$image_url[k], image_reference=temp$image_reference[k])
    }
    json <- toJSON(out_content, auto_unbox=TRUE)
    write(json, file=outfile, append=FALSE)
    ## optionally write html file just for a quick test
    if (FALSE) {
        outfile <- paste0(gsub(" ", "_", this_region), ".html")
        write("<!DOCTYPE html>\n<html>\n<body>\n", file=outfile, append=FALSE)
        write("<script>function play_all() { var a=document.getElementsByClassName(\"noisy\"); for (var i=0;i<a.length;i++) a[i].play(); }</script>", file=outfile, append=TRUE)
        write("<script>function pause_all() { var a=document.getElementsByClassName(\"noisy\"); for (var i=0;i<a.length;i++) a[i].pause(); }</script>", file=outfile, append=TRUE)
        write("<button type=\"button\" onclick=\"play_all()\">Play all</button> <button type=\"button\" onclick=\"pause_all()\">Pause all</button><br />", file=outfile, append=TRUE)
        last_class <- ""
        for (k in seq_len(nrow(this_data))) {
            ## emit a class heading each time the taxonomic class changes
            if (! last_class==this_data$class[k]) {
                if (! last_class=="") write("</p>", file=outfile, append=TRUE)
                write(sprintf("<p><strong>%s</strong></p>", this_data$class[k]), file=outfile, append=TRUE)
            }
            last_class <- this_data$class[k]
            write(sprintf("<p>%s <em>%s</em> ", this_data$vernacularName[k], this_data$name[k]), file=outfile, append=TRUE)
            if (FALSE) {##(!is.na(this_data$image_url[k])) {
                ## NOTE: columns are audio_url/image_url (the original dead code
                ## referenced nonexistent $audio and $image columns)
                write(sprintf("<img width=\"100px\" src=\"%s\" />", this_data$image_url[k]), file=outfile, append=TRUE)
            }
            write(sprintf("<audio class=\"noisy\" controls loop autoplay><source src=\"%s\" type=\"audio/mpeg\">Your browser does not support the audio element.</audio></p>", this_data$audio_url[k]), file=outfile, append=TRUE)
        }
        write("</body>\n</html>", file=outfile, append=TRUE)
        ##browseURL(outfile)
    }
} ## end looping regions
## For the round 2 entry, we used a slightly different data file format.
## Combine and reformat the per-region JSON files now into:
##   x    - one data frame row per (taxon, region) pair
##   locs - list of distinct localities (season suffix stripped) with bounds
## regex: \\. so that only real .json files match (the original unescaped dot
## matched any character before "json")
x <- data.frame()
ff <- dir(".", pattern="^[A-Z].*\\.json$")
locs <- list()
for (i in seq_along(ff)) {  ## seq_along: safe when no files were found
    thisx <- fromJSON(ff[i])
    this_region <- thisx$locality
    ## season is encoded in the region name, e.g. "Bavaria (winter)"
    this_season <- ""
    if (grepl("winter", this_region)) this_season <- "winter"
    if (grepl("summer", this_region)) this_season <- "summer"
    this_region_bare <- sub(" \\((winter|summer)\\)", "", this_region)
    ## tag each taxon row with its region/season/type; empty groups become
    ## empty data frames so the rbind below is always valid
    if (length(thisx$birds) > 0) {
        thisx$birds$region <- this_region
        thisx$birds$season <- this_season
        thisx$birds$type <- "bird"
    } else {
        thisx$birds <- data.frame()
    }
    if (length(thisx$frogs) > 0) {
        thisx$frogs$region <- this_region
        thisx$frogs$season <- this_season
        thisx$frogs$type <- "frog"
    } else {
        thisx$frogs <- data.frame()
    }
    ## rbind to the (possibly empty) accumulator; no special-casing of i==1
    ## needed since rbind(data.frame(), df) is just df
    x <- rbind(x, thisx$birds, thisx$frogs)
    ## record each distinct bare locality once, with its bounding box.
    ## vapply (not sapply) so an empty locs list yields character(0) rather
    ## than an empty list, keeping the %in% test well-typed.
    if (!this_region_bare %in% vapply(locs, function(z) z$name, character(1))) {
        locs[[length(locs)+1]] <- list(name=this_region_bare, bounds=list(c(thisx$lat[1], thisx$lon[1]), c(thisx$lat[2], thisx$lon[2])))
    }
}
## Now consolidate to one entry per taxon: a taxon seen in several regions gets
## a single record whose region/season fields are JSON arrays.
## JSON formatting is a bit tedious here, to ensure proper array/string
## formatting (see jsonlite::unbox): scalar fields are unboxed to JSON strings,
## region/season stay as arrays.
nu <- unique(x$name)
jx <- vector("list", length(nu))  ## preallocate instead of growing in the loop
for (ni in seq_along(nu)) {  ## seq_along: safe if nu is empty
    idx <- x$name == nu[ni]
    ## sorted unique regions for this taxon, with the season split back out of
    ## the "(winter)"/"(summer)" suffix in the region name
    this_region <- sort(unique(x$region[idx]))
    this_season <- rep("", length(this_region))
    this_season[grepl("winter", this_region)] <- "winter"
    this_season[grepl("summer", this_region)] <- "summer"
    this_region <- sub(" \\((winter|summer)\\)", "", this_region)
    ## per-taxon fields (name, audio, image, ...) come from the first matching row
    temp <- as.list(x[which(idx)[1],])
    temp$region <- this_region
    temp$season <- this_season
    ## convert to JSON now so we can control unboxing
    jx[[ni]] <- toJSON(list(name=unbox(temp$name), vernacularName=unbox(temp$vernacularName), audio=unbox(temp$audio), audio_reference=unbox(temp$audio_reference), image=unbox(temp$image), image_reference=unbox(temp$image_reference), type=unbox(temp$type), region=temp$region, season=temp$season))
}
## splice the taxa array into the localities JSON by hand
json <- toJSON(list(localities=locs), auto_unbox=TRUE)
json <- substr(json, 1, nchar(json)-1) ## drop trailing }
jx <- paste(jx, collapse=",")
json <- paste0(json, ",\"taxa\":[", jx, "]}")
write(json, file="soundscape2.json", append=FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment