# jlopezper/count_correct_csv.R

## count_correct_csv.R
# Dependencies ----
# devtools must already be available; uncomment the next line if it is not.
# install.packages("devtools")
# Install opendataes from the GitHub repository ropenspain/opendataes.
# This call is unconditional, so it is executed on every run of the script.
devtools::install_github("ropenspain/opendataes")
# Attach the package. Note that this script also reaches into non-exported
# internals of opendataes via `:::`, which may change between versions.
library(opendataes)


#' Count the elements of `x[[2]]` that carry more than placeholder columns
#'
#' Each element of `x[[2]]` is inspected by its names. An element counts
#' when at least one of its names is something other than `name`, `format`
#' or `URL`. Elements whose names are all within that set (or that have no
#' names at all) do not count.
#'
#' NOTE(review): presumably an element holding only name/format/URL is what
#' `cargar_datos()` returns for a file it could not read -- confirm against
#' the opendataes documentation.
#'
#' @param x An object whose second element is a list of named objects
#'   (e.g. data frames).
#' @return An integer: the number of counted elements.
determine_number <- function(x) {
  placeholder_cols <- c('name', 'format', "URL")

  # TRUE when `data` has at least one name outside the placeholder set.
  was_read <- function(data) any(!(names(data) %in% placeholder_cols))

  sum(vapply(x[[2]], was_read, logical(1)))
}


#' Share of a publisher's CSV datasets for which at least one file was read
#'
#' Queries the datos.gob.es catalogue for the given publisher, keeps the
#' datasets for which `opendataes:::determine_dataset_url()` reports a "csv"
#' format, loads each of them with `opendataes::cargar_datos()` and counts,
#' via `determine_number()`, how many files were read per dataset. Progress
#' is reported with `print()`.
#'
#' @param publisher Publisher code appended to the catalogue endpoint URL.
#' @param num_pages Forwarded as `num_pages` to
#'   `opendataes:::get_resp_paginated()`. Defaults to 1000, the value that
#'   used to be hard-coded.
#' @return A numeric scalar: the number of datasets with a non-zero read
#'   count divided by the number of CSV datasets. `NA_real_` (with a
#'   warning) when the publisher has no CSV datasets.
check_errors <- function(publisher, num_pages = 1000) {
  print("Function started")
  res <- opendataes:::get_resp_paginated(
    url = paste0(
      "http://datos.gob.es/apidata/catalog/dataset/publisher/",
      publisher
    ),
    num_pages = num_pages
  )
  print("Response obtained")

  # Each item is a data_list. vapply() keeps the mask a logical vector even
  # when there are no items; sapply() would return list() in that case,
  # which is not a valid subscript.
  items <- res$result$items
  has_csv <- vapply(
    items,
    function(item) "csv" %in% opendataes:::determine_dataset_url(item),
    logical(1)
  )
  csv_items <- items[has_csv]

  # Without any CSV dataset the proportion is undefined; say so explicitly
  # instead of failing later on an empty result.
  if (length(csv_items) == 0L) {
    warning(
      "No CSV datasets found for publisher ", publisher,
      call. = FALSE
    )
    return(NA_real_)
  }

  # One end path per dataset, usable as input to cargar_datos()
  urls <- vapply(csv_items, opendataes:::extract_endpath, character(1))

  # Apply cargar_datos() over all end paths and count the files read
  print("Reading data")
  reads_per_dataset <- vapply(urls, function(url) {
    n_read <- determine_number(opendataes::cargar_datos(url))
    print(paste0(url, " : ", n_read))
    n_read
  }, numeric(1))
  print("Total files read determined")

  # Proportion of datasets with at least one file read (non-zero count)
  any_read <- reads_per_dataset != 0
  sum(any_read) / length(any_read)
}


# Run the check for publisher "L01280796" and keep the resulting proportion.
# NOTE(review): the `id_mad` example below starts with the same code, so this
# is presumably Madrid -- confirm.
x <- check_errors(publisher = "L01280796")
# Alternative publisher code, kept for manual runs.
#x <- check_errors(publisher = "L01480209")
# Bare top-level expression so that the value is auto-printed.
x

#id_bcn <-  'l01080193-elecciones-al-parlamento-europeo-sobre-electores'
#id_mad <- "l01280796-sedes-oficinas-de-elaboracion-de-dni-y-pasaporte1"
#cargar_datos('l01080193-elecciones-al-parlamento-europeo-sobre-electores')
	# Dependencies ----
	# devtools must already be available; uncomment the next line if it is not.
	# install.packages("devtools")
	# Install opendataes from the GitHub repository ropenspain/opendataes.
	# This call is unconditional, so it is executed on every run of the script.
	devtools::install_github("ropenspain/opendataes")
	# Attach the package. Note that this script also reaches into non-exported
	# internals of opendataes via `:::`, which may change between versions.
	library(opendataes)


	#' Count the elements of `x[[2]]` that carry more than placeholder columns
	#'
	#' Each element of `x[[2]]` is inspected by its names. An element counts
	#' when at least one of its names is something other than `name`, `format`
	#' or `URL`. Elements whose names are all within that set (or that have no
	#' names at all) do not count.
	#'
	#' NOTE(review): presumably an element holding only name/format/URL is what
	#' `cargar_datos()` returns for a file it could not read -- confirm against
	#' the opendataes documentation.
	#'
	#' @param x An object whose second element is a list of named objects
	#'   (e.g. data frames).
	#' @return An integer: the number of counted elements.
	determine_number <- function(x) {
	  placeholder_cols <- c('name', 'format', "URL")

	  # TRUE when `data` has at least one name outside the placeholder set.
	  was_read <- function(data) any(!(names(data) %in% placeholder_cols))

	  sum(vapply(x[[2]], was_read, logical(1)))
	}


	#' Share of a publisher's CSV datasets for which at least one file was read
	#'
	#' Queries the datos.gob.es catalogue for the given publisher, keeps the
	#' datasets for which `opendataes:::determine_dataset_url()` reports a "csv"
	#' format, loads each of them with `opendataes::cargar_datos()` and counts,
	#' via `determine_number()`, how many files were read per dataset. Progress
	#' is reported with `print()`.
	#'
	#' @param publisher Publisher code appended to the catalogue endpoint URL.
	#' @param num_pages Forwarded as `num_pages` to
	#'   `opendataes:::get_resp_paginated()`. Defaults to 1000, the value that
	#'   used to be hard-coded.
	#' @return A numeric scalar: the number of datasets with a non-zero read
	#'   count divided by the number of CSV datasets. `NA_real_` (with a
	#'   warning) when the publisher has no CSV datasets.
	check_errors <- function(publisher, num_pages = 1000) {
	  print("Function started")
	  res <- opendataes:::get_resp_paginated(
	    url = paste0(
	      "http://datos.gob.es/apidata/catalog/dataset/publisher/",
	      publisher
	    ),
	    num_pages = num_pages
	  )
	  print("Response obtained")

	  # Each item is a data_list. vapply() keeps the mask a logical vector even
	  # when there are no items; sapply() would return list() in that case,
	  # which is not a valid subscript.
	  items <- res$result$items
	  has_csv <- vapply(
	    items,
	    function(item) "csv" %in% opendataes:::determine_dataset_url(item),
	    logical(1)
	  )
	  csv_items <- items[has_csv]

	  # Without any CSV dataset the proportion is undefined; say so explicitly
	  # instead of failing later on an empty result.
	  if (length(csv_items) == 0L) {
	    warning(
	      "No CSV datasets found for publisher ", publisher,
	      call. = FALSE
	    )
	    return(NA_real_)
	  }

	  # One end path per dataset, usable as input to cargar_datos()
	  urls <- vapply(csv_items, opendataes:::extract_endpath, character(1))

	  # Apply cargar_datos() over all end paths and count the files read
	  print("Reading data")
	  reads_per_dataset <- vapply(urls, function(url) {
	    n_read <- determine_number(opendataes::cargar_datos(url))
	    print(paste0(url, " : ", n_read))
	    n_read
	  }, numeric(1))
	  print("Total files read determined")

	  # Proportion of datasets with at least one file read (non-zero count)
	  any_read <- reads_per_dataset != 0
	  sum(any_read) / length(any_read)
	}


	# Run the check for publisher "L01280796" and keep the resulting proportion.
	# NOTE(review): the `id_mad` example below starts with the same code, so this
	# is presumably Madrid -- confirm.
	x <- check_errors(publisher = "L01280796")
	# Alternative publisher code, kept for manual runs.
	#x <- check_errors(publisher = "L01480209")
	# Bare top-level expression so that the value is auto-printed.
	x

	#id_bcn <- 'l01080193-elecciones-al-parlamento-europeo-sobre-electores'
	#id_mad <- "l01280796-sedes-oficinas-de-elaboracion-de-dni-y-pasaporte1"
	#cargar_datos('l01080193-elecciones-al-parlamento-europeo-sobre-electores')