# erikgregorywebb/clinicaltrials-xml.R

## clinicaltrials-xml.R
library(tidyverse)
library(rvest)
library(xml2)
library(XML)

# extract list of xml files
# NOTE: setwd() is kept on purpose so that the bare file names returned by
# list.files() resolve, and so the working directory seen by any later code
# is unchanged.
setwd("~/Fiverr/clinicaltrials/NCT0247xxxx")
xml_files = list.files()

# loop over files, extracting content
# Builds one single-row tibble per investigator-related node (source file,
# XPath of the node, text of the node) and stacks them into `raw`.
datalist = list()
n = 1
for (i in seq_along(xml_files)) {
  # A file that cannot be parsed is reported and skipped instead of
  # aborting the whole run.
  study = tryCatch(
    xml2::read_xml(xml_files[i]),
    error = function(e) {
      warning('could not read ', xml_files[i], ': ', conditionMessage(e),
              call. = FALSE)
      NULL
    }
  )
  if (is.null(study)) next

  # XPath of every element in the document
  all_paths = study %>% xml_find_all('//*') %>% xml_path()

  # Keep only the paths that mention 'investigator'. The original code
  # indexed `paths`, which is never defined; `all_paths` is the intended
  # vector.
  investigator_paths = all_paths[str_detect(all_paths, 'investigator')]

  # Nothing to extract for this study. Without this guard, pasting '/' onto
  # an empty vector yields the single query '/', which is not an
  # investigator path.
  if (length(investigator_paths) == 0) next

  # The extra leading '/' is kept so the investigator_path column keeps the
  # same '//...' form as before.
  investigator_paths = paste0('/', investigator_paths)

  for (j in seq_along(investigator_paths)) {
    # Best-effort per path: a failing query is reported and the loop moves on.
    tryCatch({
      value = xml_find_all(study, investigator_paths[j]) %>% html_text()
      datalist[[n]] = tibble(
        xml_file = xml_files[i],
        investigator_path = investigator_paths[j],
        value
      )
      n = n + 1
    }, error = function(e) {
      warning('failed on ', xml_files[i], ' ', investigator_paths[j], ': ',
              conditionMessage(e), call. = FALSE)
    })
  }
}
# bind_rows() returns an empty tibble (rather than NULL) when nothing was found
raw = bind_rows(datalist)
	# NOTE(review): this tab-indented section repeats the script at the top of
	# the file (same libraries, same directory, same loop). It looks like a
	# paste artifact and can probably be removed; confirm before deleting.
	library(tidyverse)
	library(rvest)
	library(xml2)
	library(XML)

	# extract list of xml files
	# NOTE: setwd() is kept on purpose so that the bare file names returned by
	# list.files() resolve, and so the working directory seen by any later code
	# is unchanged.
	setwd("~/Fiverr/clinicaltrials/NCT0247xxxx")
	xml_files = list.files()

	# loop over files, extracting content
	# Builds one single-row tibble per investigator-related node (source file,
	# XPath of the node, text of the node) and stacks them into `raw`.
	datalist = list()
	n = 1
	for (i in seq_along(xml_files)) {
	  # A file that cannot be parsed is reported and skipped instead of
	  # aborting the whole run.
	  study = tryCatch(
	    xml2::read_xml(xml_files[i]),
	    error = function(e) {
	      warning('could not read ', xml_files[i], ': ', conditionMessage(e),
	              call. = FALSE)
	      NULL
	    }
	  )
	  if (is.null(study)) next

	  # XPath of every element in the document
	  all_paths = study %>% xml_find_all('//*') %>% xml_path()

	  # Keep only the paths that mention 'investigator'. The original code
	  # indexed `paths`, which is never defined; `all_paths` is the intended
	  # vector.
	  investigator_paths = all_paths[str_detect(all_paths, 'investigator')]

	  # Nothing to extract for this study. Without this guard, pasting '/' onto
	  # an empty vector yields the single query '/', which is not an
	  # investigator path.
	  if (length(investigator_paths) == 0) next

	  # The extra leading '/' is kept so the investigator_path column keeps the
	  # same '//...' form as before.
	  investigator_paths = paste0('/', investigator_paths)

	  for (j in seq_along(investigator_paths)) {
	    # Best-effort per path: a failing query is reported and the loop moves on.
	    tryCatch({
	      value = xml_find_all(study, investigator_paths[j]) %>% html_text()
	      datalist[[n]] = tibble(
	        xml_file = xml_files[i],
	        investigator_path = investigator_paths[j],
	        value
	      )
	      n = n + 1
	    }, error = function(e) {
	      warning('failed on ', xml_files[i], ' ', investigator_paths[j], ': ',
	              conditionMessage(e), call. = FALSE)
	    })
	  }
	}
	# bind_rows() returns an empty tibble (rather than NULL) when nothing was found
	raw = bind_rows(datalist)