
@shawngraham
shawngraham / now, extract the lat,long
Last active February 26, 2020 17:34
parsing json
import json

with open('ottawadata.json') as f:
    data = json.load(f)
....now, what cunning piece of code would do the trick? With jqplay I can get e.g. the latitude with

.content.indexedStructured.geoLocation[]|.points[].latitude.content

Anyway... off to read some basic Python stuff, I guess.
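One possible answer, for what it's worth: the jq path translates fairly directly into nested Python loops over the parsed dict. This is a sketch only — the field names (`content`, `indexedStructured`, `geoLocation`, `points`, `latitude`) are assumed from the jq expression above, not from the actual ottawadata.json.

```python
import json

def extract_latitudes(data):
    # Mirrors the jq path:
    #   .content.indexedStructured.geoLocation[]|.points[].latitude.content
    # All key names are assumed from that expression.
    lats = []
    for geo in data["content"]["indexedStructured"]["geoLocation"]:
        for point in geo.get("points", []):
            lats.append(point["latitude"]["content"])
    return lats

# A toy record with the assumed shape, standing in for ottawadata.json:
sample = {
    "content": {
        "indexedStructured": {
            "geoLocation": [
                {"points": [{"latitude": {"content": "45.4215"},
                             "longitude": {"content": "-75.6972"}}]}
            ]
        }
    }
}
print(extract_latitudes(sample))  # ['45.4215']
```

The same pattern with `point["longitude"]["content"]` would pull the longitudes.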
# Topic Modeling John Adams' Diaries
# slightly modified version of
# https://tm4ss.github.io/docs/Tutorial_6_Topic_Models.html
# by Andreas Niekler, Gregor Wiedemann
library(tidyverse)
library(tidytext)
# go get the diaries
# these were scraped from
id date text
1 1753-06-08 At Colledge. A Clowdy ; Dull morning and so continued till about 5 a Clock when it began to rain ; moderately But continued not long But remained Clowdy all night in which night I watched with Powers.
2 1753-06-09 At Colledge the weather still remaining Clowdy all Day till 6 o'Clock when the Clowds were Dissipated and the sun brake forth in all his glory.
3 1753-06-10 At Colledge a clear morning. Heard Mr. Appleton expound those words in I.Cor.12 Chapt. 7 first verses and in the afternoon heard him preach from those words in 26 of Mathew 41 verse watch and pray that ye enter not into temptation.
4 1753-06-11 At Colledge a fair morning and pretty warm. About 2 o'Clock there appeared some symptoms of an approaching shower attended with some thunder and lightning.
5 1753-06-12 At Colledge a Clowdy morning heard Dr. Wigglesworth Preach from the 20 Chapter of exodus 8 9 and 10th. Verses.
6 1753-06-13 At Colledge a Cloudy morning about 10 o'Clock the Sun shone out very warm but abo
#let's fix the first column in scrape
#i want to remove the first three characters, leaving us with a date
#or at least something that looks like a date
#this removes the diary metadata from the date
scrape$id <- substring(scrape$id, 4)
#this creates a new column with just the month extracted
month <- str_sub(scrape$id, 5, 6)
scrape['month'] <- month
library(tm)
library(rvest)
library(dplyr)
#https://francojc.github.io/2017/11/02/acquiring-data-for-language-research-web-scraping/
#modified
webpage <- "https://www.masshist.org/digitaladams/archive/browse/diaries_by_date.php"
html <- read_html(webpage) # read the raw html
setwd("~/diaries")
library(tm)
#turn entries into a corpus object
docs <- Corpus(VectorSource(entries))
docs <- tm_map(docs, removePunctuation)
#Transform to lower case
docs <- tm_map(docs,content_transformer(tolower))
#Strip digits
docs <- tm_map(docs, removeNumbers)
#after https://francojc.github.io/2015/03/01/web-scraping-with-rvest-in-r/
library(rvest)
library(dplyr)
base_url <- "https://www.masshist.org"
# Load the page
main.page <- read_html(x = "https://www.masshist.org/digitaladams/archive/browse/diaries_by_date.php")
# Get link URLs
@shawngraham
shawngraham / diary-scrape.r
Last active November 11, 2019 14:59
why is this not working? the loop is the problem
library(rvest)
base_url <- "https://www.masshist.org"
# Load the page
main.page <- read_html(x = "https://www.masshist.org/digitaladams/archive/browse/diaries_by_date.php")
# Get link URLs
urls <- main.page %>% # feed `main.page` to the next step
html_nodes("a") %>% # get the CSS nodes
html_attr("href") # extract the URLs
# Get link text