Hannes Datta hannesdatta

## hashes.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                hannesdatta
                / hashes.ipynb
            
            
              Created
              March 6, 2024 10:34
            
              
                Anonymizing usernames for web scraping projects
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## script.R
# use cases

# as a calculator (literal operands only, so this line also runs in a
# fresh session where no variable has been defined yet)
1 + 1

# to assign variables
x <- 5

# calculation w/ variables
x + 5

## commands.txt
R --vanilla < "filename.R" # commands are echoed on screen together with their output
Rscript filename.R # commands are not echoed; only the output they produce is shown
R -e "unlink('*.*')" # executes one R command (the wildcard pattern must be a quoted string; this deletes matching files!)
R -e "rmarkdown::render('filename.Rmd', output_file='../paper/output/filename.pdf')" # knits filename.Rmd and writes the result to output_file


## scraper.py
# FINAL CODE
import requests
from bs4 import BeautifulSoup

# Page address and the User-Agent header value (a desktop Chrome signature)
url = 'https://www.coolblue.nl/tweedekans-product/2191236'

headers = {}
headers['User-Agent'] = ' '.join([
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    'AppleWebKit/537.36 (KHTML, like Gecko)',
    'Chrome/58.0.3029.110',
    'Safari/537.36',
])

## scripts.R
# Setup/initialization
library(tidyverse)

## Start clean: remove zip and csv files left over from earlier runs
unlink(c("*.zip", "*.csv"))

## Fetch the raw data archive and store it as data.zip
download.file(
  url = "https://github.com/hannesdatta/course-dprep/raw/master/content/docs/tutorials/data-preparation/data_without_duplicates.zip",
  destfile = "data.zip"
)

## exercises.Rmd
---
title: "dPrep Tutorial"
output: html_document
date: "2023-02-16"
---

```{r setup, include=FALSE}
# Set echo = TRUE as the default chunk option for the rest of the document
knitr::opts_chunk$set(echo = TRUE)
```

## books_to_scrape.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                hannesdatta
                / books_to_scrape.ipynb
            
            
              Created
              September 20, 2022 13:58
            
              
                Getting product descriptions and unique product category links from books.toscrape.com
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## exercise_3.9.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                hannesdatta
                / exercise_3.9.ipynb
            
            
              Created
              September 2, 2022 11:24
            
              
                Solution to exercise 3.9 in my Python Bootcamp Tutorial (https://odcm.hannesdatta.com/docs/tutorials/pythonbootcamp/)
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## script.R
# R Bootcamp demo script (written by Hannes)

# Simple arithmetic, followed by a greeting written to the console
1 + 1
cat("Hello!")

# Keep a name in a variable
name <- "Hannes"

# Create the three project folders in the working directory
dir.create(path = "data")
dir.create(path = "data_output")
dir.create(path = "documents")

## scrape_reddit.py
# Setup

# Make selenium and chromedriver work for Untappd.com

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

# Start a Chrome session. ChromeDriverManager().install() supplies the path to
# a chromedriver binary; Selenium 4 expects that path wrapped in a Service
# object (passing it positionally to webdriver.Chrome() fails on 4.10+).
from selenium.webdriver.chrome.service import Service

#driver = webdriver.Chrome()
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
	# use cases

	# as a calculator (literal operands only, so this line also runs in a
	# fresh session where no variable has been defined yet)
	1 + 1

	# to assign variables
	x <- 5

	# calculation w/ variables
	x + 5
	R --vanilla < "filename.R" # commands are echoed on screen together with their output
	Rscript filename.R # commands are not echoed; only the output they produce is shown
	R -e "unlink('*.*')" # executes one R command (the wildcard pattern must be a quoted string; this deletes matching files!)
	R -e "rmarkdown::render('filename.Rmd', output_file='../paper/output/filename.pdf')" # knits filename.Rmd and writes the result to output_file
	# FINAL CODE
	import requests
	from bs4 import BeautifulSoup

	# Page address and the User-Agent header value (a desktop Chrome signature)
	url = 'https://www.coolblue.nl/tweedekans-product/2191236'
	headers = {}
	headers['User-Agent'] = ' '.join([
		'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
		'AppleWebKit/537.36 (KHTML, like Gecko)',
		'Chrome/58.0.3029.110',
		'Safari/537.36',
	])
	# Setup/initialization
	library(tidyverse)

	## Start clean: remove zip and csv files left over from earlier runs
	unlink(c("*.zip", "*.csv"))

	## Fetch the raw data archive and store it as data.zip
	download.file(
	  url = "https://github.com/hannesdatta/course-dprep/raw/master/content/docs/tutorials/data-preparation/data_without_duplicates.zip",
	  destfile = "data.zip"
	)
	---
	title: "dPrep Tutorial"
	output: html_document
	date: "2023-02-16"
	---

	```{r setup, include=FALSE}
	# Set echo = TRUE as the default chunk option for the rest of the document
	knitr::opts_chunk$set(echo = TRUE)
	```
	# R Bootcamp demo script (written by Hannes)

	# Simple arithmetic, followed by a greeting written to the console
	1 + 1
	cat("Hello!")

	# Keep a name in a variable
	name <- "Hannes"

	# Create the three project folders in the working directory
	dir.create(path = "data")
	dir.create(path = "data_output")
	dir.create(path = "documents")
	# Setup

	# Make selenium and chromedriver work for Untappd.com

	from selenium import webdriver
	from selenium.webdriver.chrome.options import Options
	from webdriver_manager.chrome import ChromeDriverManager

	# Start a Chrome session. ChromeDriverManager().install() supplies the path to
	# a chromedriver binary; Selenium 4 expects that path wrapped in a Service
	# object (passing it positionally to webdriver.Chrome() fails on 4.10+).
	from selenium.webdriver.chrome.service import Service

	#driver = webdriver.Chrome()
	driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))