For example, Alpine Linux — the base image is only about 5 MB.
# Alpine's base image ships without timezone data; install the tzdata
# package (--no-cache avoids persisting the apk index in the layer) and
# set the container's default timezone via the TZ environment variable.
RUN apk add --no-cache tzdata
ENV TZ America/New_York
year | ct | amt | kind
---|---|---|---
2011 | 0 | 0 | Filing Date
2011 | 2 | 10 | Transaction Date
2012 | 0 | 0 | Filing Date
2012 | 8 | 0.4 | Transaction Date
2013 | 9 | 0.4 | Filing Date
2013 | 24 | 35 | Transaction Date
2014 | 19 | 2.5 | Filing Date
2014 | 38 | 11 | Transaction Date
2015 | 60 | 54 | Filing Date
// XPath CheatSheet
// To test XPath in your Chrome Debugger: $x('/html/body')
// http://www.jittuu.com/2012/2/14/Testing-XPath-In-Chrome/
// 0. XPath Examples.
// More: http://xpath.alephzarro.com/content/cheatsheet.html
'//hr[@class="edge" and position()=1]' // every first hr of 'edge' class
# Fetch a single page of results from the site (page 1 by default).
# NOTE(review): this definition is truncated in this excerpt -- the
# request/parse logic after the sleep (and the closing brace) is not
# visible here; confirm against the full source.
get_page <- function(page_num = 1) { | |
# Be kind to the web site: it does not have a robots.txt, so this
# should be the default wait time between requests, since the desires
# of the scraper are not greater than those of the site owner and
# you'd be abusing their resources if you did not put a delay in
# between requests.
Sys.sleep(5) | |
; | |
WITH CTE AS | |
( | |
SELECT | |
*, | |
ROW_NUMBER() OVER (PARTITION BY [product_id], shop_code | |
ORDER BY | |
[doc_date]) - ROW_NUMBER() OVER (PARTITION BY [product_id], shop_code, mark_1 |
# Scrape GeM bid results: grab each result block on the page, isolate
# the "Item(s)" column within a block, then pull the item names that
# follow the "Item(s)" label.
library(rvest)
library(dplyr)

pg <- read_html("https://bidplus.gem.gov.in/bidresultlists")

# one node per bid-result block
blocks <- pg %>% html_nodes(".block")

# the column-div inside each block that mentions "Item(s)"
items_and_quantity <- blocks %>%
  html_nodes(xpath = ".//div[@class='col-block' and contains(., 'Item(s)')]")

# the <span> immediately following the "Item(s)" label holds the names
items <- items_and_quantity %>%
  html_nodes(xpath = ".//strong[contains(., 'Item(s)')]/following-sibling::span") %>%
  html_text(trim = TRUE)
Since cognitive processes are challenging for Python programmers, this is pretty much what the (not-great idiom in the) appropriately-acronym'd BS4 `text` => `get_text()` does (https://github.com/wention/BeautifulSoup4/blob/03a2b3a9d1fc5877212d9d382a512663f24c887d/bs4/element.py#L846-L854). There are FAR BETTER WAYS to get text than this, but I would not expect Python folks to grok that.
# Retrieve the DOE OE-417 annual summary index page and locate the
# "XLS" workbook download links.
# NOTE(review): the final pipeline is truncated in this excerpt -- the
# trailing %>% has no visible right-hand side; confirm the download
# step against the full source.
library(httr) | |
library(rvest) | |
library(readxl) | |
library(tidyverse) | |
doe <- read_html("https://www.oe.netl.doe.gov/OE417_annual_summary.aspx")
# local cache directory for downloaded workbooks; no warning if it
# already exists
dir.create("~/Data/doe-cache-dir", showWarnings = FALSE)
# every anchor whose text mentions 'XLS' (chain continues past excerpt)
html_nodes(doe, xpath=".//a[contains(., 'XLS')]") %>% |
# Stop the GUI-session GPG agent launchd job, then start it again.
# Per the launchctl man page, -w persists the enable/disable override
# across reboots and -S Aqua targets the Aqua (GUI login) session type.
launchctl unload -w -S Aqua /System/Library/LaunchAgents/gpg.agent.daemon.plist | |
launchctl load -w -S Aqua /System/Library/LaunchAgents/gpg.agent.daemon.plist
asn | org | iso3c | n
---|---|---|---
AS262661 | Linknet Telecomunicaçoes | BRA | 2771
AS262988 | Pombonet Telecomunicações e Informática | BRA | 2439
AS262296 | Windx Networks | BRA | 2382
AS52909 | Vox Telecomunicações do Brasil Ltda | BRA | 1518
AS263030 | CNET Provedor de Internet Ltda ME | BRA | 1492
AS263468 | Rapnet Comunicacao Multimidia Ltda | BRA | 1460
AS262579 | GE Network Provedor de Internet LTDA | BRA | 1455
AS264479 | Turbozone Internet | BRA | 1450
AS263991 | Fernanda Cristina Ruiz Matiazzo - Me | BRA | 1445