Daniel Rodriguez (danielfrg)

danielfrg / set_campaign_goal.py
Last active January 24, 2024 17:40
Google Ads API set campaign goal
#!/usr/bin/env python
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
Keybase proof

I hereby claim:

  • I am danielfrg on github.
  • I am danielfrg (https://keybase.io/danielfrg) on keybase.
  • I have a public key ASDYKve9COIyFov3ozEHC6eHuRZFZqPQq8b1ezthy4hNVgo

To claim this, I am signing this object:

danielfrg / nutch-to-tdf.py
Created November 6, 2013 16:27
Parse Nutch segments into a TDF (tab-delimited file). Light on memory: only one line is loaded and only one HTML document is held at a time; the trade-off is that it is more I/O intensive. _input is the HTML content dumped from Nutch; _output is the TDF that is generated. Requires: pandas
import pandas as pd
_input = 'dump0'
_output = 'html0.tdf'
df = pd.DataFrame({'url': [], 'html': []})
df.to_csv(_output, sep='\t', index=None)
def append_tdf(urls, html):
danielfrg / jython-pig.job
Last active December 26, 2015 22:08
Example of how to run a Jython UDF on AWS EMR. The example loads a list of URLs, queries each URL, and saves the output. Pig version: 0.11
REGISTER 'utils.py' USING jython AS utils;
urls = LOAD 'INPUT_FILE' USING PigStorage('\t') AS (url:chararray);
query = FOREACH urls GENERATE utils.query(url) AS everything;
file = FOREACH query GENERATE FLATTEN(everything);
STORE file INTO 's3n://OUTPUT_DIR' USING PigStorage('\t');
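The utils.py module registered above is not shown. A minimal sketch of what it might contain, assuming query returns the URL and a slice of the response body as a tab-separated chararray; the pig_util fallback and the injectable fetch parameter are added here purely so the module can be exercised outside Pig:

```python
# Hypothetical utils.py for the Pig script above. Under Pig, outputSchema
# comes from pig_util; a no-op stand-in is defined for local runs.
try:
    from pig_util import outputSchema  # available when executed by Pig
except ImportError:
    def outputSchema(schema):
        def wrap(fn):
            return fn
        return wrap

from urllib.request import urlopen  # Jython 2.x would use urllib2 instead


@outputSchema('everything:chararray')
def query(url, fetch=None):
    """Fetch `url` and return 'url<TAB>first-100-chars-of-body'.

    `fetch` can be injected for testing; by default it does a real HTTP GET.
    """
    fetch = fetch or (lambda u: urlopen(u).read().decode('utf-8', 'replace'))
    try:
        body = fetch(url)
    except Exception:
        body = ''  # failed fetches still emit a row so FLATTEN keeps the URL
    return '%s\t%s' % (url, body[:100])
```

The FLATTEN in the Pig script then splits that tab-separated string back into columns when the result is stored.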
danielfrg / merge-files-hdfs-count-pipeline.py
Last active October 15, 2018 16:18
Luigi pipeline: 1. Reads a set of TDF files from local storage and creates one big JSON file in HDFS. 2. Uses a Hadoop MapReduce job to count the number of words (taken from a field on each JSON object).
import json
import luigi
import luigi.hdfs
import luigi.hadoop
import pandas as pd
import numpy
import pandas
luigi.hadoop.attach(numpy, pandas)
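The preview stops after the attach call. A minimal sketch of the mapper/reducer pair such a luigi.hadoop JobTask would define for step 2, assuming each HDFS line is a JSON object and that the counted words live in a hypothetical 'content' field (the gist does not show the field name):

```python
import json


# Hypothetical word-count mapper/reducer; the 'content' field name is an
# assumption. Inside a luigi.hadoop.JobTask these would be methods
# (def mapper(self, line)); plain functions are used here so they can be
# exercised without a Hadoop cluster.
def mapper(line):
    record = json.loads(line)
    for word in record.get('content', '').split():
        yield word, 1  # emit (word, 1) for every token in the field


def reducer(word, counts):
    yield word, sum(counts)  # Hadoop groups by key, so summing finishes the count
```

luigi.hadoop.attach(numpy, pandas) in the snippet ships those packages to the task nodes so the mapper can import them there.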
danielfrg / clean-html-solr-pipeline.py
Last active May 6, 2019 12:45
Luigi pipeline that: 1. Reads a TDF file using pandas with HTML in the 'content' column and creates another TDF with just the text of the HTML (extracted with BeautifulSoup). 2. Indexes the text into a Solr collection using mysolr.
import re
import json
import luigi
import pandas as pd
from mysolr import Solr
from bs4 import BeautifulSoup
class InputText(luigi.ExternalTask):
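The preview ends at the task declarations. A stand-alone sketch of the HTML-to-text step at the heart of stage 1, written against the stdlib HTMLParser instead of BeautifulSoup so it runs with no dependencies (the gist itself would typically reach for BeautifulSoup's get_text):

```python
from html.parser import HTMLParser


class TextExtractor(HTMLParser):
    """Collect visible text, skipping <script> and <style> contents."""

    def __init__(self):
        super().__init__()
        self.parts = []
        self._skip = 0  # >0 while inside a script/style element

    def handle_starttag(self, tag, attrs):
        if tag in ('script', 'style'):
            self._skip += 1

    def handle_endtag(self, tag):
        if tag in ('script', 'style') and self._skip:
            self._skip -= 1

    def handle_data(self, data):
        if not self._skip:
            self.parts.append(data)


def html_to_text(html):
    parser = TextExtractor()
    parser.feed(html)
    # Collapse runs of whitespace left behind by removed markup.
    return ' '.join(' '.join(parser.parts).split())
```

In the pipeline this function would be applied to each value of the 'content' column before the cleaned TDF is written and handed to the mysolr indexing task.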