Peter M. Landwehr pmlandwehr

## get_token_count_tfidf_df.py
def get_token_count_tfidf_df(texts, tokenizer=None, preprocessor=None, analyzer='word', ngram_range=(1,1)):
    """
    Take a list of texts, preprocess and tokenize them, and return the counts and TF-IDF values for each feature.

    :param list|pd.Series texts: collection of texts
    :param tokenizer: Tokenizer for the vectorizers. By default tries to load the punkt sentence tokenizer from NLTK.
    :param preprocessor: Preprocessor for texts. By default converts numbers to "<NUM>".
    :param analyzer: Defaults to 'word'. Presumably forwarded to the scikit-learn vectorizers imported below;
        the code that consumes it is not visible here -- TODO confirm.
    :param ngram_range: Defaults to (1, 1). Presumably forwarded to the scikit-learn vectorizers imported below;
        the code that consumes it is not visible here -- TODO confirm.
    :return pandas.DataFrame: DataFrame of sorted features, counts, and TF-IDFs.
    """
    # The scikit-learn vectorizers are imported at function scope rather than at module level.
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.feature_extraction.text import TfidfVectorizer

## custom.css
/*
Placeholder for custom user CSS
mainly to be overridden in profile/static/custom/custom.css
This will always be an empty file in IPython
*/

/*
Web fonts served by Google Fonts.
They are requested over HTTPS so that the browser does not block them as
mixed content when the notebook page itself is served over HTTPS.
*/
@import url('https://fonts.googleapis.com/css?family=Crimson+Text');
@import url('https://fonts.googleapis.com/css?family=Kameron');
@import url('https://fonts.googleapis.com/css?family=Lato:200');
@import url('https://fonts.googleapis.com/css?family=Lato:300');

## string-pad.fish
function string-pad --description 'Pad strings to a particular length using a character'

    set -l options
    set -a options "h/help"
    set -a options "l/left"
    set -a options "r/right"
    set -a options "m/max=!_validate_int"
    set -a options "n/count=!_validate_int"
    set -a options "c/char="
    argparse --name="string-pad" $options -- $argv

## string-pad.fish
function string-pad --description 'Pad strings to a particular length using a character'

    set -l options
    set -a options "h/help"
    set -a options "l/left"
    set -a options "r/right"
    set -a options "m/max=!_validate_int"
    set -a options "n/count=!_validate_int"
    set -a options "c/char="
    argparse --name="string-pad" $options -- $argv

## vanilla.yaml
{% set name = "package_name" %}
{% set version = "insert_real_version" %}
{% set bundle = "tar.gz" %}
{% set hash_type = "sha256" %}
{% set hash = "insert-real-hash" %}

package:
  name: {{ name|lower }}
  version: {{ version }}

## jsonlines2csv.py
import csv
import ujson as json
import gzip
import sys
from tqdm import tqdm


def validate_to_set(x):
    if x is None:
        return set()

## run_datascience_docker.sh
# Create the host directory that holds the notebook work.
# -p keeps a re-run from failing when the directory already exists.
mkdir -p data_work

# Start the notebook server detached, publishing port 8888.
# The host directory is bind-mounted by absolute path: a bare name such as
# "data_work" is interpreted by docker as a named volume, not as the
# directory created above.
docker run -d -p 8888:8888 \
-e GEN_CERT=yes \
-v "$(pwd)/data_work:/home/jovyan/work" \
jupyter/datascience-notebook start-notebook.sh \
--NotebookApp.password='sha1:74ba40f8a388:c913541b7ee99d15d5ed31d4226bf7838f83a50e'  # put hashed password here

## conda_jupyter_deploy.sh
#!/usr/bin/env bash

# Install any necessary packages with apt-get
sudo apt-get install -y curl wget

# Get installer
# --fail: exit non-zero on an HTTP error instead of saving the error page as the installer.
# --location: follow redirects so that a redirected download URL still yields the installer script.
curl --fail --location https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
--output miniconda_installer.sh

# Run installer

## cf_cloud_stats.py
from bs4 import BeautifulSoup
import numpy as np
import pandas
import requests
from tqdm import tqdm

TOTAL_CF_REPO_PAGES = 45

package_tags = list(chain.from_iterable([
    BeautifulSoup(requests.get('https://anaconda.org/conda-forge/repo',

## infodump_to_others.py
def mefi_strptime(x):
    """
    Parse a timestamp string such as 'Jan 02 2006 03:04:05:000PM'.

    :param str x: text laid out as '%b %d %Y %I:%M:%S:%f%p'
    :return datetime.datetime: the parsed timestamp, or 1900-01-01 00:00:00 when
        ``strptime`` rejects the text with a ValueError
    """
    import datetime as _dt

    timestamp_layout = '%b %d %Y %I:%M:%S:%f%p'
    try:
        parsed = _dt.datetime.strptime(x, timestamp_layout)
    except ValueError:
        # Text that does not match the layout maps to a fixed placeholder date.
        parsed = _dt.datetime(1900, 1, 1)
    return parsed


def int_with_filled_nans(x):
    import numpy as np
	def get_token_count_tfidf_df(texts, tokenizer=None, preprocessor=None, analyzer='word', ngram_range=(1,1)):
	"""
	Take a list of texts, preprocess and tokenize them, and returns the counts and TF-IDF values for each feature.
	:param list\|pd.Series texts: collection of texts
	:param tokenizer: tokenizer for the vectorizers. By default tries to load the punkt sentence tokenizer from NLTK.
	:param preprocessor: Preprocessor for texts. By default converts numbers to "<NUM>"
	:return pandas.DataFrame: DataFrame of sorted features, counts, and TF-IDFs.
	"""
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.feature_extraction.text import TfidfVectorizer
	/*
	Placeholder for custom user CSS
	mainly to be overridden in profile/static/custom/custom.css
	This will always be an empty file in IPython
	*/

	@import url('http://fonts.googleapis.com/css?family=Crimson+Text');
	@import url('http://fonts.googleapis.com/css?family=Kameron');
	@import url('http://fonts.googleapis.com/css?family=Lato:200');
	@import url('http://fonts.googleapis.com/css?family=Lato:300');
	function string-pad --description 'Pad strings to a particular length using a character'

	set -l options
	set -a options "h/help"
	set -a options "l/left"
	set -a options "r/right"
	set -a options "m/max=!_validate_int"
	set -a options "n/count=!_validate_int"
	set -a options "c/char="
	argparse --name="string-pad" $options -- $argv
	{% set name = "package_name" %}
	{% set version = "insert_real_version" %}
	{% set bundle = "tar.gz" %}
	{% set hash_type = "sha256" %}
	{% set hash = "insert-real-hash" %}

	package:
	name: {{ name\|lower }}
	version: {{ version }}
	import csv
	import ujson as json
	import gzip
	import sys
	from tqdm import tqdm


	def validate_to_set(x):
	if x is None:
	return set()
	mkdir data_work

	docker run -d -p 8888:8888 \
	-e GEN_CERT=yes \
	-v data_work:/home/jovyan/work \
	jupyter/datascience-notebook start-notebook.sh \
	--NotebookApp.password='sha1:74ba40f8a388:c913541b7ee99d15d5ed31d4226bf7838f83a50e' # put hashed password here
	#!/usr/bin/env bash

	# Install any necessary packages with apt-get
	sudo apt-get install -y curl wget

	# Get installer
	curl https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
	--output miniconda_installer.sh

	# Run installer
	from bs4 import BeautifulSoup
	import numpy as np
	import pandas
	import requests
	from tqdm import tqdm

	TOTAL_CF_REPO_PAGES = 45

	package_tags = list(chain.from_iterable([
	BeautifulSoup(requests.get('https://anaconda.org/conda-forge/repo',
	def mefi_strptime(x):
	from datetime import datetime
	try:
	return datetime.strptime(x, '%b %d %Y %I:%M:%S:%f%p')
	except ValueError:
	return datetime(1900, 1, 1, 0, 0, 0)


	def int_with_filled_nans(x):
	import numpy as np