This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_token_count_tfidf_df(texts, tokenizer=None, preprocessor=None, analyzer='word', ngram_range=(1,1)): | |
""" | |
Take a list of texts, preprocess and tokenize them, and returns the counts and TF-IDF values for each feature. | |
:param list|pd.Series texts: collection of texts | |
:param tokenizer: tokenizer for the vectorizers. By default tries to load the punkt sentence tokenizer from NLTK. | |
:param preprocessor: Preprocessor for texts. By default converts numbers to "<NUM>" | |
:return pandas.DataFrame: DataFrame of sorted features, counts, and TF-IDFs. | |
""" | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.feature_extraction.text import TfidfVectorizer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Placeholder for custom user CSS | |
mainly to be overridden in profile/static/custom/custom.css | |
This will always be an empty file in IPython | |
*/ | |
@import url('http://fonts.googleapis.com/css?family=Crimson+Text'); | |
@import url('http://fonts.googleapis.com/css?family=Kameron'); | |
@import url('http://fonts.googleapis.com/css?family=Lato:200'); | |
@import url('http://fonts.googleapis.com/css?family=Lato:300'); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function string-pad --description 'Pad strings to a particular length using a character' | |
set -l options | |
set -a options "h/help" | |
set -a options "l/left" | |
set -a options "r/right" | |
set -a options "m/max=!_validate_int" | |
set -a options "n/count=!_validate_int" | |
set -a options "c/char=" | |
argparse --name="string-pad" $options -- $argv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function string-pad --description 'Pad strings to a particular length using a character' | |
set -l options | |
set -a options "h/help" | |
set -a options "l/left" | |
set -a options "r/right" | |
set -a options "m/max=!_validate_int" | |
set -a options "n/count=!_validate_int" | |
set -a options "c/char=" | |
argparse --name="string-pad" $options -- $argv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{% set name = "package_name" %} | |
{% set version = "insert_real_version" %} | |
{% set bundle = "tar.gz" %} | |
{% set hash_type = "sha256" %} | |
{% set hash = "insert-real-hash" %} | |
package: | |
name: {{ name|lower }} | |
version: {{ version }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import ujson as json | |
import gzip | |
import sys | |
from tqdm import tqdm | |
def validate_to_set(x): | |
if x is None: | |
return set() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create the host directory that will hold notebook files.
# -p keeps the script re-runnable when the directory already exists.
mkdir -p data_work

# Start the jupyter/datascience-notebook container detached, publishing
# port 8888 and passing GEN_CERT=yes into the container environment.
# The host path given to -v must be absolute: a bare name such as
# `data_work` is treated by Docker as a *named volume*, not as the
# directory created above, so files would never appear in ./data_work.
docker run -d -p 8888:8888 \
    -e GEN_CERT=yes \
    -v "$(pwd)/data_work":/home/jovyan/work \
    jupyter/datascience-notebook start-notebook.sh \
    --NotebookApp.password='sha1:74ba40f8a388:c913541b7ee99d15d5ed31d4226bf7838f83a50e' # put hashed password here
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# Install any necessary packages with apt-get | |
sudo apt-get install -y curl wget | |
# Get installer | |
curl https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ | |
--output miniconda_installer.sh | |
# Run installer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import numpy as np | |
import pandas | |
import requests | |
from tqdm import tqdm | |
TOTAL_CF_REPO_PAGES = 45 | |
package_tags = list(chain.from_iterable([ | |
BeautifulSoup(requests.get('https://anaconda.org/conda-forge/repo', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def mefi_strptime(x):
    """Parse a timestamp such as ``'Jan 02 2006 03:04:05:123PM'``.

    The expected layout is ``'%b %d %Y %I:%M:%S:%f%p'``: abbreviated month
    name, day, four-digit year, 12-hour clock time, a colon-separated
    fractional-seconds field and an AM/PM marker.

    :param x: timestamp text to parse. Values that are not strings
        (e.g. ``None`` or a float NaN) are treated as unparseable.
    :return datetime.datetime: the parsed timestamp, or the sentinel
        ``datetime(1900, 1, 1, 0, 0, 0)`` when ``x`` cannot be parsed.
    """
    from datetime import datetime

    try:
        return datetime.strptime(x, '%b %d %Y %I:%M:%S:%f%p')
    except (TypeError, ValueError):
        # ValueError: text does not match the format.
        # TypeError: x is not a string at all (missing value); fall back to
        # the same sentinel instead of aborting the caller.
        return datetime(1900, 1, 1, 0, 0, 0)
def int_with_filled_nans(x): | |
import numpy as np |
NewerOlder