alvations

## alchemy_call_limit.py
"""Query AlchemyAPI to determine number of API calls still available"""
# -*- coding: utf-8 -*-
import json
import requests

def get_api_key():
    """Return the API key stored on the first line of ``api_key.txt``.

    The file is looked up relative to the current working directory. Only
    the first line is read, and surrounding whitespace (including the
    trailing newline) is stripped. An empty file yields an empty string.

    Raises:
        OSError: if ``api_key.txt`` cannot be opened (e.g. it is missing).
    """
    # Load API key (40 HEX character key) from local file.
    # The context manager closes the handle deterministically instead of
    # leaving it open until garbage collection.
    with open('api_key.txt') as key_file:
        return key_file.readline().strip()

## nltk-intro.py
import nltk

text = """The Buddha, the Godhead, resides quite as comfortably in the circuits of a digital
computer or the gears of a cycle transmission as he does at the top of a mountain
or in the petals of a flower. To think otherwise is to demean the Buddha...which is
to demean oneself."""

# Used when tokenizing words
sentence_re = r'''(?x)      # set flag to allow verbose regexps
      ([A-Z])(\.[A-Z])+\.?  # abbreviations, e.g. U.S.A.

## bulba-parser.rb
# This script parses a dump of Bulbapedia's Pokémon pages into a JSON file
# with details about what Pokémon are obtainable in respective regions
# (specifically, the latest series of games set in a specific region).

require 'nokogiri'
require 'json'

# An XML dump of all of Bulbapedia's Pokémon pages is required to exist at
# this path. It can be generated using this special page:
#   http://bulbapedia.bulbagarden.net/wiki/Special:Export

## google_twunter_lol
easterEgg.BadWorder.list={
"4r5e":1,
"5h1t":1,
"5hit":1,
a55:1,
anal:1,
anus:1,
ar5e:1,
arrse:1,
arse:1,

## docx2md.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                alvations
                / docx2md.md
            
            
              Created
              May 4, 2017 09:07
                — forked from vdavez/docx2md.md
            
              
                Convert a Word Document into MD
              
          
    Converting a Word Document to Markdown in Two Moves

The Problem

A lot of important government documents are created and saved in Microsoft Word (*.docx). But Microsoft Word is a proprietary format, and it's not really useful for presenting documents on the web. So, I wanted to find a way to convert a .docx file into markdown.
The Solution

As it turns out, there are several open-source tools that allow for conversion between file types. Pandoc is one of them, and it's powerful. In fact, pandoc's website says "If you need to convert files from one markup format into another, pandoc is your swiss-army knife." But, although pandoc can convert from markdown into .docx, it doesn't work in the other direction.

  
## colors.py
class ColorPrinter:
    """
    Usage:
    cprint = ColorPrinter()
    cprint.cfg('c','m','bux').out('Hello','World!')
    cprint.rst().out('Bye now...')

    See: http://stackoverflow.com/a/21786287/472610
    See: https://en.wikipedia.org/wiki/ANSI_escape_code
    """

## colors.py
class ColorPrinter:
    """
    Usage:
    cprint = ColorPrinter()
    cprint.cfg('c','m','bux').out('Hello','World!')
    cprint.rst().out('Bye now...')

    See: http://stackoverflow.com/a/21786287/472610
    See: https://en.wikipedia.org/wiki/ANSI_escape_code
    """

## mini_sequence_labeler.py
"""
PyTorch implementation of a sequence labeler (POS tagger).

Basic architecture:
 - take words
 - run through bidirectional GRU
 - predict labels one word at a time (left to right), using a recurrent neural network "decoder"

The decoder updates hidden state based on:
 - most recent word

## dynet-tagger.py
"""
DyNet implementation of a sequence labeler (POS tagger).
This is a translation of this tagger in PyTorch: https://gist.github.com/hal3/8c170c4400576eb8d0a8bd94ab231232

Basic architecture:
 - take words
 - run through bidirectional GRU
 - predict labels one word at a time (left to right), using a recurrent neural network "decoder"
The decoder updates hidden state based on:
 - most recent word

## mean_target_encoding.py
import os
import os.path as op
from time import time
import dask.dataframe as ddf
import dask.array as da
from dask import delayed, compute
from distributed import Client


def make_categorical_data(n_samples=int(1e7), n_features=10):
	"""Query AlchemyAPI to determine number of API calls still available"""
	# -- coding: utf-8 --
	import json
	import requests

	def get_api_key():
	# Load API key (40 HEX character key) from local file
	key = open('api_key.txt').readline().strip()
	return key
	import nltk

	text = """The Buddha, the Godhead, resides quite as comfortably in the circuits of a digital
	computer or the gears of a cycle transmission as he does at the top of a mountain
	or in the petals of a flower. To think otherwise is to demean the Buddha...which is
	to demean oneself."""

	# Used when tokenizing words
	sentence_re = r'''(?x) # set flag to allow verbose regexps
	([A-Z])(\.[A-Z])+\.? # abbreviations, e.g. U.S.A.
	# This script parses a dump of Bulbapedia's Pokémon pages into a JSON file
	# with details about what Pokémon are obtainable in respective regions
	# (specifically, the latest series of games set in a specific region).

	require 'nokogiri'
	require 'json'

	# An XML dump of all of Bulbapedia's Pokémon pages is required to exist at
	# this path. It can be generated using this special page:
	# http://bulbapedia.bulbagarden.net/wiki/Special:Export
	easterEgg.BadWorder.list={
	"4r5e":1,
	"5h1t":1,
	"5hit":1,
	a55:1,
	anal:1,
	anus:1,
	ar5e:1,
	arrse:1,
	arse:1,
	class ColorPrinter:
	"""
	Usage:
	cprint = ColorPrinter()
	cprint.cfg('c','m','bux').out('Hello','World!')
	cprint.rst().out('Bye now...')

	See: http://stackoverflow.com/a/21786287/472610
	See: https://en.wikipedia.org/wiki/ANSI_escape_code
	"""
	"""
	PyTorch implementation of a sequence labeler (POS taggger).

	Basic architecture:
	- take words
	- run though bidirectional GRU
	- predict labels one word at a time (left to right), using a recurrent neural network "decoder"

	The decoder updates hidden state based on:
	- most recent word
	"""
	DyNet implementation of a sequence labeler (POS taggger).
	This is a translation of this tagger in PyTorch: https://gist.github.com/hal3/8c170c4400576eb8d0a8bd94ab231232

	Basic architecture:
	- take words
	- run though bidirectional GRU
	- predict labels one word at a time (left to right), using a recurrent neural network "decoder"
	The decoder updates hidden state based on:
	- most recent word
	import os
	import os.path as op
	from time import time
	import dask.dataframe as ddf
	import dask.array as da
	from dask import delayed, compute
	from distributed import Client


	def make_categorical_data(n_samples=int(1e7), n_features=10):