Skip to content

Instantly share code, notes, and snippets.

@dimidd
dimidd / gist:7ce98d21640949954136
Created June 9, 2015 11:24
unzip all archives to their dirs
# Extract every .zip archive in the current directory into a directory named
# after the archive (without the .zip suffix). The glob is iterated directly
# instead of parsing `ls` output, so file names containing whitespace stay intact.
for f in *.zip; do
  # In shells that leave an unmatched pattern literal, skip the bogus "*.zip" entry.
  [ -e "$f" ] || continue
  unzip "$f" -d "${f%.zip}"
done
@dimidd
dimidd / vocabulary.rb
Created October 27, 2015 19:22 — forked from wktk/vocabulary.rb
Read word2vec binary file in Ruby
class Vocabulary
# @param [String] file_name Path to a word2vec vocabulary binary file
# Stores the path, resets the counters and the word table, then immediately
# calls read_file.
def initialize(file_name)
@file_name = file_name
# Counters start at zero and the word table starts empty.
# NOTE(review): presumably read_file fills these in — it is not visible here, confirm.
@size = 0
@vocabulary = {}
@word_count = 0
# Called last so that all of the state above exists before reading begins.
read_file
end
@dimidd
dimidd / file1.py
Last active November 18, 2015 11:00 — forked from anonymous/file1.py
Pasted from IPython
import pymongo
cl = pymongo.MongoClient()
# The database name contains a hyphen, so attribute access cannot be used:
# `cl.ner-dict` parses as the subtraction `cl.ner - dict` and raises.
# Item access is required for names that are not valid identifiers.
db = cl['ner-dict']
c = db.dict
# Call the method; a bare `c.count` only references it and does nothing.
# NOTE(review): Collection.count() was removed in PyMongo 4 — switch to
# c.count_documents({}) if this is run against a newer driver.
c.count()
qs = c.find()
#!/bin/zsh
#
# Highlight a given file and copy it as RTF.
#
# Simon Olofsson <simon@olofsson.de>
#
# Abort the script as soon as a command exits with a non-zero status.
set -o errexit
# Treat expansion of an unset parameter as an error instead of an empty string.
set -o nounset
@dimidd
dimidd / rails_postgres_enum.rb
Created June 14, 2016 12:50 — forked from checkbutton/rails_postgres_enum.rb
Support for PostgreSQL enum types in Rails 4.2 (including schema dump)
module ActiveRecord
class SchemaDumper
# Writes the schema dump to +stream+ section by section and returns the
# same stream object.
#
# @param stream the object each section writer receives as its only argument
# @return the +stream+ that was passed in
def dump(stream)
  # Section writers run in this exact order; enums are emitted after
  # extensions and before tables.
  %i[header extensions enums tables trailer].each do |section|
    send(section, stream)
  end
  stream
end
module Main where
import Syntax
import Parser
import Eval
import Pretty
import Counter
import Control.Monad
import Control.Monad.Trans
@dimidd
dimidd / dump_fixtures.rake
Last active November 24, 2016 19:50 — forked from ivanilves/dump_fixtures.rake
Dump fixtures from the current environment's database
# Source: http://blog.ivanilves.com/2016/rails-dump-fixtures-from-db/
# Usage:
# * rake db:fixtures:dump to dump all models.
# * rake db:fixtures:dump MODELS="user billing_account" to dump only User and BillingAccount models.
#
namespace :db do
namespace :fixtures do
#!/usr/bin/env ruby
require "bundler/setup"
require "shoryuken/cli"
# Resolve config/environment.rb against the current working directory.
environment_path = File.expand_path("config/environment.rb")
# Load the application environment only when that file actually exists, so the
# script also works outside an application directory.
require environment_path if File.exist?(environment_path)
# If the Rails constant is defined at this point, eagerly load the application's code.
Rails.application.eager_load! if defined?(Rails)
import re
import spacy
def annotate(xml):
# xml matches the pattern above
if xml[1] == "/":
return xml[2:-1] + "-end"
{
"dataset_reader": {
"type": "conll2003",
"tag_label": "ner",
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"token_characters": {