Skip to content

Instantly share code, notes, and snippets.

View lumpidu's full-sized avatar

Daniel lumpidu

View GitHub Profile
import argparse
import hashlib
import os
import urllib
import warnings
from collections import OrderedDict
import torch
from torch import nn
from tqdm import tqdm
@lumpidu
lumpidu / gist:f9d068146564f9aea94e42ed2c04f68d
Created May 4, 2023 17:40
Create embeddings from given LM
import torch
import transformers
import argparse
from pytictoc import TicToc
def load_model_and_tokenizer(model_name):
    """Load the tokenizer and the model identified by ``model_name``.

    Both objects are built with ``from_pretrained`` on the ``transformers``
    auto classes, using the same identifier. The tokenizer is loaded first,
    then the model.

    Args:
        model_name: Identifier handed unchanged to both ``from_pretrained``
            calls.

    Returns:
        A ``(model, tokenizer)`` tuple. Note the order: the model comes
        first even though the tokenizer is loaded first.
    """
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
    model = transformers.AutoModel.from_pretrained(model_name)
    return model, tokenizer
@lumpidu
lumpidu / android_ares_config.patch
Created February 25, 2019 11:55
Build gRPC for Android
46c46
< #define GETSERVBYPORT_R_ARGS 6
---
> #define GETSERVBYPORT_R_ARGS
131c131
< #define HAVE_GETSERVBYPORT_R
---
> /* #undef HAVE_GETSERVBYPORT_R */
46c46
< #define GETSERVBYPORT_R_ARGS 6
@lumpidu
lumpidu / gcc 5 on ubuntu 14.04
Created April 29, 2016 14:19 — forked from beci/gcc 5 on ubuntu 14.04
use gcc 5.x on ubuntu 14.04
sudo add-apt-repository ppa:ubuntu-toolchain-r/test
sudo apt-get update
sudo apt-get install gcc-5 g++-5
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 60 --slave /usr/bin/g++ g++ /usr/bin/g++-5
#!/usr/bin/ruby
require 'json'
require 'json-schema'
schema = '{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "demo",
"type": "object",
"properties": {
#!/usr/bin/ruby
require 'json'
require 'json-schema'
schema = '{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "demo",
"type": "object",
"properties": {
@lumpidu
lumpidu / gist:2768030
Created May 22, 2012 10:00
Post Mapping to ES
{
"posts" : {
"post" : {
"properties" : {
"converted_text" : {
"type" : "string"
},
"converted_text_size" : {
"type" : "long"
},
@lumpidu
lumpidu / Model.rb
Created May 22, 2012 08:08
Post Model
class Post < ActiveRecord::Base
include Tire::Model::Search
include Tire::Model::Callbacks
mapping do
indexes :id, :index => :not_analyzed
indexes :title, :analyzer => 'snowball', :boost => 3
indexes :url, :analyzer => 'url'
indexes :created_at, :type => 'date', :include_in_all => false
@lumpidu
lumpidu / gist:2710254
Created May 16, 2012 13:12
Search ES
# Searches ElasticSearch fulltext engine via Tire
#
# @param term Search term to be used
# It has to start with a prefix specifying the field(s) to be searched:
# e.g.
# _all:Apple
# converted_text:Apple
# field_summary:Apple, etc.
#
# Each Lucene search term can be used
@lumpidu
lumpidu / valid_utf8.rb
Created April 3, 2012 21:12
Test valid UTF-8
# Tests str for valid UTF-8 as described here:
# https://secure.wikimedia.org/wikipedia/en/wiki/Utf8
#
# The following options are supported:
#
# :bmp_only Basic Multilingual Plane: only 1,2 byte characters are valid
# :debug prints debug info to stdout in case of an error
#
def valid_utf8?(str, options={})
debug = options[:debug] || false