Skip to content

Instantly share code, notes, and snippets.

View allanj's full-sized avatar
🎯
Focusing

Allan Jie allanj

🎯
Focusing
View GitHub Profile
@allanj
allanj / DependencyTree.java
Created October 15, 2017 06:23
Dependency Tree class for StatNLP framework
package org.statnlp.example.nerelation.struct;
import org.statnlp.commons.types.Sentence;
import gnu.trove.list.TIntList;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.TIntObjectMap;
import gnu.trove.map.hash.TIntObjectHashMap;
import gnu.trove.stack.TIntStack;
@allanj
allanj / memory.lua
Created October 24, 2017 08:10 — forked from vzhong/memory.lua
require 'nn'
require 'dpnn'
require 'rnn'
require 'nngraph'
local opt = {
n_seq = 3,
d_hid = 4,
d_mem = 20,
n_batch = 2,
@allanj
allanj / word2vec_bin2txt.py
Created March 12, 2019 05:51
Convert a word2vec binary file to a text file
#
# @author: Allan
#
def convert(input, output):
from gensim.models.keyedvectors import KeyedVectors
embedding = KeyedVectors.load_word2vec_format(input, binary=True)
f= open(output, 'w', encoding='utf-8')
@allanj
allanj / java_statnlp_print_feature.java
Created March 21, 2019 05:57
Print the feature string in the StatNLP Java framework (put in the main file)
//main.java
//First of all, after create `GlobalNetworkParam` object.
// run the following code:
GlobalNetworkParam gnp = new GlobalNetworkParam(optimizer, gnnp);
gnp.setStoreFeatureReps();
/************************
After the model has been trained.
model.train(...)
@allanj
allanj / iob1toiob2_funct.py
Last active March 29, 2021 14:37
Convert the tags from IOB1 to IOB2 tagging scheme
"""
IOB1: O I I B I
IOB2: O B I B I
"""
from typing import List
def iob2(tags: List[str]):
"""
Check that tags have a valid IOB format.
@allanj
allanj / BIOtoBIOES.py
Last active March 15, 2022 11:49
Convert the IOB2 tagging scheme to BIOES tagging scheme
def iob_iobes(tags):
"""
IOB2 (BIO) -> IOBES
"""
new_tags = []
for i, tag in enumerate(tags):
if tag == 'O':
new_tags.append(tag)
elif tag.split('-')[0] == 'B':
if i + 1 != len(tags) and \
@allanj
allanj / dep_parse.java
Created April 10, 2019 09:54
Script for dependency parsing of the dataset
package corenlp.process;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.parser.nndep.DependencyParser;
import streamlit as st
# To make things easier later, we're also importing numpy and pandas for working with sample data.
import numpy
import pandas
# Don't worry, we'll explain this method in the next section. We need to make at least one
# call to Streamlit in order to generate a report.
st.title("Demo Test")
# streamlit.header("I'm a large heading")
# streamlit.subheader("I'm not a large heading")
from typing import List, TypeVar, Callable
import numpy as np
T = TypeVar('T')
def bootstrap_paired_ttest(results_a: List[T],
results_b: List[T],
evaluate_func: Callable[[List[T]], float],
sample_times: int = 10000,
@allanj
allanj / coref_bert.jsonnet
Created October 16, 2019 03:30
Coreference with BERT implemented using the latest AllenNLP package (0.9.0)
local bert_model = "bert-base-uncased";
local train_path = "./datasets/coref/train.english.v4_gold_conll";
local dev_path = "./datasets/coref/dev.english.v4_gold_conll";
local test_path = "./datasets/coref/test.english.v4_gold_conll";
{
"dataset_reader": {
"type": "coref",
"token_indexers": {
"bert": {