Install poppler-utils
on ubuntu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pages = [ | |
"https://jobs.lever.co/lever/?department=Engineering&team=Engineering&commitment=Full-time", | |
"https://jobs.lever.co/yelp/?department=Engineering%20and%20Product&team=Engineering&commitment=Full%20Time", | |
"https://jobs.lever.co/formswift?team=Engineering", | |
"https://jobs.lever.co/coursera?department=Data&team=Data%20Science", | |
"https://jobs.lever.co/medium?team=Engineering", | |
"https://jobs.lever.co/headspace?department=Technology", | |
"https://jobs.lever.co/eventbrite?team=Data", | |
"https://jobs.lever.co/eventbrite?team=Engineering", | |
"https://jobs.lever.co/eventbrite?team=IT", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script generates the folder structure for ljspeech-1.1 processing from mimic-recording-studio database | |
# This is a modified version of what was written by Thorsten Müller | |
import glob | |
import sqlite3 | |
import ffmpeg | |
import os | |
from shutil import copyfile | |
from shutil import rmtree |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import sys | |
import re | |
MAX_TOKENS = 15 | |
ENGLISH = ['and', 'the', 'is'] | |
matched = 0 | |
total = 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Returns the number of dupes and which ones are duped | |
cat corpus-clean2.tsv | sort -f | uniq -c | sort -gr | grep -v ' 1 ' > hd.tsv | |
# Counts the distinct lines (each group of duplicates counted once) | |
cat corpus-clean2.tsv | sort -f | uniq -c | sort -gr | wc -l | |
sort -u |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Command to run $ python3 clean.py < corpus.tsv > corpus-clean.tsv | |
import csv | |
import sys | |
import re | |
MAX_TOKENS = 15 | |
ENGLISH = ['and', 'the', 'is'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try: | |
from xml.etree.cElementTree import XML | |
except ImportError: | |
from xml.etree.ElementTree import XML | |
import os | |
import re | |
import pathlib | |
import zipfile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const mysql = require("mysql"); | |
const Json2csvParser = require("json2csv").Parser; | |
const fs = require("fs"); | |
// Create a connection to the database | |
const connection = mysql.createConnection({ | |
host: "localhost", | |
user: "***********", | |
password: "*******", | |
database: "*******" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Returns the first character of the string `str`.
 *
 * @param {string} str - The string to read from.
 * @returns {string} The first UTF-16 code unit of `str`, or an empty
 *     string when `str` is empty.
 */
// `var` is kept deliberately so the binding semantics match the rest of the file.
var firstCharacter = function(str) {
    // slice (rather than str[0]) yields "" instead of undefined for "".
    return str.slice(0, 1);
};
/**
 * Returns the last character of the string `str`.
 *
 * @param {string} str - The string to read from.
 * @returns {string} The last UTF-16 code unit of `str`, or an empty
 *     string when `str` is empty.
 */
// `var` is kept deliberately so the binding semantics match the rest of the file.
var lastCharacter = function(str) {
    // A negative start index counts back from the end of the string.
    return str.slice(-1);
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* @param {number[]} nums | |
* @param {number} target | |
* @return {number[]} | |
*/ | |
var twoSum = function(nums, target) { | |
let numsObj = new Map() | |
for(let i=0;i<nums.length;i++){ | |
let num = nums[i]; |