Skip to content

Instantly share code, notes, and snippets.

""" Finetuning the library models for sequence classification on GLUE (Bert, XLM, XLNet, RoBERTa).-mostly copy paste from huggingface tun_glue"""
from __future__ import absolute_import, division, print_function
import argparse
import glob
import logging
import os
import random
""" Finetuning the library models for sequence classification on GLUE (Bert, XLM, XLNet, RoBERTa).-mostly copy paste from huggingface tun_glue"""
from __future__ import absolute_import, division, print_function
import argparse
import glob
import logging
import os
import random
#!/bin/bash
# Download the videos listed in a batch file and archive them to S3.
#
# Positional arguments:
#   $1  name of the batch file (a list of URLs for youtube-dl --batch-file)
#   $2  prefix under s3://aol-on-sandbox/ the batch file is copied from
#   $3  prefix under s3://aol-on-sandbox/ the resulting tarball is moved to
#
# -p: a directory left over from a previous run must not be an error.
mkdir -p extract
file=$1
src=$2
dst=$3
echo processing "$file"
sudo aws s3 cp "s3://aol-on-sandbox/$src/$file" "/home/ubuntu/$file"
# Read the batch file from the location it was just downloaded to, instead
# of relying on the current directory being /home/ubuntu.
# -i keeps going when individual downloads fail.
youtube-dl -i -f 'bestvideo[height=480]/best' -o 'extract/%(id)s.%(ext)s' --batch-file="/home/ubuntu/$file"
# Write the tarball where the following `aws s3 mv` expects to find it.
tar -zcvf "/home/ubuntu/$file.tar.gz" extract
sudo aws s3 mv "/home/ubuntu/$file.tar.gz" "s3://aol-on-sandbox/$dst/$file.tar.gz"
#!/bin/bash
# Fetch a gzipped tar archive from S3, unpack it and run html2text.py on it.
#
# Positional arguments:
#   $1  name of the archive object
#   $2  prefix under s3://aol-on-sandbox/ the archive is copied from
#   $3  destination prefix
#       NOTE(review): dst is assigned but not referenced by any command
#       below -- confirm where it is meant to be used.
#
# The work directories are created under /home/ubuntu because that is where
# the html2text.py call below looks for them; -p tolerates leftovers from a
# previous run.
mkdir -p /home/ubuntu/extract
mkdir -p /home/ubuntu/text
file=$1
src=$2
dst=$3
echo processing "$file"
aws s3 cp "s3://aol-on-sandbox/$src/$file" "/home/ubuntu/$file"
tar -xzf "/home/ubuntu/$file" -C /home/ubuntu/extract
# The first path is a directory literally named "mkdir " (trailing space)
# inside the unpacked archive. Presumably the arguments are input directory
# then output directory -- verify against html2text.py.
python /home/ubuntu/html2text.py /home/ubuntu/extract/root/mkdir\ / /home/ubuntu/text
from readability import Document
import os
import sys
import io
for fn in os.listdir(sys.argv[1]):
try:
with io.open(os.path.join(sys.argv[1], fn), encoding='utf-8') as f:
text = f.read()
if 'batch_fails' not in fn :
doc = Document(text)
#!/bin/bash
rm -rf extract
rm -rf text
while IFS= read -r file; do
mkdir extract
mkdir text
echo processing $file
aws s3 cp s3://aol-on-sandbox/googlenews/links/html/$file /home/ubuntu/$file
tar -xzf /home/ubuntu/$file -C extract
#!/bin/bash
# For each line of the list file given as $1: download a feature CSV from
# S3, concatenate it with a local file and upload the result under
# features/test/.
#
# Positional arguments:
#   $1  path of the list file, one name per line
#
# -r stops `read` from interpreting backslashes in the names. With the
# default IFS, `read` still trims leading and trailing whitespace.
while read -r line
do
    echo processing "$1"
    # NOTE(review): "*( )" only means "leading spaces" when `shopt -s extglob`
    # is enabled, and `read` has already trimmed the line -- this expansion
    # looks redundant; confirm before removing.
    name="${line##*( )}"
    echo "$name"
    # The "'...'" quoting puts literal single quotes into the object keys and
    # local file names.
    # NOTE(review): the download is named after $name, but the `cat` and the
    # uploaded temp file are named after $1 (the list file) -- this looks like
    # a leftover from a single-argument version; confirm which is intended.
    aws s3 cp s3://aol-on-sandbox/clickture/bing/features/"'$name'".csv . && cat "'$1'".csv test/"$1" > "'$1'".csv.temp && aws s3 cp "'$1'".csv.temp s3://aol-on-sandbox/clickture/bing/features/test/"'$name'".csv
done < "$1"
#!/bin/bash
# Download one image archive from S3, unpack it and convert GIF/BMP images
# to JPEG with GraphicsMagick.
#
# Positional arguments:
#   $1  base name of the archive; the object key and local file name wrap it
#       in literal single quotes ('<name>'.tar.gz)
#
# Append the CUDA 7.5 library directory. The ${VAR:+...} form avoids a
# leading ":" when LD_LIBRARY_PATH is unset or empty; the dynamic loader
# treats an empty entry as the current directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/usr/local/cuda-7.5/targets/x86_64-linux/lib/"
echo processing "$1"
# -p: a directory left over from a previous run must not be an error.
mkdir -p images
aws s3 cp s3://aol-on-sandbox/clickture/bing/html/"'$1'".tar.gz .
tar -xzf "'$1'".tar.gz -C images
rm -f "'$1'".tar.gz
# Convert the GIF files to JPEG, then delete the GIF originals.
gm mogrify -format jpeg images/root/images/*.GIF
rm -f images/root/images/*.GIF
# Same conversion for the BMP files.
gm mogrify -format jpeg images/root/images/*.BMP
#!/bin/bash
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-7.5/targets/x86_64-linux/lib/
while IFS= read -r file; do
echo processing $file
name=$(echo $file | cut -f 1 -d '.')
mkdir images
aws s3 cp s3://aol-on-sandbox/clickture/bing/html/"'$name".tar.gz .
tar -xzf "'$name".tar.gz -C images
rm -f "'$name".tar.gz
gm mogrify -format jpeg images/root/images/*.GIF
require 'torch'
require 'paths'
--print(arg)
function string:split( inSplitPattern, outResults )
if not outResults then
outResults = {}
end
local theStart = 1