Start a new tmux session:
tmux
Start a new tmux session with a session name:
tmux new -s myname
https://www.ucc-bsnl.co.in/
sudo lshw -C disk
sudo mkfs.ext4 /dev/sdb1
sudo mkdir /mnt/hdd1
sudo mount /dev/sdb1 /mnt/hdd1
sudo chmod -R 777 /mnt/hdd1/
Assuming the CSV looks something like this:
"Distance","E","G","C","B"
"A","3","5","7","3"
"B","4","5","6","0"
"C","4","1","0","6"
"D","5","8","3","6"
Code:
# https://stackoverflow.com/questions/49457443/python-pdfminer-converts-pdf-file-into-one-chunk-of-string-with-no-spaces-betwee
#
# According to this thread, some PDFs mark the entire text as a figure, and by default PDFMiner doesn't perform layout analysis for figure text. To override this behavior, the all_texts parameter needs to be set to True.
#
import io
import pdfminer
from pdfminer.converter import TextConverter
def make_heatmap(text, values, save=None, polarity=1):
    """Pad per-character text/value sequences to a whole number of
    74-character rows for heatmap rendering.

    The original fragment was littered with extraction artifacts
    (trailing '|' characters) that made it a syntax error; they are
    stripped here.  The visible fragment only builds the padded arrays
    and discarded them, so they are now returned.

    Parameters
    ----------
    text : list[str]
        One string (typically one character) per cell.  Newlines are
        rewritten as the two characters '\\n' so they render visibly.
    values : list
        One numeric value per cell, same length as *text*.
    save : optional
        Unused in the visible fragment -- presumably a path to save a
        figure to; TODO confirm against the full original.
    polarity : int
        Unused in the visible fragment; TODO confirm its meaning.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray, numpy.ndarray)
        Padded text array, padded value array (padding cells are 0),
        and a 0/1 mask where 1 marks padding cells.
    """
    import math
    import numpy as np

    # Layout constants kept from the original fragment; cell_height and
    # cell_width are presumably used by the (missing) plotting code.
    cell_height = .325
    cell_width = .15
    n_limit = 74  # characters per heatmap row

    # Show newlines literally instead of breaking the row layout.
    text = [s.replace('\n', '\\n') for s in text]

    # Pad every sequence up to a whole multiple of n_limit cells.
    num_chars = len(text)
    total_chars = math.ceil(num_chars / float(n_limit)) * n_limit
    pad = total_chars - num_chars
    mask = np.array([0] * num_chars + [1] * pad)
    text = np.array(text + [' '] * pad)
    values = np.array(values + [0] * pad)
    return text, values, mask
For small files:
wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1rC6LWGNkHaZkuojCEWDqSKcDGwFMBTYZ' -O transformer_semeval.clf
For large files:
wget --load-cookies cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1rC6LWGNkHaZkuojCEWDqSKcDGwFMBTYZ' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1rC6LWGNkHaZkuojCEWDqSKcDGwFMBTYZ" -O transformer_semeval.clf
Learning Structured Representation for Text Classification
via Reinforcement Learning
https://github.com/keavil/AAAI18-code
def reduce_mem_usage(df):
    """Iterate through all the columns of a dataframe and modify the data type
    to reduce memory usage.

    Each numeric column is downcast to the narrowest int/float subtype whose
    range covers the column's observed min/max; object columns are left
    untouched.  The original fragment was truncated right after reading the
    column dtype (and carried syntax-breaking '|' extraction artifacts), so
    the standard downcasting body is restored here.

    Parameters
    ----------
    df : pandas.DataFrame
        Modified in place (columns are reassigned with smaller dtypes).

    Returns
    -------
    pandas.DataFrame
        The same dataframe, with downcast column dtypes.
    """
    import numpy as np

    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    for col in df.columns:
        col_type = df[col].dtype
        if col_type == object:
            # Non-numeric columns are left as-is.
            continue
        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            # Narrowest integer type whose range covers [c_min, c_max].
            for int_type in (np.int8, np.int16, np.int32, np.int64):
                info = np.iinfo(int_type)
                if info.min <= c_min and c_max <= info.max:
                    df[col] = df[col].astype(int_type)
                    break
        else:
            # Narrowest float type whose range covers [c_min, c_max].
            for float_type in (np.float16, np.float32, np.float64):
                info = np.finfo(float_type)
                if info.min <= c_min and c_max <= info.max:
                    df[col] = df[col].astype(float_type)
                    break
    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    return df
# Universal Sentence Encoder demo (TF1-style tensorflow_hub usage).
# NOTE(review): syntax-breaking '|' extraction artifacts stripped from the
# original paste.  hub.Module / tf.Session is the deprecated TF1 API --
# confirm the target TensorFlow version before reusing this snippet.
import tensorflow as tf
import tensorflow_hub as hub

# Import the Universal Sentence Encoder's TF Hub module
# (downloaded from tfhub.dev on first use).
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/3"
embed = hub.Module(module_url)

# Compute a representation for each message, showing various lengths supported.
messages = ["That band rocks!", "That song is really cool."]
with tf.Session() as session: |