Skip to content

Instantly share code, notes, and snippets.

View angelosalatino's full-sized avatar
💭
Stay positive

Angelo A. Salatino angelosalatino

💭
Stay positive
View GitHub Profile
@angelosalatino
angelosalatino / latex_comments_cleaner.py
Last active July 19, 2024 11:39
LaTeX comment remover. Scrolls through the file line by line and removes the lines that begin as comments.
#!/usr/bin/env python
# coding: utf-8
# In[1]:
from datetime import datetime as dt
import shutil
import os
@angelosalatino
angelosalatino / news_immigration_from_aylien.py
Last active November 15, 2023 16:41
This Python script downloads thematic news (on immigration) from Aylien
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 16 17:37:07 2023
@author: aas358
"""
from __future__ import print_function
@angelosalatino
angelosalatino / word_similarity_with_bert_and_annoy.py
Last active February 1, 2023 17:56
Using ANNOY to find the most similar word via embeddings
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 30 16:31:14 2023
@author: aas358
"""
import torch
from transformers import AutoTokenizer, AutoModel
@angelosalatino
angelosalatino / get_bert_embeddings.py
Last active February 1, 2023 17:56
Get contextual embeddings from each individual word in a sentence.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 30 11:47:48 2023
@author: aas358
"""
import torch
from transformers import AutoTokenizer, AutoModel
@angelosalatino
angelosalatino / neo4j_aura_to_igraph.py
Last active January 29, 2023 16:56
Exporting Neo4J Aura graph into iGraph - python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 21 16:23:33 2023
@author: aas358
"""
from neo4j import GraphDatabase
# Exporting entire database:
mysqldump -u user -p database --opt | gzip > database.sql.gz
# Export table
mysqldump -uroot --opt --databases DB_NAME --tables TABLE_NAME | gzip > /tmp/TABLE_NAME.export.sql.gz
# Importing:
gunzip < database.sql.gz | mysql -u user -p database
# Credit http://failshell.io/mysql/using-gzip-and-gunzip-with-mysql-to-importexport-backups/
@angelosalatino
angelosalatino / download-es-index.py
Created May 6, 2020 14:37
Download ElasticSearch Index (just to be sure)
from elasticsearch import Elasticsearch
import logging
import json
# import time
TIMEOUT_TIME = 1500
# start_time = time.time()
logging.basicConfig(filename='mylog.log',format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
@angelosalatino
angelosalatino / creating-word2vec-model.py
Last active September 19, 2019 13:33
Script that trains the word2vec model for the CSO Classifier
from gensim.models import word2vec
######################################################
# READ SENTENCES
sentences = read_sentences(file="corpus.txt")
######################################################
######################################################
# PARAMETERS
model_name='cso-classifier'
@angelosalatino
angelosalatino / caching-word2vec-model.py
Last active September 18, 2019 17:15
Dictionary that connects all tokens available in the vocabulary of the word2vec model with the CSO topics
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 22:52:14 2019
@author: angelosalatino
"""
import Levenshtein.StringMatcher as ls
import json
@angelosalatino
angelosalatino / scrolling_elasticsearch.py
Last active January 30, 2023 16:57
Scroll on ElasticSearch
from elasticsearch import Elasticsearch
import logging
logging.basicConfig(filename='mylog.log',format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logging.info("Files opened ...")
es = Elasticsearch([{'host': '0.0.0.0', 'port': 9200}])