Skip to content

Instantly share code, notes, and snippets.

# SpaCy NER and sentence parser version 1
import os
import pandas as pd
import spacy
from spacy.gold import biluo_tags_from_offsets
from spacy.vocab import Vocab
from spacy.tokens import Doc
from itertools import combinations
@whitestones011
whitestones011 / google_custom_search.py
Last active July 25, 2019 17:26
Google Custom Search
"""Custom Search Parameters:
https://developers.google.com/custom-search/json-api/v1/reference/cse/list
"""
import pprint
import pandas as pd
from googleapiclient.discovery import build
def google_search(name,keyword,page_start):
# Build a service object for interacting with the API. Visit
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@whitestones011
whitestones011 / elasticsearch_bulk_upload.ipynb
Last active October 1, 2018 15:47
Bulk upload into Elasticsearch on AWS
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@whitestones011
whitestones011 / email_me.py
Created September 18, 2018 16:22
Send automatic email with Python
import win32com.client
def sendEmail(subject,body,attachment):
olMailItem = 0x0
obj = win32com.client.Dispatch("Outlook.Application")
newMail = obj.CreateItem(olMailItem)
newMail.Subject = subject
newMail.Body = body
@whitestones011
whitestones011 / selenium_webdriver.py
Created September 18, 2018 16:19
Scrape web data with Selenium WebDriver
import os
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def make_dir(directory):
@whitestones011
whitestones011 / gleif.py
Created September 18, 2018 16:10
Parse XML with ElementTree for gleif-concatenated-file-rr.xml
import pandas as pd
import xml.etree.ElementTree as ET
# Name of the GLEIF concatenated XML file to parse; a relative path, so it is
# resolved against the current working directory.
xmlFile='20180209-gleif-concatenated-file-rr.xml'
# ET.parse loads the entire document into an in-memory element tree.
tree = ET.parse(xmlFile)
# Root element of the parsed document.
root=tree.getroot()
# XML Schema namespace URI in ElementTree's "{uri}" prefix form, as used when
# building namespace-qualified tag names. Its use is not visible in this
# excerpt -- NOTE(review): confirm against the rest of the script.
xmlns_xs = '{http://www.w3.org/2001/XMLSchema}'