Skip to content

Instantly share code, notes, and snippets.

import spacy
def hello_w():
ok
@NewscatcherAPI
NewscatcherAPI / init_matcher.py
Last active December 15, 2021 18:29
spaCy Matcher Blog
import spacy
# Matcher lives in the spacy.matcher submodule and must be imported explicitly.
from spacy.matcher import Matcher

# Load the 'en_core_web_sm' pipeline and build a rule-based matcher on its vocabulary.
nlp = spacy.load('en_core_web_sm')
matcher = Matcher(nlp.vocab)
@NewscatcherAPI
NewscatcherAPI / clean_text.py
Last active November 17, 2021 09:31
Sentiment analysis
def clean_text(text, all_mentions):
# If retweet, delete RT and name of the account
text = re.sub('(RT\s.*):', '', text)
# Find all links and delete them
all_links = re.findall('(https:.*?)\s', text + ' ')
for i in all_links:
text = text.replace(i, '')
for i in all_mentions:
@NewscatcherAPI
NewscatcherAPI / all_summary.py
Last active December 30, 2021 09:55
spacy_vs_nltk_newscatcher_blog
# Collect the 'summary' field of every article, preserving the order of `articles`.
summary = [entry['summary'] for entry in articles]
# Keep the first summary as the sample sentence.
sentence = summary[0]
def is_company_acquisition(headline_doc):
    """Return True if the headline looks like one company acquiring another.

    A headline qualifies when both conditions hold:

    * at least one token has the lemma ``'acquire'``;
    * at least two of its entities carry the label ``'ORG'``.

    An empty document never qualifies.

    Args:
        headline_doc: Iterable of tokens exposing ``lemma_``, with an ``ents``
            attribute whose items expose ``label_`` (presumably a spaCy
            ``Doc`` -- confirm against the caller).

    Returns:
        bool: True when both conditions are met, False otherwise.
    """
    # Compare lemmas rather than raw text -- presumably so inflected forms of
    # the verb are matched as well. Both checks are document-wide, so they are
    # evaluated once instead of once per token.
    if not any(token.lemma_ == 'acquire' for token in headline_doc):
        return False

    # An acquisition needs at least two organisations (buyer and target).
    org_count = sum(1 for ent in headline_doc.ents if ent.label_ == 'ORG')
    return org_count >= 2
@NewscatcherAPI
NewscatcherAPI / cloudbuild_CloudRun.yaml
Created September 28, 2021 13:22
Google Kubernetes Engine as an alternative to Cloud Run
steps:
# Build the container image
- name: 'gcr.io/cloud-builders/docker'
args: ['build', '-t', 'gcr.io/$PROJECT_ID/${_SERVICE_NAME}:${_VERSION}', '-f', './${_DIRECTORY_PROJECT}/Dockerfile', '.', '--build-arg', 'directory=${_DIRECTORY_PROJECT}','--build-arg', 'number_workers=${_NB_WORKERS}']
# Push the container image to Container Registry
- name: 'gcr.io/cloud-builders/docker'
args: ['push', 'gcr.io/$PROJECT_ID/${_SERVICE_NAME}:${_VERSION}']
# Deploy container image to Cloud Run
- name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
entrypoint: gcloud
@NewscatcherAPI
NewscatcherAPI / Dockerfile_CloudRun
Last active September 30, 2021 13:02
Google Kubernetes Engine as an alternative to Cloud Run
# Use Python36
FROM python:3.6
# Arguments
ARG directory
# number of threads
ARG number_workers
WORKDIR /app
# Copy requirements.txt to the docker image and install packages