import re
import nltk
from nltk.corpus import stopwords
import pandas as pd
stop_words = set(stopwords.words("english"))
def get_first_title(title):
    # normalize "co-founder", "co-ceo", etc. by stripping the "co-" prefix
    title = re.sub(r"[Cc]o[\-\ ]", "", title)
    # if several titles are listed, keep only the first (completion assumed)
    return title.split(",")[0].strip()
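A quick sanity check of get_first_title on a couple of hypothetical inputs:

print(get_first_title("Co-Founder, CEO"))            # -> "Founder"
print(get_first_title("VP of Sales, Board Member"))  # -> "VP of Sales"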
Platform       CPU      GOOS     GOARCH   GOARM   SDK               CGO_FLAGS         CARCH
Windows        x86_64   windows  amd64                                                x86_64
Windows        x86_32   windows  386                                                  x86_32
MacOS          x86_64   darwin   amd64            macos                               x86_64
MacOS          M1       darwin   arm64            macos                               arm64
Linux          x86_64   linux    amd64                                                x86_64
Linux          x86_32   linux    386                                                  x86_32
Linux          arm64    linux    arm64                                                arm64
iOS Simulator  x86_64   darwin   amd64            iphonesimulator   -fembed-bitcode   x86_64
iOS            arm      darwin   arm64            iphoneos          -fembed-bitcode   arm64
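The rows of this table can be exercised programmatically; below is a minimal Python sketch (the go_build helper and output names are hypothetical, and go is assumed to be on PATH). The iOS rows additionally need an Apple SDK selected via xcrun, which is not shown here.

import os
import subprocess

def go_build(goos, goarch, output, cgo_flags=None):
    # Export the per-row variables from the table, then invoke `go build`.
    env = dict(os.environ, GOOS=goos, GOARCH=goarch)
    if cgo_flags:
        env["CGO_ENABLED"] = "1"
        env["CGO_CFLAGS"] = cgo_flags
    subprocess.run(["go", "build", "-o", output, "."], env=env, check=True)

go_build("linux", "arm64", "app-linux-arm64")  # the "Linux arm64" row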
#!/usr/bin/env python3
# Guess a person's email address from their name and a domain,
# then verify the guesses with the NeverBounce API.
import urllib.request
import json
import requests

full_name = "Professor John Roberts Smith Jr."
domain = "example.com"
api_key = "<neverbounce_api_key>"
name_parts = full_name.lower().split(" ")
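What follows is a minimal sketch of the idea, not a verbatim continuation of the script: strip honorifics, build a few common local-part permutations, and ask NeverBounce's v4 single/check endpoint about each one. The honorifics set and the candidate patterns are assumptions.

honorifics = {"professor", "prof.", "dr.", "mr.", "ms.", "jr.", "sr."}  # assumed filter
parts = [p for p in name_parts if p not in honorifics]
first, last = parts[0], parts[-1]
candidates = [
    f"{first}@{domain}",
    f"{first}.{last}@{domain}",
    f"{first[0]}{last}@{domain}",
]
for email in candidates:
    resp = requests.get(
        "https://api.neverbounce.com/v4/single/check",
        params={"key": api_key, "email": email},
    )
    print(email, resp.json().get("result"))  # "valid", "invalid", "catchall", ...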
import re
import requests
import nltk
from lxml import html
from nltk.corpus import stopwords
stop_words = set(stopwords.words("english"))
# A Company Name
company_name = "The Boring Company"
search_terms = ["flamethrower"] # The Boring Company sells flamethrowers
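A minimal sketch of how these pieces plausibly fit together (the URL and XPath are illustrative, not from the original): fetch the company's site, drop stop words, and count how often each search term appears.

url = "https://www.boringcompany.com/"  # illustrative
page = requests.get(url, timeout=10)
tree = html.fromstring(page.content)
text = " ".join(tree.xpath("//body//text()")).lower()
words = [w for w in re.findall(r"[a-z']+", text) if w not in stop_words]
for term in search_terms:
    print(f"{company_name}: '{term}' appears {words.count(term)} time(s)")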
LexicalAnalyzer.prototype.generateParseError = function(lineNumber = null, message = "") {
    // Build a structured error record for a given source line.
    var error = {"line": lineNumber, "message": message};
    return error;
};
function LexicalAnalyzer() {
    // Token classes and the regexes that recognize them.
    this.tokenTypes = {
        "include": /^include$/,
        "string": /^\"[^\"]+\"$/,
        "unaryop": /^(sin|cos|tan|exp|ln|sqrt)$/,
        "exp": /^(\+|\-|\*|\/|\^)$/,
        "comma": /^\,$/,
        "openparen": /^\($/,
        "closeparen": /^\)$/,
        "openbracket": /^\[$/,
        "closebracket": /^\]$/  // closing counterpart; assumed for the truncated listing
    };
}
LexicalAnalyzer.prototype.getClassifiedTokenFromLines = function(tokenLines) {
    var classifiedTokens = [];
    var errors = [];
    // loop through each line of code
    for (var lineId = 0; lineId < tokenLines.length; lineId++) {
        var line = tokenLines[lineId];
        // loop through each token in each line
        for (var tokenId = 0; tokenId < line.length; tokenId++) {
            var token = line[tokenId];
            var classifiedToken = null;
            // try each token class until a regex matches (this completion is assumed)
            for (var type in this.tokenTypes) {
                if (this.tokenTypes[type].test(token)) { classifiedToken = {"type": type, "value": token}; break; }
            }
            if (classifiedToken) classifiedTokens.push(classifiedToken);
            else errors.push(this.generateParseError(lineId, "Unrecognized token: " + token));
        }
    }
    return {"tokens": classifiedTokens, "errors": errors};
};
LexicalAnalyzer.prototype.run = function(tokenLines) {
    // Classify every token line and return the tokens plus any errors.
    var lexOutput = this.getClassifiedTokenFromLines(tokenLines);
    return lexOutput;
};
// The Tokenizer constructor is not shown in the original snippet; a minimal stub is assumed.
function Tokenizer() {}

Tokenizer.prototype.trimWhitespace = function(text) {
    // Collapse runs of blank lines and spaces, then trim the ends.
    var cleanedText = text.replace(/\n+/g, "\n");
    cleanedText = cleanedText.replace(/[ ]+/g, " ");
    cleanedText = cleanedText.trim();
    return cleanedText;
};
Tokenizer.prototype.getTokenLines = function(text) {
    // Split the cleaned text into lines, and each line into whitespace-separated tokens.
    var lines = text.split("\n");
    var tokenLines = [];
    for (var lineId = 0; lineId < lines.length; lineId++) {
        var line = lines[lineId];
        var tokens = line.split(" ");
        tokenLines.push(tokens);
    }
    return tokenLines;
};
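Wiring the two together; a quick end-to-end check in the same JavaScript (the input string is illustrative):

var tokenizer = new Tokenizer();
var analyzer = new LexicalAnalyzer();
var source = tokenizer.trimWhitespace("sin ( 1 + 2 )\n\n\ncos ( 3 )");
var result = analyzer.run(tokenizer.getTokenLines(source));
console.log(result.tokens);  // classified tokens such as unaryop, openparen, exp
console.log(result.errors);  // "1", "2", "3" are reported: no number class in the listing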