(?<!\d)([0-9]{5}-[0-9]{4}-[0-9]{2})(?!\d)
- ?<= lookbehind
- ?<! negative lookbehind
- \d digit only
- ?= lookahead
- ?! negative lookahead
- [0-9]{5}-[0-9]{4}-[0-9]{2} a specific pattern #####-####-##
# Experiment to confirm the effect of the stratify option in scikit-learn's train_test_split() function.
# by Shayan Amani | |
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the training data; the relative path is resolved against the current
# working directory.
raw_data = pd.read_csv("codebase/adrel/dataset/train.csv")

# Group the rows by the 'label' column and count the non-null entries of every
# remaining column within each group.
cnt = raw_data.groupby('label').count()

''' experiment begins '''
# Two-letter abbreviations of the U.S. states plus the District of Columbia.
# alphabetically sorted
# 50 states + D.C = 51
STATES = ["AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL",
          "GA", "HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA",
          "MD", "ME", "MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE",
          "NH", "NJ", "NM", "NV", "NY", "OH", "OK", "OR", "PA", "RI",
          "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV",
          "WY"]
import base64
import mimetypes
import os

from bs4 import BeautifulSoup

# Parse example.html into a BeautifulSoup tree. The handle is passed to
# BeautifulSoup inside the `with` block, so the file is still open while it
# is read and is closed automatically afterwards.
with open('example.html', 'rb') as file_handle:  # Read in as a binary file
    # NOTE(review): no parser is named, so BeautifulSoup selects one itself
    # and the result can differ between environments -- consider passing an
    # explicit parser once the intended one is confirmed.
    soup = BeautifulSoup(file_handle)
import logging | |
import pathlib | |
import sys | |
from ml.common.const import LOG_DIR_PATH, ML_DIR | |
def create_log_file_path(file_path, root_dir=ML_DIR, log_dir=LOG_DIR_PATH): | |
path_parts = list(pathlib.Path(file_path).parts) | |
relative_path_parts = path_parts[path_parts.index(root_dir) + 1:] |
from pyrogram import Client

# Print the titles of the chats for which `is_creator` is set on the chat or
# whose chat permissions allow changing the chat info, followed by a count.
# NOTE: CLIENT_NAME, API_ID and API_HASH are not defined in this snippet and
# must be supplied before it can run.
with Client(CLIENT_NAME, API_ID, API_HASH) as client:
    chats = []
    for d in client.get_dialogs():
        # `permissions` may be falsy, so it is checked before its attribute
        # is read.
        if d.chat.is_creator or \
                (d.chat.permissions and d.chat.permissions.can_change_info):
            chats.append(d.chat.title)

# Reporting only needs the collected titles, not the client.
print(*chats, sep='\n')
print('\t Chat count:', len(chats))
import concurrent.futures | |
import itertools | |
import math | |
import pdf2image # $ brew install poppler | |
from tqdm import tqdm | |
# Colour constants as (red, green, blue) triples with channel values 0-255.
BLACK = (0, 0, 0)
RED = (255, 0, 0)
GREEN = (0, 255, 0)