(?<!\d)([0-9]{5}-[0-9]{4}-[0-9]{2})(?!\d)
Explained:
- ?<= positive lookbehind
- ?<! negative lookbehind
- \d digit only
- ?= positive lookahead
- ?! negative lookahead
- [0-9]{5}-[0-9]{4}-[0-9]{2} a specific pattern #####-####-##
(?<!\d)([0-9]{5}-[0-9]{4}-[0-9]{2})(?!\d)
# Two-letter postal abbreviations, alphabetically sorted.
# 50 states + D.C. = 51 entries.
STATES = [
    "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL",
    "GA", "HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA",
    "MD", "ME", "MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE",
    "NH", "NJ", "NM", "NV", "NY", "OH", "OK", "OR", "PA", "RI",
    "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV",
    "WY",
]
# Experiment to confirm the effect of the `stratify` option of scikit-learn's
# train_test_split() function.
# by Shayan Amani
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the training set; the path is relative to the working directory.
raw_data = pd.read_csv("codebase/adrel/dataset/train.csv")

# Baseline distribution before any split: for each distinct value of the
# 'label' column, the number of non-null entries in every other column.
# NOTE(review): 'label' is presumably the class column passed to `stratify`
# later in the experiment -- confirm against the rest of the script.
cnt = raw_data.groupby('label').count()

''' experiment begins '''