Accumulate LAPD roster totals by division, rank, etc.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from utils.list_files import list_files | |
mapping = { | |
'name': 'name', | |
'employeename': 'name', | |
'first name': 'first name', | |
'firstname': 'first name', | |
'last name': 'last name', | |
'lastname': 'last name', | |
'rank': 'rank', | |
'ranktile': 'rank', | |
'title': 'rank', | |
'ethnicity': 'ethnicity', | |
'sex': 'gender', | |
'gender': 'gender', | |
'area': 'division', | |
'division': 'division', | |
'number': 'serial', | |
'serial number': 'serial', | |
'no': 'serial', | |
'serial no.': 'serial', | |
'serial_no': 'serial', | |
'serial': 'serial', | |
'serialno': 'serial', | |
'supervisor': 'supervisor', | |
# this is not useful, always equals A | |
# 'status': None, | |
# these are missing | |
# 'div': None, | |
# 'mo/yr': None, | |
'hire_date': 'hire date', | |
'sworn entry date': 'hire date', | |
'year of hire': 'hire date', | |
'biweekly rate': 'biweekly rate' | |
} | |
# step 1, load and combine by serial | |
from collections import defaultdict | |
import os | |
by_serial = defaultdict(list) | |
def load_acronyms(fn): | |
division_acronyms = pd.read_csv(fn) | |
return dict(division_acronyms[['Acronym', 'Name']].values) | |
acronyms = ['division', 'ethnicity', 'gender', 'rank'] | |
acronyms = {k:load_acronyms(f'{k}-acronyms.csv') for k in acronyms} | |
for fn in sorted(list_files('cleaned')): | |
roster_id = os.path.split(fn)[-1].split('.')[0] | |
print(roster_id) | |
df = pd.read_csv(fn) | |
print(','.join(df.columns)) | |
# drop unnamed columns | |
df = df.loc[:, ~df.columns.str.contains('^Unnamed')] | |
# convert column names to lowercase | |
df.columns = df.columns.str.lower() | |
# drop columns that aren't in mapping | |
df = df[df.columns.intersection(mapping.keys())] | |
# rename columns based on mapping | |
df = df.rename(columns=mapping) | |
# add the source of the data | |
df['src'] = roster_id | |
print(','.join(df.columns)) | |
# replace all acronyms in given columns | |
for k,v in acronyms.items(): | |
if k in df: | |
print('remapping', k) | |
df[k] = df[k].replace(v) | |
# drop all rows with non-int 'serial' (i.e., civilians in 2021-01-14 roster) | |
df = df[df['serial'].astype(str).str.isnumeric()] | |
df['serial'] = df['serial'].astype(int) | |
for e in df.to_dict('records'): | |
# ignore empty cells | |
record = {k:v for k,v in e.items() if v == v} | |
# extract serial | |
serial = record['serial'] | |
del record['serial'] | |
by_serial[serial].append(record) | |
def accumulate_by_src_and_key(key): | |
accumulated_by_src_and_key = defaultdict(lambda:defaultdict(int)) | |
# propagate forward and backward in time | |
for serial,data in by_serial.items(): | |
# forward | |
cached = None | |
for i,record in enumerate(data): | |
if key in record: | |
cached = record[key] | |
elif cached is not None: | |
by_serial[serial][i][key] = cached | |
# backward | |
cached = None | |
for i,record in enumerate(data[::-1]): | |
if key in record: | |
cached = record[key] | |
elif cached is not None: | |
by_serial[serial][-1-i][key] = cached | |
for serial,data in by_serial.items(): | |
for record in data: | |
src = record['src'] | |
if key in record: | |
v = record[key] | |
accumulated_by_src_and_key[src][v] += 1 | |
df = pd.DataFrame(accumulated_by_src_and_key).fillna(0).astype(int) | |
df['sum'] = df.sum(axis=1) | |
df = df.sort_values(by='sum', ascending=False).drop(columns=['sum']) | |
df.to_csv(f'{key}-totals-by-date.csv') | |
accumulate_by_src_and_key('ethnicity') | |
accumulate_by_src_and_key('gender') | |
accumulate_by_src_and_key('division') | |
accumulate_by_src_and_key('rank') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Acronym | Name | |
---|---|---|
77TH | 77TH STREET AREA | |
ASB | ADMINISTRATIVE SERVICES BUREAU | |
ASD | AIR SUPPORT DIVISION | |
ADSD | APPLICATION DEVELOPMENT AND SUPPORT DIVISION | |
AD | AUDIT DIVISION | |
BSS | BEHAVIORAL SCIENCE SERVICES | |
CENT | CENTRAL AREA | |
CB | CENTRAL BUREAU | |
CTD | CENTRAL TRAFFIC DIVISION | |
COS | CHIEF OF STAFF | |
CCD | COMMERCIAL CRIMES DIVISION | |
CID | COMMISSION INVESTIGATION DIVISION | |
COMM | COMMUNICATIONS DIVISION | |
CSPB | COMMUNITY SAFETY PARTNERSHIP BUREAU | |
CST | COMPSTAT DIVISION | |
CTSOB | COUNTER TERRORISM AND SPECIAL OPERATIONS BUREAU | |
CIRD | CRITICAL INCIDENT REVIEW DIVISION | |
CSD | CUSTODY SERVICES DIVISION | |
DB | DETECTIVE BUREAU | |
DSG | DETECTIVE SERVICES GROUP | |
DSVD | DETECTIVE SUPPORT AND VICE DIVISION | |
DEV | DEVONSHIRE AREA | |
DEID | DIVERSITY EQUITY AND INCLUSION DIVISION | |
DEIG | DIVERSITY EQUITY AND INCLUSION GROUP | |
ECCCSD | EMERGENCY CMD CNTRL COMMUNICATIONS SYS DIV | |
ESD | EMERGENCY SERVICES DIVISION | |
ERG | EMPLOYEE RELATIONS GROUP | |
EPMD | EVIDENCE AND PROPERTY MANAGEMENT DIVISION | |
FMD | FACILITIES MANAGEMENT DIVISION | |
FTHL | FOOTHILL AREA | |
FID | FORCE INVESTIGATION DIVISION | |
FSD | FORENSIC SCIENCE DIVISION | |
GND | GANG AND NARCOTICS DIVISION | |
HARB | HARBOR AREA | |
HOBK | HOLLENBECK AREA | |
HWD | HOLLYWOOD AREA | |
ISTD | IN-SERVICE TRAINING DIVISION | |
ITB | INFORMATION TECHNOLOGY BUREAU | |
ITD | INFORMATION TECHNOLOGY DIVISION | |
ITG | INFORMATION TECHNOLOGY GROUP | |
IMD | INNOVATION MANAGEMENT DIVISION | |
IG | INSPECTOR GENERAL | |
IAD | INTERNAL AFFAIRS DIVISION | |
JUV | JUVENILE DIVISION | |
MCD | MAJOR CRIMES DIVISION | |
MRD | MEDIA RELATIONS DIVISION | |
METRO | METROPOLITAN DIVISION | |
MISN | MISSION AREA | |
NEWT | NEWTON AREA | |
NHWD | NORTH HOLLYWOOD AREA | |
NE | NORTHEAST AREA | |
CP | OFFICE OF CHIEF OF POLICE | |
OCPP | OFFICE OF CONSTITUTIONAL POLICING AND POLICY | |
OO | OFFICE OF OPERATIONS | |
OSO | OFFICE OF SPECIAL OPERATIONS | |
OSS | OFFICE OF SUPPORT SERVICES | |
OLYM | OLYMPIC AREA | |
PAC | PACIFIC AREA | |
PAC-LAX | PACIFIC AREA - LAX SUBSTATION | |
PER | PERSONNEL DIVISION | |
PER-M | PERSONNEL ON MILITARY LEAVE | |
PC | POLICE COMMISSION | |
PTE | POLICE TRAINING AND EDUCATION | |
PSB | PROFESSIONAL STANDARDS BUREAU | |
PCG | PUBLIC COMMUNICATIONS GROUP | |
RAMP | RAMPART AREA | |
RED | RECRUITMENT AND EMPLOYMENT DIVISION | |
PER-RW | RETURN TO WORK PROGRAM | |
RMLAD | RISK MANAGEMENT LEGAL AFFAIRS DIVISION | |
RHD | ROBBERY-HOMICIDE DIVISION | |
SECSD | SECURITY SERVICES DIVISION | |
SB | SOUTH BUREAU | |
SBHD | SOUTH BUREAU HOMICIDE DIVISION | |
STD | SOUTH TRAFFIC DIVISION | |
SE | SOUTHEAST AREA | |
SW | SOUTHWEST AREA | |
SOD | SPECIAL OPERATIONS DIVISION | |
SSG | SUPPORT SERVICES GROUP | |
TOP | TOPANGA AREA | |
TRFG | TRAFFIC GROUP | |
TRB | TRAINING BUREAU | |
TD | TRAINING DIVISION | |
TD-REC | TRAINING DIVISION-RECRUIT | |
TSB | TRANSIT SERVICES BUREAU | |
TSD | TRANSIT SERVICES DIVISION | |
TRSG | TRANSIT SERVICES GROUP | |
VB | VALLEY BUREAU | |
VTD | VALLEY TRAFFIC DIVISION | |
VNY | VAN NUYS AREA | |
WB | WEST BUREAU | |
WLA | WEST LOS ANGELES AREA | |
WTD | WEST TRAFFIC DIVISION | |
WVAL | WEST VALLEY AREA | |
WIL | WILSHIRE AREA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Acronym | Name | |
---|---|---|
AMIND | AMERICAN INDIAN | |
AMERIND | AMERICAN INDIAN | |
AMERIIND | AMERICAN INDIAN | |
ASIAN/PAC | ASIAN/PACIFIC | |
ASIAN | ASIAN/PACIFIC | |
CAUC | CAUCASIAN | |
HISP | HISPANIC | |
FLPNO | FILIPINO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Acronym | Name | |
---|---|---|
F | FEMALE | |
M | MALE | |
N | NONBINARY | |
NONBINNARY | NONBINARY |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Acronym | Name | |
---|---|---|
AC | ASSISTANT CHIEF | |
CAPT 1 | CAPTAIN 1 | |
CAPT 2 | CAPTAIN 2 | |
CAPT 3 | CAPTAIN 3 | |
CAPTAIN | CAPTAIN 1 | |
CMDR | COMMANDER | |
DEP CHF | DEPUTY CHIEF | |
DET 1 | DETECTIVE 1 | |
DET 2 | DETECTIVE 2 | |
DET 3 | DETECTIVE 3 | |
DETECTIVE I | DETECTIVE 1 | |
DETECTIVE II | DETECTIVE 2 | |
DETECTIVE III | DETECTIVE 3 | |
LIEUTENANT | LIEUTENANT 1 | |
LT 1 | LIEUTENANT 1 | |
LT 2 | LIEUTENANT 2 | |
PO 1 | POLICE OFFICER 1 | |
PO 2 | POLICE OFFICER 2 | |
PO 3 | POLICE OFFICER 3 | |
PO SPEC | POLICE SPECIALIST | |
POLICE OFFICER I | POLICE OFFICER 1 | |
POLICE OFFICER II | POLICE OFFICER 2 | |
POLICE OFFICER III | POLICE OFFICER 3 | |
SERGEANT I | SERGEANT 1 | |
SERGEANT II | SERGEANT 2 | |
SGT 1 | SERGEANT 1 | |
SGT 2 | SERGEANT 2 | |
CAPTAIN I | CAPTAIN 1 | |
CAPTAIN II | CAPTAIN 2 | |
CAPTAIN III | CAPTAIN 3 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment