Skip to content

Instantly share code, notes, and snippets.

@kylemcdonald
Created May 9, 2023 04:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kylemcdonald/f8222393b1c5821ece4ca93e6fb5941d to your computer and use it in GitHub Desktop.
Save kylemcdonald/f8222393b1c5821ece4ca93e6fb5941d to your computer and use it in GitHub Desktop.
Accumulate LAPD roster totals by division, rank, etc.
import pandas as pd
from utils.list_files import list_files
mapping = {
'name': 'name',
'employeename': 'name',
'first name': 'first name',
'firstname': 'first name',
'last name': 'last name',
'lastname': 'last name',
'rank': 'rank',
'ranktile': 'rank',
'title': 'rank',
'ethnicity': 'ethnicity',
'sex': 'gender',
'gender': 'gender',
'area': 'division',
'division': 'division',
'number': 'serial',
'serial number': 'serial',
'no': 'serial',
'serial no.': 'serial',
'serial_no': 'serial',
'serial': 'serial',
'serialno': 'serial',
'supervisor': 'supervisor',
# this is not useful, always equals A
# 'status': None,
# these are missing
# 'div': None,
# 'mo/yr': None,
'hire_date': 'hire date',
'sworn entry date': 'hire date',
'year of hire': 'hire date',
'biweekly rate': 'biweekly rate'
}
# step 1, load and combine by serial
from collections import defaultdict
import os
by_serial = defaultdict(list)
def load_acronyms(fn):
division_acronyms = pd.read_csv(fn)
return dict(division_acronyms[['Acronym', 'Name']].values)
acronyms = ['division', 'ethnicity', 'gender', 'rank']
acronyms = {k:load_acronyms(f'{k}-acronyms.csv') for k in acronyms}
for fn in sorted(list_files('cleaned')):
roster_id = os.path.split(fn)[-1].split('.')[0]
print(roster_id)
df = pd.read_csv(fn)
print(','.join(df.columns))
# drop unnamed columns
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
# convert column names to lowercase
df.columns = df.columns.str.lower()
# drop columns that aren't in mapping
df = df[df.columns.intersection(mapping.keys())]
# rename columns based on mapping
df = df.rename(columns=mapping)
# add the source of the data
df['src'] = roster_id
print(','.join(df.columns))
# replace all acronyms in given columns
for k,v in acronyms.items():
if k in df:
print('remapping', k)
df[k] = df[k].replace(v)
# drop all rows with non-int 'serial' (i.e., civilians in 2021-01-14 roster)
df = df[df['serial'].astype(str).str.isnumeric()]
df['serial'] = df['serial'].astype(int)
for e in df.to_dict('records'):
# ignore empty cells
record = {k:v for k,v in e.items() if v == v}
# extract serial
serial = record['serial']
del record['serial']
by_serial[serial].append(record)
def accumulate_by_src_and_key(key):
accumulated_by_src_and_key = defaultdict(lambda:defaultdict(int))
# propagate forward and backward in time
for serial,data in by_serial.items():
# forward
cached = None
for i,record in enumerate(data):
if key in record:
cached = record[key]
elif cached is not None:
by_serial[serial][i][key] = cached
# backward
cached = None
for i,record in enumerate(data[::-1]):
if key in record:
cached = record[key]
elif cached is not None:
by_serial[serial][-1-i][key] = cached
for serial,data in by_serial.items():
for record in data:
src = record['src']
if key in record:
v = record[key]
accumulated_by_src_and_key[src][v] += 1
df = pd.DataFrame(accumulated_by_src_and_key).fillna(0).astype(int)
df['sum'] = df.sum(axis=1)
df = df.sort_values(by='sum', ascending=False).drop(columns=['sum'])
df.to_csv(f'{key}-totals-by-date.csv')
accumulate_by_src_and_key('ethnicity')
accumulate_by_src_and_key('gender')
accumulate_by_src_and_key('division')
accumulate_by_src_and_key('rank')
Acronym Name
77TH 77TH STREET AREA
ASB ADMINISTRATIVE SERVICES BUREAU
ASD AIR SUPPORT DIVISION
ADSD APPLICATION DEVELOPMENT AND SUPPORT DIVISION
AD AUDIT DIVISION
BSS BEHAVIORAL SCIENCE SERVICES
CENT CENTRAL AREA
CB CENTRAL BUREAU
CTD CENTRAL TRAFFIC DIVISION
COS CHIEF OF STAFF
CCD COMMERCIAL CRIMES DIVISION
CID COMMISSION INVESTIGATION DIVISION
COMM COMMUNICATIONS DIVISION
CSPB COMMUNITY SAFETY PARTNERSHIP BUREAU
CST COMPSTAT DIVISION
CTSOB COUNTER TERRORISM AND SPECIAL OPERATIONS BUREAU
CIRD CRITICAL INCIDENT REVIEW DIVISION
CSD CUSTODY SERVICES DIVISION
DB DETECTIVE BUREAU
DSG DETECTIVE SERVICES GROUP
DSVD DETECTIVE SUPPORT AND VICE DIVISION
DEV DEVONSHIRE AREA
DEID DIVERSITY EQUITY AND INCLUSION DIVISION
DEIG DIVERSITY EQUITY AND INCLUSION GROUP
ECCCSD EMERGENCY CMD CNTRL COMMUNICATIONS SYS DIV
ESD EMERGENCY SERVICES DIVISION
ERG EMPLOYEE RELATIONS GROUP
EPMD EVIDENCE AND PROPERTY MANAGEMENT DIVISION
FMD FACILITIES MANAGEMENT DIVISION
FTHL FOOTHILL AREA
FID FORCE INVESTIGATION DIVISION
FSD FORENSIC SCIENCE DIVISION
GND GANG AND NARCOTICS DIVISION
HARB HARBOR AREA
HOBK HOLLENBECK AREA
HWD HOLLYWOOD AREA
ISTD IN-SERVICE TRAINING DIVISION
ITB INFORMATION TECHNOLOGY BUREAU
ITD INFORMATION TECHNOLOGY DIVISION
ITG INFORMATION TECHNOLOGY GROUP
IMD INNOVATION MANAGEMENT DIVISION
IG INSPECTOR GENERAL
IAD INTERNAL AFFAIRS DIVISION
JUV JUVENILE DIVISION
MCD MAJOR CRIMES DIVISION
MRD MEDIA RELATIONS DIVISION
METRO METROPOLITAN DIVISION
MISN MISSION AREA
NEWT NEWTON AREA
NHWD NORTH HOLLYWOOD AREA
NE NORTHEAST AREA
CP OFFICE OF CHIEF OF POLICE
OCPP OFFICE OF CONSTITUTIONAL POLICING AND POLICY
OO OFFICE OF OPERATIONS
OSO OFFICE OF SPECIAL OPERATIONS
OSS OFFICE OF SUPPORT SERVICES
OLYM OLYMPIC AREA
PAC PACIFIC AREA
PAC-LAX PACIFIC AREA - LAX SUBSTATION
PER PERSONNEL DIVISION
PER-M PERSONNEL ON MILITARY LEAVE
PC POLICE COMMISSION
PTE POLICE TRAINING AND EDUCATION
PSB PROFESSIONAL STANDARDS BUREAU
PCG PUBLIC COMMUNICATIONS GROUP
RAMP RAMPART AREA
RED RECRUITMENT AND EMPLOYMENT DIVISION
PER-RW RETURN TO WORK PROGRAM
RMLAD RISK MANAGEMENT LEGAL AFFAIRS DIVISION
RHD ROBBERY-HOMICIDE DIVISION
SECSD SECURITY SERVICES DIVISION
SB SOUTH BUREAU
SBHD SOUTH BUREAU HOMICIDE DIVISION
STD SOUTH TRAFFIC DIVISION
SE SOUTHEAST AREA
SW SOUTHWEST AREA
SOD SPECIAL OPERATIONS DIVISION
SSG SUPPORT SERVICES GROUP
TOP TOPANGA AREA
TRFG TRAFFIC GROUP
TRB TRAINING BUREAU
TD TRAINING DIVISION
TD-REC TRAINING DIVISION-RECRUIT
TSB TRANSIT SERVICES BUREAU
TSD TRANSIT SERVICES DIVISION
TRSG TRANSIT SERVICES GROUP
VB VALLEY BUREAU
VTD VALLEY TRAFFIC DIVISION
VNY VAN NUYS AREA
WB WEST BUREAU
WLA WEST LOS ANGELES AREA
WTD WEST TRAFFIC DIVISION
WVAL WEST VALLEY AREA
WIL WILSHIRE AREA
Acronym Name
AMIND AMERICAN INDIAN
AMERIND AMERICAN INDIAN
AMERIIND AMERICAN INDIAN
ASIAN/PAC ASIAN/PACIFIC
ASIAN ASIAN/PACIFIC
CAUC CAUCASIAN
HISP HISPANIC
FLPNO FILIPINO
Acronym Name
F FEMALE
M MALE
N NONBINARY
NONBINNARY NONBINARY
Acronym Name
AC ASSISTANT CHIEF
CAPT 1 CAPTAIN 1
CAPT 2 CAPTAIN 2
CAPT 3 CAPTAIN 3
CAPTAIN CAPTAIN 1
CMDR COMMANDER
DEP CHF DEPUTY CHIEF
DET 1 DETECTIVE 1
DET 2 DETECTIVE 2
DET 3 DETECTIVE 3
DETECTIVE I DETECTIVE 1
DETECTIVE II DETECTIVE 2
DETECTIVE III DETECTIVE 3
LIEUTENANT LIEUTENANT 1
LT 1 LIEUTENANT 1
LT 2 LIEUTENANT 2
PO 1 POLICE OFFICER 1
PO 2 POLICE OFFICER 2
PO 3 POLICE OFFICER 3
PO SPEC POLICE SPECIALIST
POLICE OFFICER I POLICE OFFICER 1
POLICE OFFICER II POLICE OFFICER 2
POLICE OFFICER III POLICE OFFICER 3
SERGEANT I SERGEANT 1
SERGEANT II SERGEANT 2
SGT 1 SERGEANT 1
SGT 2 SERGEANT 2
CAPTAIN I CAPTAIN 1
CAPTAIN II CAPTAIN 2
CAPTAIN III CAPTAIN 3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment