Created
August 23, 2018 18:43
-
-
Save dbreunig/1505906e86266eb7e6ad80fb1ce971ed to your computer and use it in GitHub Desktop.
A quick and dirty script for adding DMA names to a CSV containing DMA codes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# ---------------------------------------- | |
# DESCRIPTION | |
# This script takes in a CSV file containing a column with DMA code numbers. | |
# It outputs to stdout a CSV file exactly like the input file but with a | |
# column of associated DMA names appended. | |
# | |
# ISSUES: | |
# - No flag for headers, auto detects based on simple heuristic | |
# - Outputs to stdout, not a specified file | |
# - A neat feature would be auto detecting the DMA code column | |
require 'csv' | |
# ---------------------------------------- | |
# HELPERS | |
def exit_with_help | |
puts "usage: dma_expander input_file.csv -c [DMA CODE COLUMN INDEX]" | |
exit | |
end | |
class String | |
def is_i? | |
!!(self =~ /\A[-+]?[0-9]+\z/) | |
end | |
end | |
# ---------------------------------------- | |
# SETUP: DMA definition and arugment validation and | |
@dmas = { | |
"501"=>"New York", | |
"803"=>"Los Angeles", | |
"602"=>"Chicago", | |
"504"=>"Philadelphia", | |
"506"=>"Boston (Manchester)", | |
"807"=>"San Francisco - Oakland - San Jose", | |
"623"=>"Dallas - Fort Worth", | |
"511"=>"Washington DC (Hagerstown)", | |
"524"=>"Atlanta", | |
"618"=>"Houston", | |
"505"=>"Detroit", | |
"539"=>"Tampa - Saint Petersburg (Sarasota)", | |
"819"=>"Seattle - Tacoma", | |
"753"=>"Phoenix", | |
"613"=>"Minneapolis - Saint Paul", | |
"510"=>"Cleveland", | |
"528"=>"Miami - Fort Lauderdale", | |
"751"=>"Denver", | |
"862"=>"Sacramento - Stockton - Modesto", | |
"534"=>"Orlando - Daytona Beach - Melbourne", | |
"609"=>"Saint Louis", | |
"508"=>"Pittsburgh", | |
"820"=>"Portland, OR", | |
"512"=>"Baltimore", | |
"527"=>"Indianapolis", | |
"825"=>"San Diego", | |
"517"=>"Charlotte", | |
"533"=>"Hartford & New Haven", | |
"560"=>"Raleigh - Durham (Fayetteville)", | |
"659"=>"Nashville", | |
"616"=>"Kansas City", | |
"535"=>"Columbus, OH", | |
"617"=>"Milwaukee", | |
"515"=>"Cincinnati", | |
"567"=>"Greenville - Spartansburg - Asheville - Anderson", | |
"770"=>"Salt Lake City", | |
"641"=>"San Antonio", | |
"548"=>"West Palm Beach - Fort Pierce", | |
"563"=>"Grand Rapids - Kalamazoo - Battle Creek", | |
"630"=>"Birmingham (Anniston and Tuscaloosa)", | |
"566"=>"Harrisburg - Lancaster - Lebanon - York", | |
"544"=>"Norfolk - Portsmouth - Newport News", | |
"622"=>"New Orleans", | |
"640"=>"Memphis", | |
"650"=>"Oklahoma City", | |
"790"=>"Albuquerque - Santa Fe", | |
"518"=>"Greensboro - High Point - Winston-Salem", | |
"839"=>"Las Vegas", | |
"514"=>"Buffalo", | |
"529"=>"Louisville", | |
"521"=>"Providence - New Bedford", | |
"561"=>"Jacksonville, Brunswick", | |
"635"=>"Austin, TX", | |
"577"=>"Wilkes Barre - Scranton", | |
"532"=>"Albany - Schenectady - Troy", | |
"866"=>"Fresno - Visalia", | |
"693"=>"Little Rock - Pine Bluff", | |
"557"=>"Knoxville", | |
"542"=>"Dayton", | |
"556"=>"Richmond - Petersburg", | |
"671"=>"Tulsa", | |
"686"=>"Mobile - Pensacola (Fort Walton Beach)", | |
"541"=>"Lexington", | |
"564"=>"Charleston-Huntington", | |
"513"=>"Flint - Saginaw - Bay City", | |
"571"=>"Fort Myers - Naples", | |
"678"=>"Wichita - Hutchinson", | |
"573"=>"Roanoke - Lynchburg", | |
"658"=>"Green Bay - Appleton", | |
"547"=>"Toledo", | |
"789"=>"Tucson (Sierra Vista)", | |
"744"=>"Honolulu", | |
"679"=>"Des Moines - Ames", | |
"500"=>"Portland - Auburn", | |
"652"=>"Omaha", | |
"555"=>"Syracuse", | |
"619"=>"Springfield, MO", | |
"881"=>"Spokane", | |
"538"=>"Rochester, NY", | |
"632"=>"Paducah - Cape Girardeau - Harrisburg - Mt Vernon", | |
"612"=>"Shreveport", | |
"648"=>"Champaign & Springfield - Decatur", | |
"546"=>"Columbia, SC", | |
"691"=>"Huntsville - Decatur (Florence)", | |
"669"=>"Madison", | |
"575"=>"Chattanooga", | |
"588"=>"South Bend - Elkhart", | |
"637"=>"Cedar Rapids - Waterloo & Dubuque", | |
"718"=>"Jackson, MS", | |
"523"=>"Burlington - Plattsburgh", | |
"531"=>"Tri-Cities, TN-VA", | |
"636"=>"Harlingen - Weslaco - Brownsville - McAllen", | |
"625"=>"Waco - Temple - Bryan", | |
"682"=>"Davenport - Rock Island - Moline", | |
"716"=>"Baton Rouge", | |
"507"=>"Savannah", | |
"574"=>"Johnstown - Altoona", | |
"765"=>"El Paso", | |
"649"=>"Evansville", | |
"519"=>"Charleston, SC", | |
"536"=>"Youngstown", | |
"722"=>"Lincoln & Hastings - Kearney", | |
"670"=>"Fort Smith - Fayetteville - Springdale - Rogers", | |
"545"=>"Greenville - New Bern - Washington", | |
"509"=>"Fort Wayne", | |
"570"=>"Florence - Myrtle Beach", | |
"543"=>"Springfield - Holyoke", | |
"530"=>"Tallahassee - Thomasville", | |
"551"=>"Lansing", | |
"709"=>"Tyler - Longview (Lufkin & Nacogdoches)", | |
"811"=>"Reno", | |
"540"=>"Traverse City - Cadillac", | |
"725"=>"Sioux Falls (Mitchell)", | |
"520"=>"Augusta", | |
"698"=>"Montgomery (Selma)", | |
"675"=>"Peoria - Bloomington", | |
"724"=>"Fargo - Valley City", | |
"757"=>"Boise", | |
"503"=>"Macon", | |
"801"=>"Eugene", | |
"855"=>"Santa Barbara - Santa Maria - San Luis Obispo", | |
"702"=>"La Crosse - Eau Claire", | |
"642"=>"Lafayette, LA", | |
"828"=>"Monterey - Salinas", | |
"810"=>"Yakima - Pasco - Richland - Kennewick", | |
"673"=>"Columbus - Tupelo - West Point", | |
"800"=>"Bakersfield", | |
"600"=>"Corpus Christi", | |
"868"=>"Chico - Redding", | |
"634"=>"Amarillo", | |
"522"=>"Columbus, GA", | |
"610"=>"Rockford", | |
"705"=>"Wausau - Rhinelander", | |
"628"=>"Monroe - El Dorado", | |
"605"=>"Topeka", | |
"676"=>"Duluth - Superior", | |
"604"=>"Columbia - Jefferson City", | |
"550"=>"Wilmington", | |
"692"=>"Beaumont - Port Arthur", | |
"813"=>"Medford - Klamath Falls", | |
"516"=>"Erie", | |
"624"=>"Sioux City", | |
"627"=>"Wichita Falls & Lawton", | |
"603"=>"Joplin - Pittsburg", | |
"651"=>"Lubbock", | |
"525"=>"Albany, GA", | |
"576"=>"Salisbury", | |
"559"=>"Bluefield - Beckley - Oak Hill", | |
"581"=>"Terre Haute", | |
"537"=>"Bangor", | |
"611"=>"Rochester - Mason City - Austin", | |
"804"=>"Palm Springs", | |
"554"=>"Wheeling - Steubenville", | |
"743"=>"Anchorage", | |
"502"=>"Binghamton", | |
"656"=>"Panama City", | |
"746"=>"Biloxi - Gulfport", | |
"633"=>"Odessa - Midland", | |
"687"=>"Minot - Bismarck - Dickinson", | |
"657"=>"Sherman, TX - Ada, OK", | |
"592"=>"Gainesville", | |
"758"=>"Idaho Falls - Pocatello", | |
"662"=>"Abilene - Sweetwater", | |
"598"=>"Clarksburg - Weston", | |
"526"=>"Utica", | |
"710"=>"Hattiesburg - Laurel", | |
"762"=>"Missoula", | |
"717"=>"Quincy - Hannibal - Keokuk", | |
"771"=>"Yuma - El Centro", | |
"756"=>"Billings", | |
"606"=>"Dothan", | |
"565"=>"Elmira", | |
"639"=>"Jackson, TN", | |
"643"=>"Lake Charles", | |
"644"=>"Alexandria, LA", | |
"764"=>"Rapid City", | |
"549"=>"Watertown", | |
"734"=>"Jonesboro", | |
"553"=>"Marquette", | |
"569"=>"Harrisonburg", | |
"647"=>"Greenwood - Greenville", | |
"736"=>"Bowling Green", | |
"711"=>"Meridian", | |
"558"=>"Lima", | |
"584"=>"Charlottesville", | |
"773"=>"Grand Junction - Montrose", | |
"749"=>"Laredo", | |
"755"=>"Great Falls", | |
"597"=>"Parkersburg", | |
"582"=>"Lafayette, IN", | |
"760"=>"Twin Falls", | |
"754"=>"Butte - Bozeman", | |
"802"=>"Eureka", | |
"759"=>"Cheyenne - Scottsbluff", | |
"821"=>"Bend, OR", | |
"661"=>"San Angelo", | |
"767"=>"Casper - Riverton", | |
"631"=>"Ottumwa - Kirksville", | |
"737"=>"Mankato", | |
"638"=>"Saint Joseph", | |
"596"=>"Zanesville", | |
"745"=>"Fairbanks", | |
"552"=>"Presque Isle", | |
"626"=>"Victoria", | |
"766"=>"Helena", | |
"747"=>"Juneau", | |
"583"=>"Alpena", | |
"740"=>"North Platte", | |
"798"=>"Glendive" | |
} | |
@input = ARGV[0] | |
# Confirm it's a CSV | |
unless @input =~ /(\.csv)$/ | |
exit_with_help() | |
end | |
# Check for column flag | |
unless ARGV[1] == '-c' | |
exit_with_help() | |
end | |
# Check for column numeral | |
unless ARGV[2].is_i? | |
exit_with_help() | |
end | |
@column = ARGV[2].to_i | |
# ---------------------------------------- | |
# FILE SETUP: Load the CSV, check that the DMA column looks like codes, detect header presence | |
header_flag = true | |
CSV.foreach(@input) do | row | | |
header_flag = false if $. == 1 and row[@column] =~ /^\d{3}$/ | |
if $. == 2 and row[@column] !~ /^\d{3}$/ | |
puts "The specified column does not appear to be DMA codes." | |
exit_with_help | |
end | |
break if $. == 3 | |
end | |
CSV do | csv_out | | |
CSV.foreach(@input) do | row | | |
# Handle header | |
if $. == 1 and header_flag | |
row.insert(@column + 1, "DMA Name") | |
else | |
dma_name = @dmas[row[@column]] | |
row.insert(@column + 1, dma_name) | |
end | |
csv_out << row | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment