Skip to content

Instantly share code, notes, and snippets.

@djfan
Last active October 30, 2017 03:38
Show Gist options
  • Save djfan/a2c92650a186892a531c6532984b7422 to your computer and use it in GitHub Desktop.
Save djfan/a2c92650a186892a531c6532984b7422 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Spyder Editor
This is a temporary script file.
"""
import pandas as pd
import numpy as np
import re
add_input = './DCM Lookup Table.xlsx'
sheet_name = 'Sheet2'
add_output = './output2.xlsx'
#lookup Channel
df1=pd.read_excel(add_input, sheetname=sheet_name)
df1 = df1.dropna(how='all')
df1['Channel'] = np.where(df1['Ad group'].astype(str).str.contains('TrueView')|df1['Ad group'].astype(str).str.contains('trueview')|df1['Ad group'].astype(str).str.contains('truewiew')|df1['Ad group'].astype(str).str.contains('_YT_')==True, 'Display-TrueView', 'Display')
print('Channel Finished')
#lookup Brand
colname = 'Brand'
campaign = df1.Campaign
brands = []
for s in campaign:
s = s.lower()
if s.find('evinrude') != -1:
brands.append('Evinrude')
elif s.find('spyder') != -1:
brands.append('Spyder')
elif s.find('ski-doo') != -1:
brands.append('Ski-doo')
elif s.find('sea-doo') != -1:
brands.append('Sea-doo')
else:
brands.append('Can-Am ORV')
# append to corresponding column
df1[colname] = brands
print('Brand Finished')
#Lookup Size
colname = 'Size'
# Patterns
# pa1 = re.compile(r"video|_OLV_")
# pa2 = re.compile(r"[0-9\s]+x[0-9\s]+")
pa2 = re.compile(r"[0-9]+x[0-9]+")
# pa21 = re.compile(r"[_\s]([0-9\s]+x[0-9\s]+)")
# pa22 = re.compile(r"[_\w]([0-9][0-9\s]*x[0-9\s]+)")
pa3 = re.compile(r"[0-9]+s|[_:]15[_\s]|[_:]30[_\s]|[_:]45[_\s]|[_:]60[_\s]")
pa4 = re.compile(r'[0-9]+s')
# data pre-processing
d = df1.loc[:,'Ad group']
d = d.dropna()
# Video or Display
video = []
for i in range(len(d)):
flag = (d[i].lower().find('video') != -1) or (d[i].find('_OLV_') != -1)
if flag:
video.append(1)
else:
video.append(0)
# remove space function
remove_space = lambda x: x.replace(' ', '')
# extract size for each record
all_size = []
for i in range(len(d)):
result = re.findall(pa3, d[i])
if len(result) != 0:
res_sec = re.findall(pa4, result[0])
if len(res_sec) > 0:
all_size.append(result)
else:
all_size.append([result[0].replace('_', '').replace(' ','').replace(':', '') + 's'])
else:
result = re.findall(pa2, d[i].replace(' x ', 'x'))
res_no_space = map(remove_space, result)
res_unique = list(set(res_no_space))
if len(res_unique) > 1:
try:
res_unique.remove(u'1x1')
res_unique.remove(u'0x0')
except ValueError:
pass
if (len(res_unique) > 1 or res_unique == [u'0x0']):
all_size.append(False)
else:
if video[i] == 1 and res_unique == [u'1x1']:
all_size.append(False)
else:
all_size.append(res_unique)
all_size = [s[0] if s else False for s in all_size]
print('length:', len(all_size), '#False:', all_size.count(False))
df1[colname] = all_size
print('Size Finished')
# '''
# Partner Type
# '''
colname = 'Partner Type'
df = df1.loc[:,['Publisher', 'Ad group', 'Size', 'Channel']]
df.Size = df.Size
# df.Size = map(lambda x: x.lower(), df.Size)
# df.Size = [s.lower() if s else False for s in df.Size]
# print df.Size[0]
PBU = ['PBU Canada', 'Appnexus', 'DOUBLECLICK TECH', 'TubeMogul']
video_pattern = re.compile(r"[0-9]+s")
results = []
for i in range(len(df)):
if df.loc[i, 'Channel'] == 'Display-TrueView':
results.append('PBU Video')
else:
if df.loc[i, 'Publisher'] in PBU:
try:
flag = len(re.findall(video_pattern, df.loc[i, 'Size'])) > 0
except TypeError:
flag = False
if flag:
results.append('PBU Video')
else:
results.append('PBU Display')
else:
try:
flag = len(re.findall(video_pattern, df.loc[i, 'Size'])) > 0
except TypeError:
flag = False
if flag:
results.append('Contextual Video')
else:
results.append('Contextual Display')
print(results.count('PBU Video'), results.count('PBU Display'), results.count('Contextual Video'), results.count('Contextual Display'))
df1[colname] = results
print('Partner Type Finished')
# write out
df1.to_excel(add_output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment