Last active
October 30, 2017 03:38
-
-
Save djfan/a2c92650a186892a531c6532984b7422 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Spyder Editor | |
This is a temporary script file. | |
""" | |
import pandas as pd | |
import numpy as np | |
import re | |
add_input = './DCM Lookup Table.xlsx' | |
sheet_name = 'Sheet2' | |
add_output = './output2.xlsx' | |
#lookup Channel | |
df1=pd.read_excel(add_input, sheetname=sheet_name) | |
df1 = df1.dropna(how='all') | |
df1['Channel'] = np.where(df1['Ad group'].astype(str).str.contains('TrueView')|df1['Ad group'].astype(str).str.contains('trueview')|df1['Ad group'].astype(str).str.contains('truewiew')|df1['Ad group'].astype(str).str.contains('_YT_')==True, 'Display-TrueView', 'Display') | |
print('Channel Finished') | |
#lookup Brand | |
colname = 'Brand' | |
campaign = df1.Campaign | |
brands = [] | |
for s in campaign: | |
s = s.lower() | |
if s.find('evinrude') != -1: | |
brands.append('Evinrude') | |
elif s.find('spyder') != -1: | |
brands.append('Spyder') | |
elif s.find('ski-doo') != -1: | |
brands.append('Ski-doo') | |
elif s.find('sea-doo') != -1: | |
brands.append('Sea-doo') | |
else: | |
brands.append('Can-Am ORV') | |
# append to corresponding column | |
df1[colname] = brands | |
print('Brand Finished') | |
#Lookup Size | |
colname = 'Size' | |
# Patterns | |
# pa1 = re.compile(r"video|_OLV_") | |
# pa2 = re.compile(r"[0-9\s]+x[0-9\s]+") | |
pa2 = re.compile(r"[0-9]+x[0-9]+") | |
# pa21 = re.compile(r"[_\s]([0-9\s]+x[0-9\s]+)") | |
# pa22 = re.compile(r"[_\w]([0-9][0-9\s]*x[0-9\s]+)") | |
pa3 = re.compile(r"[0-9]+s|[_:]15[_\s]|[_:]30[_\s]|[_:]45[_\s]|[_:]60[_\s]") | |
pa4 = re.compile(r'[0-9]+s') | |
# data pre-processing | |
d = df1.loc[:,'Ad group'] | |
d = d.dropna() | |
# Video or Display | |
video = [] | |
for i in range(len(d)): | |
flag = (d[i].lower().find('video') != -1) or (d[i].find('_OLV_') != -1) | |
if flag: | |
video.append(1) | |
else: | |
video.append(0) | |
# remove space function | |
remove_space = lambda x: x.replace(' ', '') | |
# extract size for each record | |
all_size = [] | |
for i in range(len(d)): | |
result = re.findall(pa3, d[i]) | |
if len(result) != 0: | |
res_sec = re.findall(pa4, result[0]) | |
if len(res_sec) > 0: | |
all_size.append(result) | |
else: | |
all_size.append([result[0].replace('_', '').replace(' ','').replace(':', '') + 's']) | |
else: | |
result = re.findall(pa2, d[i].replace(' x ', 'x')) | |
res_no_space = map(remove_space, result) | |
res_unique = list(set(res_no_space)) | |
if len(res_unique) > 1: | |
try: | |
res_unique.remove(u'1x1') | |
res_unique.remove(u'0x0') | |
except ValueError: | |
pass | |
if (len(res_unique) > 1 or res_unique == [u'0x0']): | |
all_size.append(False) | |
else: | |
if video[i] == 1 and res_unique == [u'1x1']: | |
all_size.append(False) | |
else: | |
all_size.append(res_unique) | |
all_size = [s[0] if s else False for s in all_size] | |
print('length:', len(all_size), '#False:', all_size.count(False)) | |
df1[colname] = all_size | |
print('Size Finished') | |
# ''' | |
# Partner Type | |
# ''' | |
colname = 'Partner Type' | |
df = df1.loc[:,['Publisher', 'Ad group', 'Size', 'Channel']] | |
df.Size = df.Size | |
# df.Size = map(lambda x: x.lower(), df.Size) | |
# df.Size = [s.lower() if s else False for s in df.Size] | |
# print df.Size[0] | |
PBU = ['PBU Canada', 'Appnexus', 'DOUBLECLICK TECH', 'TubeMogul'] | |
video_pattern = re.compile(r"[0-9]+s") | |
results = [] | |
for i in range(len(df)): | |
if df.loc[i, 'Channel'] == 'Display-TrueView': | |
results.append('PBU Video') | |
else: | |
if df.loc[i, 'Publisher'] in PBU: | |
try: | |
flag = len(re.findall(video_pattern, df.loc[i, 'Size'])) > 0 | |
except TypeError: | |
flag = False | |
if flag: | |
results.append('PBU Video') | |
else: | |
results.append('PBU Display') | |
else: | |
try: | |
flag = len(re.findall(video_pattern, df.loc[i, 'Size'])) > 0 | |
except TypeError: | |
flag = False | |
if flag: | |
results.append('Contextual Video') | |
else: | |
results.append('Contextual Display') | |
print(results.count('PBU Video'), results.count('PBU Display'), results.count('Contextual Video'), results.count('Contextual Display')) | |
df1[colname] = results | |
print('Partner Type Finished') | |
# write out | |
df1.to_excel(add_output) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment