Last active
October 30, 2017 00:43
-
-
Save djfan/487ceb2631a095f0887d2c925e3a06ad to your computer and use it in GitHub Desktop.
1:Size 2:Look_up Looping 3:Campaign -> Brands 4:Publisher
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import re | |
''' | |
Size | |
''' | |
# Input | |
input_address = './DCM Lookup Table_Updated.xlsx' | |
output_address = './Size_Output.xlsx' | |
sheet_name = 'Sheet2' | |
colname = 'Size' | |
# Load Data | |
data = pd.read_excel(input_address, sheetname=sheet_name) | |
# Patterns | |
# pa1 = re.compile(r"video|_OLV_") | |
# pa2 = re.compile(r"[0-9\s]+x[0-9\s]+") | |
pa2 = re.compile(r"[0-9]+x[0-9]+") | |
# pa21 = re.compile(r"[_\s]([0-9\s]+x[0-9\s]+)") | |
# pa22 = re.compile(r"[_\w]([0-9][0-9\s]*x[0-9\s]+)") | |
pa3 = re.compile(r"[0-9]+s|[_:]15[_\s]|[_:]30[_\s]|[_:]45[_\s]|[_:]60[_\s]") | |
pa4 = re.compile(r'[0-9]+s') | |
# data pre-processing | |
d = data.loc[:,'Ad group'] | |
d = d.dropna() | |
# Video or Display | |
video = [] | |
for i in range(len(d)): | |
flag = (d[i].lower().find('video') != -1) or (d[i].lower().find('_OLV_') != -1) | |
if flag: | |
video.append(1) | |
else: | |
video.append(0) | |
# remove space function | |
remove_space = lambda x: x.replace(' ', '') | |
# extract size for each record | |
all_size = [] | |
for i in range(len(d)): | |
result = re.findall(pa3, d[i]) | |
if len(result) != 0: | |
res_sec = re.findall(pa4, result[0]) | |
if len(res_sec) > 0: | |
all_size.append(result) | |
else: | |
all_size.append([result[0].replace('_', '').replace(' ','').replace(':', '') + 's']) | |
else: | |
result = re.findall(pa2, d[i].replace(' x ', 'x')) | |
res_no_space = map(remove_space, result) | |
res_unique = list(set(res_no_space)) | |
if len(res_unique) > 1: | |
try: | |
res_unique.remove(u'1x1') | |
res_unique.remove(u'0x0') | |
except ValueError: | |
pass | |
if (len(res_unique) > 1 or res_unique == [u'0x0']): | |
all_size.append(False) | |
else: | |
if video[i] == 1 and res_unique == [u'1x1']: | |
all_size.append(False) | |
else: | |
all_size.append(res_unique) | |
all_size = [s[0] if s else False for s in all_size] | |
print('length:', len(all_size), '#False:', all_size.count(False)) | |
#Write Out | |
data[colname] = all_size | |
data.to_excel(output_address) | |
''' | |
Look_up Looping | |
''' | |
# address | |
add_sizeLookUp = './size lookup.xlsx' | |
add_data = './DCM Lookup Table_Updated.xlsx' | |
add_output = 'look_up_result.xlsx' | |
sheet_name = 'Sheet2' | |
colname = 'look_up' | |
# read data: target & data | |
size_look_up = pd.read_excel(add_sizeLookUp) | |
target = size_look_up['find'].values; print target | |
data = pd.read_excel(add_data, sheetname = sheet_name) | |
# sample data | |
data2 = data.copy().head(3) | |
ad_group = data2.loc[:, 'Ad group'] | |
# loop | |
result_all = [] | |
for ad in ad_group: | |
result = [] | |
for t in target: | |
if t in ad: | |
result.append(t) | |
if len(result) == 0: | |
result = False | |
result_all.append(result) | |
# output data | |
data2[colname] = result_all | |
data2.to_excel(add_output) | |
''' | |
Campaign -> Brands | |
''' | |
colname = 'Brand' | |
# read data | |
data = pd.read_excel('./DCM Lookup Table_Updated.xlsx',sheetname='Sheet2') | |
campaign = data.Campaign | |
# loop | |
brands = [] | |
for s in campaign: | |
s = s.lower() | |
if s.find('evinrude') != -1: | |
brands.append('Evinrude') | |
elif s.find('spyder') != -1: | |
brands.append('Spyder') | |
elif s.find('ski-doo') != -1: | |
brands.append('Ski-doo') | |
elif s.find('sea-doo') != -1: | |
brands.append('Sea-doo') | |
else: | |
brands.append('Can-Am ORV') | |
# append to corresponding column | |
data[colname] = brands | |
''' | |
Publisher | |
''' | |
add_input = './DCM Lookup Table_Updated.xlsx' | |
sheet_name = 'Sheet1' | |
add_output = './type.xlsx' | |
colname = 'Type' | |
data = pd.read_excel(add_input, sheetname=sheet_name) | |
data = data.dropna() | |
df = data.loc[:,['Publisher', 'Ad group', 'Size', 'Channel']] | |
df.Size = map(lambda x: x.lower(), df.Size) | |
PBU = ['PBU Canada', 'Appnexus', 'DOUBLECLICK TECH', 'TubeMogul'] | |
video_pattern = re.compile(r"[0-9]+s") | |
results = [] | |
for i in range(len(df)): | |
if df.loc[i, 'Channel'] == 'Display-TrueView': | |
results.append('PBU Video') | |
else: | |
if df.loc[i, 'Publisher'] in PBU: | |
if len(re.findall(video_pattern, df.loc[i, 'Size'])) > 0: | |
results.append('PBU Video') | |
else: | |
results.append('PBU Display') | |
else: | |
if len(re.findall(video_pattern, df.loc[i, 'Size'])) > 0: | |
results.append('Contextual Video') | |
else: | |
results.append('Contextual Display') | |
# print results.count('PBU Video'), results.count('PBU Display'), results.count('Contextual Video'), results.count('Contextual Display') | |
data[colname] = results | |
data.to_excel(add_output) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment