Skip to content

Instantly share code, notes, and snippets.

View djfan's full-sized avatar

Dongjie Fan djfan

  • NY, NY
View GitHub Profile
@djfan
djfan / DCM.py
Last active October 30, 2017 00:43
1:Size 2:Look_up Looping 3:Campaign -> Brands 4:Publisher
import pandas as pd
import numpy as np
import re
'''
Size
'''
# Input
# File locations for the "Size" step; both paths are relative to the current
# working directory.
# Presumably the Excel workbook this step reads -- confirm against the code
# that consumes it (not visible in this fragment).
input_address = './DCM Lookup Table_Updated.xlsx'
# Presumably the Excel workbook this step writes -- confirm against the code
# that consumes it (not visible in this fragment).
output_address = './Size_Output.xlsx'
@djfan
djfan / DCM2.py
Last active October 30, 2017 03:38
# -*- coding: utf-8 -*-
"""
Spyder Editor
This is a temporary script file.
"""
import pandas as pd
import numpy as np
import re
@djfan
djfan / DCM3.py
Last active October 31, 2017 05:56
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 30 09:26:59 2017
@author: yvette.wang
"""
import pandas as pd
import numpy as np
import re
import pandas as pd
# Load the 'Sheet1' worksheet and work on a copy so the frame as loaded stays
# available unchanged in `df`.
# `sheet_name` is the supported keyword; the older `sheetname` spelling was
# deprecated and later removed from pandas.read_excel.
df = pd.read_excel('./searchall.xlsx', sheet_name='Sheet1')
df1 = df.copy()

# Print the category counts before and after the fill so the effect of
# defaulting missing 'Partner Type (Same)' values to 'Search' is visible.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(df1['Partner Type (Same)'].value_counts())
df1['Partner Type (Same)'] = df1['Partner Type (Same)'].fillna('Search')
print(df1['Partner Type (Same)'].value_counts())
# df1['Partner Type (Same)']
import pandas as pd
import numpy as np
# Load the lookup table (first worksheet, since no sheet is specified).
meta = pd.read_excel('./Search Lookup Table.xlsx')

# Missing 'Raw file name' cells become False instead of NaN.
meta['Raw file name'] = meta['Raw file name'].fillna(False)

# Pair each row's country with its file name as a (Country, file name) tuple.
# zip() is materialised with list() because on Python 3 it returns a one-shot
# iterator rather than the list of tuples that Python 2 produced.
meta['address'] = list(zip(meta['Country'], meta['file name']))
# meta

# Distinct values of the 'Template' column, in order of first appearance.
temp_col = meta.Template.unique()
# temp_col
# TODO: replace the 'temp = df[...]' filtering with direct .loc assignments, as sketched below:
df.loc[df.Channel == 'Display-TrueView', ['Cost', 'Clicks']] = 0 # should be ['Cost', 'Clicks', ...., 'Video....']
df.loc[df['Partner...'].isin([...])......, 'Cost'] = 0
df.to_excel(...)
import pandas as pd
# Workbook and worksheet to load, plus an output workbook path (the output
# path is not used within this fragment).
add_input = './Desktop/DCM_output.xlsx'
sheet_name = 'Sheet1'
add_output2 = './Desktop/spendcleanoutput.xlsx'

# `sheet_name` is the supported keyword; the older `sheetname` spelling was
# deprecated and later removed from pandas.read_excel.
df = pd.read_excel(add_input, sheet_name=sheet_name)

# Drop rows in which every cell is empty.
df = df.dropna(how='all')

# Zero out the cost and engagement metrics on every 'Display-TrueView' row.
df.loc[
    df.Channel == 'Display-TrueView',
    [
        'Cost',
        'Views',
        'Clicks',
        'Video played to 25%',
        'Video played to 50%',
        'Video played to 75%',
        'Video played to 100%',
    ],
] = 0
@djfan
djfan / lib_version.txt
Created November 20, 2017 19:45
Python library versions on the Azure cluster
Sphinx 1.4.6
setuptools 27.2.0
Babel 2.3.4
argcomplete 1.0.0
decorator 4.0.10
mistune 0.7.3
chest 0.2.3
jupyter-client 4.4.0
widgetsnbextension 1.2.6
alabaster 0.7.9
@djfan
djfan / fix_gdal_py2.sh
Last active December 18, 2017 20:35
azure cluster
#!/usr/bin/env bash
# Installs additional packages into the Anaconda distribution located at
# /usr/bin/anaconda.

# Install everything listed in the requirements file hosted at the gist URL.
/usr/bin/anaconda/bin/pip install -r https://gist.githubusercontent.com/djfan/ccdfac26415196021adacb2d52455c60/raw/36e12d84a418d2274ef79b1d853073cc200535cc/r.txt
#/usr/bin/anaconda/bin/conda update --all
# --yes answers conda's confirmation prompt so the install can finish when the
# script runs unattended (no terminal to type "y" into).
/usr/bin/anaconda/bin/conda install --yes hdf4
#!/usr/bin/env bash
# Clones the djfan/magellan fork, switches to the mb/fix-pyspark-support
# branch, and installs its python/ package into two Anaconda interpreters.

git clone https://github.com/djfan/magellan.git
# Abort if a directory change fails (e.g. the clone did not succeed);
# otherwise the later commands would run in the wrong directory.
cd magellan || exit 1
git checkout mb/fix-pyspark-support
cd python || exit 1
# Install with the base Anaconda interpreter, then with the py35 environment.
/usr/bin/anaconda/bin/python setup.py install
/usr/bin/anaconda/envs/py35/bin/python setup.py install