Last active
June 25, 2018 19:58
-
-
Save MikeiLL/47e43ea3e1d744779d2cea18f00de23c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Import Google Doc and export to CVS formatted for WP import. | |
Python3 | |
First use Requests to get the file as recommended: http://www.madhur.co.in/blog/2016/05/13/google-docs-spreadsheet.html | |
Check to make sure that the file headers are what we expect and if not, exit. | |
If headers do match, create rows in the new file, matching the needed columns with those from the import. | |
""" | |
import requests | |
import csv | |
import io | |
headers={} | |
headers["User-Agent"]= "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0" | |
headers["DNT"]= "1" | |
headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" | |
headers["Accept-Encoding"] = "deflate" | |
headers["Accept-Language"]= "en-US,en;q=0.5" | |
file_id="some_google_doc_file_id" | |
url = "https://docs.google.com/spreadsheets/d/{0}/export?format=csv".format(file_id) | |
r = requests.get(url) | |
f1 = open('counties.csv', 'w') | |
sio = io.StringIO( r.text, newline=None) | |
# Headers our WP setup requires for WP Ultimate CSV Import Pro Plugin | |
new_headers = ["post_title","ID","post_content","post_excerpt","post_date","post_name","post_author","post_status","featured_image","nextgen-gallery","post_parent","_address","_county_chair","_edit_last","_edit_lock","_email","_google_map_address","_group_name","_phone_number","_website","_wp_old_slug","address","county_chair","email","google_map_address","group_name","phone_number","website","post_category","post_tag"] | |
# The headers in the current Google Doc | |
expected_headers = ["County", "First", "Last", "Mail Address", "Mail City", "Mail Zip", "Street Address", "Ship City ", "Ship Zip", "Work Phone", "Home Phone", "Cell Phone", "Fax Number", "Personal Email", "Email", "Headquarter Phone", "Headquarter Fax", "Website", "Executive Director", "Executive Director Email"] | |
reader = csv.DictReader(sio, dialect=csv.excel, fieldnames=expected_headers) | |
writer = csv.DictWriter(f1, fieldnames=new_headers, quoting=csv.QUOTE_ALL) | |
# If headers differ we have a problem | |
if((reader.fieldnames>expected_headers)-(reader.fieldnames<expected_headers) != 0): | |
print('Headers do not match. Exiting.') | |
exit() | |
# Pull the header row out of the reader as we don't need it. | |
next(reader, None) | |
rownum = 0 | |
# Begin our new file | |
writer.writeheader() | |
for row in reader: | |
if rownum == 1: | |
# Provide some feedback | |
print('{} Headers in the fetched file match the expected headers.'.format(len(row))) | |
print('Generating new file with {} headers.'.format(len(new_headers))) | |
for col in row: | |
# strip out start and trailing whitespace | |
row[col] = row[col].lstrip() | |
row[col] = row[col].rstrip() | |
# Start building out the rows in the new file | |
new_row = { | |
"post_title": row['County'], | |
"address": row['Mail Address'], | |
"county_chair": row['First'] + ' ' + row['Last'], | |
"email": row['County'], | |
# "google_map_address": row['County'], | |
"group_name": row['County'] + ' ' + 'Democrats', | |
"phone_number": row['Work Phone'], | |
"email": row['Email'], | |
"website": row['Website'] | |
} | |
writer.writerow(new_row) | |
rownum = rownum + 1 | |
print('Wrote {} rows.'.format(rownum)) | |
f1.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment