MikeiLL/convert googleDoc to CSV

## convert googleDoc to CSV
"""
Import Google Doc and export to CVS formatted for WP import.

Python3

First use Requests to get the file as recommended: http://www.madhur.co.in/blog/2016/05/13/google-docs-spreadsheet.html

Check to make sure that the file headers are what we expect and if not, exit.

If headers do match, create rows in the new file, matching the needed columns with those from the import.

"""

import requests
import csv
import io

headers={}
headers["User-Agent"]= "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0"
headers["DNT"]= "1"
headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
headers["Accept-Encoding"] = "deflate"
headers["Accept-Language"]= "en-US,en;q=0.5"

file_id="some_google_doc_file_id"
url = "https://docs.google.com/spreadsheets/d/{0}/export?format=csv".format(file_id)

r = requests.get(url)

f1 = open('counties.csv', 'w')

sio = io.StringIO( r.text, newline=None)

# Headers our WP setup requires for WP Ultimate CSV Import Pro Plugin
new_headers = ["post_title","ID","post_content","post_excerpt","post_date","post_name","post_author","post_status","featured_image","nextgen-gallery","post_parent","_address","_county_chair","_edit_last","_edit_lock","_email","_google_map_address","_group_name","_phone_number","_website","_wp_old_slug","address","county_chair","email","google_map_address","group_name","phone_number","website","post_category","post_tag"]

# The headers in the current Google Doc
expected_headers = ["County", "First", "Last", "Mail Address", "Mail City", "Mail Zip", "Street Address", "Ship City ", "Ship Zip", "Work Phone", "Home Phone", "Cell Phone", "Fax Number", "Personal Email", "Email", "Headquarter Phone", "Headquarter Fax", "Website", "Executive Director", "Executive Director Email"]

reader = csv.DictReader(sio, dialect=csv.excel, fieldnames=expected_headers)
writer = csv.DictWriter(f1, fieldnames=new_headers, quoting=csv.QUOTE_ALL)

# If headers differ we have a problem
if((reader.fieldnames>expected_headers)-(reader.fieldnames<expected_headers) != 0):
	print('Headers do not match. Exiting.')
	exit()

# Pull the header row out of the reader as we don't need it.
next(reader, None)

rownum = 0

# Begin our new file
writer.writeheader()

for row in reader:
	if rownum == 1:
		# Provide some feedback
		print('{} Headers in the fetched file match the expected headers.'.format(len(row)))
		print('Generating new file with {} headers.'.format(len(new_headers)))
	for col in row:
		# strip out start and trailing whitespace
		row[col] = row[col].lstrip()
		row[col] = row[col].rstrip()
	# Start building out the rows in the new file
	new_row = {
				"post_title": row['County'],
				"address": row['Mail Address'],
				"county_chair": row['First'] + ' ' + row['Last'],
				"email": row['County'],
				# "google_map_address": row['County'],
				"group_name": row['County'] + ' ' + 'Democrats',
				"phone_number": row['Work Phone'],
				"email": row['Email'],
				"website": row['Website']
			}
	writer.writerow(new_row)

	rownum = rownum + 1

print('Wrote {} rows.'.format(rownum))

f1.close()
	"""
	Import Google Doc and export to CVS formatted for WP import.

	Python3

	First use Requests to get the file as recommended: http://www.madhur.co.in/blog/2016/05/13/google-docs-spreadsheet.html

	Check to make sure that the file headers are what we expect and if not, exit.

	If headers do match, create rows in the new file, matching the needed columns with those from the import.

	"""

	import requests
	import csv
	import io

	headers={}
	headers["User-Agent"]= "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0"
	headers["DNT"]= "1"
	headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8"
	headers["Accept-Encoding"] = "deflate"
	headers["Accept-Language"]= "en-US,en;q=0.5"

	file_id="some_google_doc_file_id"
	url = "https://docs.google.com/spreadsheets/d/{0}/export?format=csv".format(file_id)

	r = requests.get(url)

	f1 = open('counties.csv', 'w')

	sio = io.StringIO( r.text, newline=None)

	# Headers our WP setup requires for WP Ultimate CSV Import Pro Plugin
	new_headers = ["post_title","ID","post_content","post_excerpt","post_date","post_name","post_author","post_status","featured_image","nextgen-gallery","post_parent","_address","_county_chair","_edit_last","_edit_lock","_email","_google_map_address","_group_name","_phone_number","_website","_wp_old_slug","address","county_chair","email","google_map_address","group_name","phone_number","website","post_category","post_tag"]

	# The headers in the current Google Doc
	expected_headers = ["County", "First", "Last", "Mail Address", "Mail City", "Mail Zip", "Street Address", "Ship City ", "Ship Zip", "Work Phone", "Home Phone", "Cell Phone", "Fax Number", "Personal Email", "Email", "Headquarter Phone", "Headquarter Fax", "Website", "Executive Director", "Executive Director Email"]

	reader = csv.DictReader(sio, dialect=csv.excel, fieldnames=expected_headers)
	writer = csv.DictWriter(f1, fieldnames=new_headers, quoting=csv.QUOTE_ALL)

	# If headers differ we have a problem
	if((reader.fieldnames>expected_headers)-(reader.fieldnames<expected_headers) != 0):
	print('Headers do not match. Exiting.')
	exit()

	# Pull the header row out of the reader as we don't need it.
	next(reader, None)

	rownum = 0

	# Begin our new file
	writer.writeheader()

	for row in reader:
	if rownum == 1:
	# Provide some feedback
	print('{} Headers in the fetched file match the expected headers.'.format(len(row)))
	print('Generating new file with {} headers.'.format(len(new_headers)))
	for col in row:
	# strip out start and trailing whitespace
	row[col] = row[col].lstrip()
	row[col] = row[col].rstrip()
	# Start building out the rows in the new file
	new_row = {
	"post_title": row['County'],
	"address": row['Mail Address'],
	"county_chair": row['First'] + ' ' + row['Last'],
	"email": row['County'],
	# "google_map_address": row['County'],
	"group_name": row['County'] + ' ' + 'Democrats',
	"phone_number": row['Work Phone'],
	"email": row['Email'],
	"website": row['Website']
	}
	writer.writerow(new_row)

	rownum = rownum + 1

	print('Wrote {} rows.'.format(rownum))

	f1.close()