import csv
import os
import re

import requests
from lxml import etree
# NOTE: assumes the google-cloud-bigquery package is installed.
from google.cloud import bigquery

username = ""
password = ""
count = 0
column = 0
rows = []     # accumulates cell text for the CSV row currently being built
headers = []  # column names taken from the first <Table> element's child tags
# Perform a POST request against the authentication endpoint.
def post_req():
    host = "https://xxx.xx/"
    path = "/xxx/xx.asmx/Authenticate"
    post_url = host + path
    rq = requests.post(post_url, data={'Username': username, 'Password': password})
    return rq
# Perform a GET request with basic auth against the same endpoint (currently unused).
def get_req():
    host = "https://xxx.xx/"
    path = "/xxx/xx.asmx/Authenticate"
    get_url = host + path
    rq = requests.get(get_url, auth=(username, password))
    # print(rq.status_code, rq.reason, rq.text)
    return rq
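
# The XML response is assumed to look like a .NET DataSet dump: repeated <Table>
# nodes whose child tags name the columns, e.g. (hypothetical tags):
#   <NewDataSet>
#     <Table><Employee_ID>1</Employee_ID>...<Status>Active</Status></Table>
#     ...
#   </NewDataSet>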
def parse_data_write_to_csv(count, column):
    req = post_req()
    data = req.content
    root = etree.fromstring(data)
    # Find all <Table> tags in the XML doc.
    elements = root.findall('.//Table')
    # On the first iteration, write the XML tags as the CSV header row;
    # then write one CSV row per <Table> element until all are exhausted.
    with open('pwc.csv', 'w', newline='') as myfile:
        writer = csv.writer(myfile, delimiter='|')
        for element in elements:
            if count < 1:
                for el in element:
                    headers.append(el.tag)
                    count += 1
                writer.writerow(headers)
            for ele in element:
                rows.append(ele.text)
                column += 1
                # rows.append(etree.tostring(element, method='text'))
                if column == 39:  # each <Table> is expected to carry 39 columns
                    writer.writerow(rows)
                    del rows[:]   # reset the accumulator so rows don't run together
                    column = 0
########## Start To-do: define function create_bq_dataset_tables
# param csv_file: path to the pipe-delimited CSV produced above
# param dataset_name: target BigQuery dataset
# param table_name: target BigQuery table
def create_bq_dataset_tables(csv_file, dataset_name, table_name):
    # NOTE: this uses the legacy (pre-1.0) google-cloud-bigquery client API,
    # in which Dataset objects expose .exists() and .create() directly.
    bq_client = bigquery.Client(project='xxxxx-xxx-xx')
    # bq_client = bigquery.Client()
    # TO DO: authenticate the bq user with a JSON service account instead
    # Read the headers from the CSV and build a formatted schema string.
    dataset = bq_client.dataset(dataset_name)
    schema = ""
    schema_parts = []
    with open(csv_file, "r", newline='') as f:
        d_reader = csv.DictReader(f, delimiter='|')
        headers = d_reader.fieldnames
        # Array_Editor is a project-local helper not shown in this gist; going
        # by its method names, it de-duplicates the header list and prefixes
        # each name with an underscore.
        arr_editor = Array_Editor(headers)
        headers = arr_editor.prepend_underscore(arr_editor.get_unique_list(headers))
        for line in headers:
            schema_parts.append(re.sub('[^0-9a-zA-Z]+', '_', line.strip()) + ":string,")
        schema = ''.join(schema_parts)[:-1]  # drop the trailing comma
    if not dataset.exists():
        # Create the dataset before loading the table into it.
        dataset.create()
    # awk pass: a line with an odd number of double quotes means a record was
    # split mid-quoted-field, so join it with the following line before loading.
    format_csv_file = ' awk \'{if(t){print;t=0;next;}x=$0;n=gsub(/"/,"",x);if(n%2){printf $0" ";t=1;}else print $0} \' ' + csv_file + ">" + csv_file + "_bq"
    bq_delete_table_str = "~/google-cloud-sdk/bin/bootstrapping/bq.py rm -f -t " + dataset_name + "." + table_name
    bq_create_table_str = "~/google-cloud-sdk/bin/bootstrapping/bq.py load --field_delimiter='|' --skip_leading_rows=1 --encoding=UTF-8 --max_bad_records=150 " + dataset_name + "." + re.sub(
        '[^0-9a-zA-Z]+', '_', table_name) + " '" + csv_file + "_bq' " + schema
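    # With hypothetical inputs, bq_create_table_str resolves to something like:
    #   ~/google-cloud-sdk/bin/bootstrapping/bq.py load --field_delimiter='|' \
    #     --skip_leading_rows=1 --encoding=UTF-8 --max_bad_records=150 \
    #     my_dataset.my_table 'pwc.csv_bq' _Employee_ID:string,_Status:string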
    # To do: if the table exists, copy it to a backup table before dropping it
    # To do: log output to a txt file
    # To do: rename the former csv file with a timestamp, e.g. (Windows batch
    # syntax) %date:~10,4%%date:~7,2%%date:~4,2%_%time:~0,2%%time:~3,2%
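    # A possible shape for the backup-before-drop to-do above (the _backup
    # suffix is hypothetical; `bq cp -f` is the standard CLI copy command):
    # bq_backup_table_str = ("~/google-cloud-sdk/bin/bootstrapping/bq.py cp -f "
    #                        + dataset_name + "." + table_name + " "
    #                        + dataset_name + "." + table_name + "_backup")
    # os.system(bq_backup_table_str)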
    os.system(format_csv_file)
    # Drop the current bq table, then reload it from the formatted CSV.
    os.system(bq_delete_table_str)
    os.system(bq_create_table_str)
if __name__ == '__main__':
    parse_data_write_to_csv(count, column)
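    # Follow-up step once the CSV exists (dataset/table names are hypothetical):
    # create_bq_dataset_tables('pwc.csv', 'my_dataset', 'my_table')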