Skip to content

Instantly share code, notes, and snippets.

@jennirinker
Last active March 15, 2020 16:03
Show Gist options
  • Save jennirinker/d19da338740d2f522415 to your computer and use it in GitHub Desktop.
Save jennirinker/d19da338740d2f522415 to your computer and use it in GitHub Desktop.
Download local copy of online mat files

update-data

Routines to update local copies of M4 data from NREL

Summary

Routines to update a local copy of the folder structure and .mat files from an online directory at a specified URL. Works for both the 20-Hz and the 10-min M4 data. The repository contains a Python module with helper functions (URLDataDownload.py) and a script that is called from the command line to perform the update (update_nrel.py).

Currently structured for Windows file paths and file-write options. To get it to work on Linux/Mac, modify the base paths below and the path construction/file-writing flags in the helper function updateDirectory.

Usage

From a Python interpreter:
>>> execfile('update_nrel.py')
From terminal:
> python update_nrel.py

Contacts

For issues or questions about the data, email Andrew Clifton at Andrew.Clifton@nrel.gov. Lots of information can be found online at the NWTC portal. For issues with the script, email me at jennifer.rinker@duke.edu.

if __name__ == '__main__':
    # Entry point: everything below runs only when this file is executed as
    # a script, not when it is imported.
    from URLDataDownload import updateDirectory

    # Remote location and matching local folder for the 20-Hz data.
    # *** NO TRAILING SLASHES ***
    baseURL = 'http://wind.nrel.gov/MetData/135mData/M4Twr/20Hz/mat'
    basedir = 'G:\\data\\nrel-20Hz'

    # To update the 10-min data instead, swap in the pair below.
    # *** NO TRAILING SLASHES ***
    ## baseURL = 'http://wind.nrel.gov/MetData/135mData/M4Twr/10min/mat'
    ## basedir = 'G:\\data\\nrel-10min'

    # Synchronise the local tree; the return value lists the URLs that
    # could not be downloaded.
    errList = updateDirectory(baseURL, basedir)

    # Point the user at errList when something went wrong.
    if errList:
        print('\n{} file(s) with errors saved in errList.'.format(
            len(errList)))

    # Done.
    print('\nScript complete.\n')
def listURLTable(URL):
    """ List of elements from a table located online at specified URL.

    Only the first <table> on the page is read. Every row that has at
    least one <td> cell contributes one entry holding the stripped text of
    its non-empty cells; rows without <td> cells (e.g. header rows built
    from <th>) are skipped.

    Args:
        URL (string): address to webpage with table

    Returns:
        listTable (list): list of table contents at URL, one list of
            strings per row. Empty if the request fails (HTTP status of
            400 or above) or if the page contains no table.
    """
    import requests                     # testing URLs
    from bs4 import BeautifulSoup       # web scraping

    # check URL
    result = requests.get(URL)

    # halt if URL request failed
    if result.status_code >= 400:
        print('Error: URL request failed. for ' + \
              URL)
        return []

    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns about it), so the parsed
    # tree could differ between machines. 'html.parser' ships with Python.
    soup = BeautifulSoup(result.content, 'html.parser')

    # A page without any <table> yields None here; treat that as "nothing
    # listed" rather than failing on None.find_all.
    table = soup.find('table')
    if table is None:
        return []

    # loop through rows, cleaning and appending data to list
    listTable = []
    for row in table.find_all('tr'):
        cols = [ele.text.strip() for ele in row.find_all('td')]
        if cols:
            # remove empty cells, unicode -> str
            listTable.append([str(ele) for ele in cols if ele])

    return listTable
def updateDirectory(baseURL,basedir):
    """ Update a directory at basedir so that the folders and .mat files match
        that at the specified URL in baseURL. Returns a list of files with
        issues loading the URL path.

    The table listed at baseURL is walked row by row. Entries whose name
    contains '/' are treated as subfolders: the local folder is created if
    needed and then updated recursively. Entries whose name contains
    '.mat' are downloaded when no local copy exists, or when the
    modification date listed online is newer than the local file's
    modification time. All other entries are ignored.

    Args:
        baseURL (string): top-level URL to update (no trailing slash)
        basedir (string): corresponding top-level local directory to update
            (no trailing slash)

    Returns:
        errList (list): list of file URLs with errors, if any
    """
    import os                           # testing existence of files
    from datetime import datetime       # checking mod dates
    import requests                     # testing URLs, downloading files

    print('Entering ' + basedir)

    # format of date string on M4 website
    datestrfmt = '%d-%b-%Y %H:%M'

    # initialize error list
    errList = []

    # get list of table elements on URL
    tableList = listURLTable(baseURL)

    # loop through rows in the table
    for row in tableList:

        # a row whose cells were all empty has nothing to index
        if not row:
            continue
        ele = row[0]                    # first column

        # if element is a folder
        if '/' in ele:

            # create the URL/paths for subfolder; os.path.join picks the
            # separator of the running platform instead of a fixed '\\'
            foldname = ele.strip('/')
            foldURL = '/'.join([baseURL, foldname])
            foldpath = os.path.join(basedir, foldname)

            # make the local folder (and any missing parents) if it
            # doesn't exist
            if not os.path.isdir(foldpath):
                os.makedirs(foldpath)

            # recursively update the subfolder, save any errors
            errList.extend(updateDirectory(foldURL, foldpath))

        # if element is a .mat file
        elif '.mat' in ele:

            # create the URL/paths for specific file
            filename = ele
            fileURL = '/'.join([baseURL, filename])
            filepath = os.path.join(basedir, filename)

            # an existing local file is only replaced when the date listed
            # online is newer than the local modification time
            if os.path.exists(filepath):
                URLmoddate = datetime.strptime(row[1], datestrfmt)
                locmoddate = datetime.fromtimestamp(
                    os.path.getmtime(filepath))
                if URLmoddate <= locmoddate:
                    continue

            # Fetch the file once. The response of requests.get already
            # carries the file body, so it serves both as the status check
            # and as the download; no second transfer is needed.
            try:
                response = requests.get(fileURL)
                failed = response.status_code >= 400
            except requests.exceptions.RequestException:
                # connection-level failure: record it and keep going with
                # the remaining files
                failed = True

            # save error list if issue
            if failed:
                print('Error downloading {}'.format(filepath))
                errList.append(fileURL)
                continue

            # the local file is only opened once the full body has been
            # received successfully
            with open(filepath, 'wb') as locfile:
                locfile.write(response.content)

    print(' Files at ' + basedir + ' are up to date.')

    return errList
@aganbal
Copy link

aganbal commented Aug 3, 2017

Dear Jenni, I tried to use "update_data.py", but it contains only the update function, and no result was returned when I ran it.
Looking deeper, there is an error message saying that SNI for TLS is not available.
So I am wondering whether this is still working.
Also, I could not find "update_nrel.py".

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment