Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
One of the first Python programs I wrote. For some reason, I chose to download the HTML and save it to text files. I think I was having trouble working with the raw HTML with bs4. Anyway, I am leaving it 'as is', mostly for the sake of having a reference point for how my Python programming improves (or doesn't) over time.
#-----------------------------------------------------------------------------#
# Parses the converted html files to find the swell heights and conditions
# and then prints these out into formatted tables
#-----------------------------------------------------------------------------#
import os
import urllib2

from bs4 import BeautifulSoup
# Scrape the site and save the raw html to a text file
def html_to_text(address, text_file):
    """Download the page at *address* and save its body to *text_file*.

    The body is written exactly as received. parse_txt() splits the saved
    lines on raw markup (">", "<", '"') and relies on fixed line offsets, so
    the html must not be reformatted before it is stored.

    Args:
        address: URL of the forecast page to fetch.
        text_file: Path of the file to (over)write with the page body.
    """
    response = urllib2.urlopen(address)
    try:
        html = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    # Mode 'w' already empties an existing file; the context manager closes
    # the handle even if the write raises.
    with open(text_file, 'w') as target:
        target.write(html)
# Parse the text file to extract forecast data

# Labels whose presence on a line marks the start of a day's entry.
_DAY_KEYWORDS = ('Today', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday')

# Upper bound on table rows; keyword hits beyond this many are discarded.
_MAX_DAYS = 7

# Condition codes found in the markup -> readable labels.
_CONDITION_LABELS = {'cond1': 'Good', 'cond2': 'Fair', 'cond3': 'Poor'}

_TABLE_RULE = "----------------------------------------------"
_FIRST_ROW_FORMAT = "%s: \t\t %s %s \t %s %s"
_OTHER_ROW_FORMAT = "%s: \t %s %s \t %s %s"


def _collect_day_records(lines):
    """Return one ``[day, tab_line, am_line, pm_line]`` list per keyword hit.

    ``am_line`` and ``pm_line`` are the lines found 3 and 4 lines after the
    keyword line; they stay ``None`` when the input ends, or another keyword
    line appears, before they are reached.
    """
    records = []
    last_hit = None
    for index, line in enumerate(lines):
        # Take only the first matching keyword so that a line mentioning two
        # day names yields a single record.
        day = next((word for word in _DAY_KEYWORDS if word in line), None)
        if day is not None:
            records.append([day, line, None, None])
            last_hit = index
        elif last_hit is not None:
            offset = index - last_hit
            if offset == 3:
                records[-1][2] = line
            elif offset == 4:
                records[-1][3] = line
    return records


def _between_tags(line):
    """Return the text between the first ">" and the single "<" after it."""
    _, rest = line.split(">", 1)
    text, _ = rest.split("<")
    return text


def _parse_record(day, tab_line, am_line, pm_line):
    """Turn one raw record into ``(day, am_swell, am_cond, pm_swell, pm_cond)``.

    Raises:
        ValueError: if a line does not have the expected markup layout.
    """
    # AM condition code sits between "AccordionPanelTab " and the next "_".
    _, rest = tab_line.split("AccordionPanelTab ")
    am_code, _ = rest.split("_")
    # PM condition code is the only double-quoted value on the PM line.
    _, pm_code, _ = pm_line.split('"')
    return (day,
            _between_tags(am_line),
            _CONDITION_LABELS.get(am_code, "!Error!"),
            _between_tags(pm_line),
            _CONDITION_LABELS.get(pm_code, "!Error!"))


def parse_txt(txt):
    """Print the AM/PM swell table for the saved forecast page *txt*.

    Each line containing a day keyword starts an entry; the lines 3 and 4
    below it supply the AM swell and the PM swell/condition. Entries that
    never receive both follow-up lines are skipped, and at most ``_MAX_DAYS``
    entries are printed, so stray keyword hits elsewhere in the page cannot
    misalign or overrun the table.

    Args:
        txt: Path of the saved page. The path with its extension removed is
            used as the beach name in the report heading.

    Raises:
        ValueError: if a collected line does not have the expected markup.
    """
    # The context manager closes the file even when collection fails.
    with open(txt) as in_file:
        records = _collect_day_records(in_file)

    complete = [rec for rec in records
                if rec[2] is not None and rec[3] is not None]
    rows = [_parse_record(*rec) for rec in complete[:_MAX_DAYS]]

    # splitext copes with names that contain no dot or several dots.
    beach = os.path.splitext(txt)[0]

    # Single-argument print(...) emits the same bytes under Python 2's print
    # statement and Python 3's print function.
    print("\nSwell Report for %s" % beach)
    print(_TABLE_RULE)
    print("Day \t\t AM \t\t PM")
    print(_TABLE_RULE)
    for row_number, row in enumerate(rows):
        # The first row gets an extra tab, as in the original layout.
        row_format = _FIRST_ROW_FORMAT if row_number == 0 else _OTHER_ROW_FORMAT
        print(row_format % row)
    print("\n")
# Get the forecast
# Forecast pages to check, in report order
address = [
    'http://www.swellinfo.com/surf-forecast/ocean-beach-california-nw',
    'http://www.swellinfo.com/surf-forecast/ocean-beach-california',
    'http://www.swellinfo.com/surf-forecast/half-moon-bay-california',
    'http://www.swellinfo.com/surf-forecast/pescadero-california',
    'http://www.swellinfo.com/surf-forecast/davenport-california',
    'http://www.swellinfo.com/surf-forecast/santa-cruz-california',
]
# Text files that receive the saved html, one per entry in the address list
txt = [
    'Ocean Beach North.txt',
    'Ocean Beach South.txt',
    'Half Moon Bay.txt',
    'Pescadero.txt',
    'Davenport.txt',
    'Santa Cruz.txt',
]
for page_url, saved_page in zip(address, txt):
    # Download the page and store it on disk
    html_to_text(page_url, saved_page)
    # Read the stored page back and print its swell table
    parse_txt(saved_page)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment