Skip to content

Instantly share code, notes, and snippets.

@tyteen4a03
Created September 5, 2014 20:16
Show Gist options
  • Save tyteen4a03/d8b466dcfba54ae1cddb to your computer and use it in GitHub Desktop.
Save tyteen4a03/d8b466dcfba54ae1cddb to your computer and use it in GitHub Desktop.
import MySQLdb
import csv
import time
import traceback
import warnings
warnings.filterwarnings('ignore', category=MySQLdb.Warning)
def handle_yes_no(input):
if input == "Yes":
return True
elif input == "No":
return False
else:
return None
def handle_bitfields(inputs):
num = 0
i = 0
for ivalue in inputs:
if ivalue == "Yes":
num += 2 ** i
i += 1
return num
def handle_four_seasons(theStr):
num = 0
if theStr == "Year Round":
num = 15
else:
if "Spring" in theStr:
num += 1
if "Summer" in theStr:
num += 2
if "Fall" in theStr:
num += 4
if "Winter" in theStr:
num += 8
return num
db = MySQLdb.connect(user="root", passwd="", db="test")
c = db.cursor()
families = []
finalvalues = []
t = time.time()
reader = csv.reader(open('data.csv', 'rb'))
iterreader = iter(reader)
# Skip first row
next(iterreader)
print "Processing CSV file..."
for row in iterreader:
finalrow = []
# Names
finalrow.extend(row[0:3])
# Family
if row[3] not in families:
families.append(row[3])
finalrow.append(len(families))
else:
# Add one because A_I starts from 1
finalrow.append(families.index(row[3]) + 1)
# links
finalrow.extend(row[4:6])
# active growth period
finalrow.append(handle_four_seasons(row[6]))
# after harvest regrowth rate, bloat, CN Ratio
finalrow.extend(row[7:10])
# coppice potential
finalrow.append(handle_yes_no(row[10]))
# foilage colour, Foliage Porosity Summer/Winter, Foliage Texture, Fruit Color
finalrow.extend(row[11:16])
# Fruit/Seed Conspicuous
finalrow.append(handle_yes_no(row[16]))
# Growth form/rate, height (base/mature)
finalrow.extend(row[17:21])
# known alleopath, leaf retention
finalrow.append(handle_yes_no(row[21]))
finalrow.append(handle_yes_no(row[22]))
# lifespan
finalrow.append(row[23])
# low growing grass
finalrow.append(handle_yes_no(row[24]))
# nitrogen fixation
finalrow.append(row[25])
# can resprout
finalrow.append(handle_yes_no(row[26]))
# shap & orientation, toxicity
finalrow.extend(row[27:29])
# soil adaption; 1 = coarse, 2 = medium, 4 = fine
finalrow.append(handle_bitfields([row[29], row[30], row[31]]))
# anaerobic/caco3 tolerance
finalrow.extend(row[32:34])
# Cold Stratification Required
finalrow.append(handle_yes_no(row[34]))
# Drought Tolerance, Drought Tolerance Fertility Requirement, Fire Tolerance, Frost Free Days Minimum, Hedge Tolerance, Moisture Use, pH (Minimum), pH (Maximum), Planting Density per Acre Minimum, Planting Density per Acre Maximum, Precipitation (Minimum), Precipitation (Maximum), Root Depth Minimum (inches), Salinity Tolerance, Shade Tolerance, Temperature Minimum (F), Bloom Period, Commercial Availability, Fruit/Seed Abundance, fruit/seed season begin, fruit/seed season end
finalrow.extend(row[35:56])
# fruit/seed persistence
finalrow.append(handle_yes_no(row[56]))
# Propogation method; 1 = bare root, 2 = bulbs, 4 = container, 8 = corms, 16 = cuttings, 32 = seed, 64 = Sod, 128 = sprigs, 256 = Tubers
finalrow.append(handle_bitfields([i for i in row[57:66]]))
# Seeds per Pound, Seed Spread Rate, Seedling Vigor
finalrow.extend(row[66:69])
# small grain
finalrow.append(handle_yes_no(row[69]))
# Vegetative Spread Rate
finalrow.append(row[70])
finalvalues.append(finalrow)
print "CSV Processing complete. Inserting into database...."
try:
r = c.execute("INSERT INTO families (name) VALUES " + ", ".join(["('{}')".format(i) for i in families]))
print "Inserted {} family entries.".format(r)
print "Inserting plant entries..."
i = 0
for r in finalvalues:
try:
c.execute("""
INSERT INTO plants
(symbol, scientific_name, common_name, family_id, fact_sheets, plant_guides, active_growth_period, after_harvest_regrowth_rate,
bloat, c_n_ratio, has_coppice_potential, foilage_colour, foliage_porosity_summer, foilage_porosity_winter, foilage_texture,
fruit_seed_colour, fruit_seed_conspicuous, growth_form, growth_rate, height_base_age, height_mature, has_known_allelopath,
leaf_retention, lifespan, is_low_growing_grass, nitrogen_fixation, can_resprout, shape_orientation, toxicity, soil_texture_adaption,
anaerobic_tolerance, caco3_tolerance, require_cold_tratification, drought_tolerance, fertility_requirement, fire_tolerance,
min_frost_free_days, hedge_tolerance, moisture_use, ph_min, ph_max, min_planting_density, max_planting_density, min_precipitation,
max_precipitation, min_root_depth, salinity_tolerance, shade_tolerance, min_temp, bloom_period, commercial_availability,
fruit_seed_abundance, fruit_seed_period_begin, fruit_seed_period_end, fruit_seed_persistence, propogation_method, seeds_per_pound,
seed_spread_rate, seedling_vigor, is_small_grain, vegetative_spread_rate)
VALUES
(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
""", r)
i += 1
except Exception as e:
print traceback.format_exc()
print "Error on row {}, content is {}".format(i, r)
db.rollback()
break
db.commit()
print "Complete! {} rows inserted, took {} seconds.".format(i, time.time() - t)
except MySQLdb.OperationalError as e:
import traceback
print "MySQL server error'd"
print traceback.format_exc()
db.rollback()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment