tkan/mundraeuber.py

## mundraeuber.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import json
from geojson import Feature, Point, FeatureCollection
from lxml import etree, html
from urllib2 import urlopen
import re
from tqdm import *

def _fetch_description(nid):
    """Return the plain text of a node page's "processed_text" element.

    Downloads https://www.mundraub.org/node/<nid>, parses the HTML and
    converts the matching element to text. When several elements carry the
    class, the last one is used. Returns None when the page has no such
    element, so a missing description neither raises NameError nor silently
    reuses the text fetched for the previous node.
    """
    # %-formatting also accepts a numeric nid, where "+" would raise TypeError.
    url = "https://www.mundraub.org/node/%s" % nid
    response = urlopen(url)
    try:
        page = response.read().decode('utf-8')
    finally:
        # Close explicitly so the connection is released even if read() fails.
        response.close()

    croutons = html.fromstring(page).find_class("processed_text")
    if not croutons:
        return None
    return html.tostring(croutons[-1], encoding='unicode', method='text')


# open file
with open('plant.json') as data_file:
    data = json.load(data_file)

# one GeoJSON Feature per entry of the input
geo_json_list = []

# iterate over all elements of JSON; tqdm draws the progress bar
for entry in tqdm(data['features'], desc='Getting data '):
    # 'pos' is read as (y, x) while the Point is built as (x, y).
    # presumably pos is [lat, lon] -- TODO confirm against the input data
    y = entry['pos'][0]
    x = entry['pos'][1]
    my_point = Point((float(x), float(y)))
    nid = entry['properties']['nid']

    # get the description from the website
    description = _fetch_description(nid)

    my_feature = Feature(geometry=my_point,
                         properties={'nid': nid, 'description': description})
    geo_json_list.append(my_feature)

# write file
# 'w' creates the file when it is missing and truncates an existing one;
# 'rb+' did neither, so stale bytes could trail the new JSON.
with open('plant.geojson', 'w') as f:
    json.dump(FeatureCollection(geo_json_list), f)
	#!/usr/bin/env python
	# -- coding: utf-8 --

	import json
	from geojson import Feature, Point, FeatureCollection
	from lxml import etree, html
	from urllib2 import urlopen
	import re
	from tqdm import *

	def _fetch_description(nid):
	    """Return the plain text of a node page's "processed_text" element.

	    Downloads https://www.mundraub.org/node/<nid>, parses the HTML and
	    converts the matching element to text. When several elements carry the
	    class, the last one is used. Returns None when the page has no such
	    element, so a missing description neither raises NameError nor silently
	    reuses the text fetched for the previous node.
	    """
	    # %-formatting also accepts a numeric nid, where "+" would raise TypeError.
	    url = "https://www.mundraub.org/node/%s" % nid
	    response = urlopen(url)
	    try:
	        page = response.read().decode('utf-8')
	    finally:
	        # Close explicitly so the connection is released even if read() fails.
	        response.close()

	    croutons = html.fromstring(page).find_class("processed_text")
	    if not croutons:
	        return None
	    return html.tostring(croutons[-1], encoding='unicode', method='text')


	# open file
	with open('plant.json') as data_file:
	    data = json.load(data_file)

	# one GeoJSON Feature per entry of the input
	geo_json_list = []

	# iterate over all elements of JSON; tqdm draws the progress bar
	for entry in tqdm(data['features'], desc='Getting data '):
	    # 'pos' is read as (y, x) while the Point is built as (x, y).
	    # presumably pos is [lat, lon] -- TODO confirm against the input data
	    y = entry['pos'][0]
	    x = entry['pos'][1]
	    my_point = Point((float(x), float(y)))
	    nid = entry['properties']['nid']

	    # get the description from the website
	    description = _fetch_description(nid)

	    my_feature = Feature(geometry=my_point,
	                         properties={'nid': nid, 'description': description})
	    geo_json_list.append(my_feature)

	# write file
	# 'w' creates the file when it is missing and truncates an existing one;
	# 'rb+' did neither, so stale bytes could trail the new JSON.
	with open('plant.geojson', 'w') as f:
	    json.dump(FeatureCollection(geo_json_list), f)