nvbn/classify_image.py

## classify_image.py
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Simple image classification with Inception.

Run image classification with Inception trained on ImageNet 2012 Challenge data
set.

This program creates a graph from a saved GraphDef protocol buffer,
and runs inference on an input JPEG image. It outputs human readable
strings of the top 5 predictions along with their probabilities.

Change the --image_file argument to any jpg image to compute a
classification of that image.

Please see the tutorial and website for a detailed description of how
to use this script to perform image recognition.

https://tensorflow.org/tutorials/image_recognition/
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os.path
import re
import sys
import tarfile

import numpy as np
from six.moves import urllib
import tensorflow as tf


# Archive that maybe_download_and_extract() downloads and unpacks.
# pylint: disable=line-too-long
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'


# pylint: enable=line-too-long

# Directory that NodeLookup and create_graph() read the model files from;
# init() also passes it to maybe_download_and_extract() as the destination.
model_dir = '/tmp/imagenet'


class NodeLookup(object):
    """Converts integer node ID's to human readable labels.

    The table is built once in the constructor from two text files and kept
    in ``self.node_lookup`` (dict: integer node ID -> label string).
    """

    def __init__(self,
                 label_lookup_path=None,
                 uid_lookup_path=None):
        """Builds the lookup table.

        Args:
          label_lookup_path: file mapping string UID to integer node ID.
            Falls back to the label-map file inside ``model_dir``.
          uid_lookup_path: file mapping string UID to human-readable string.
            Falls back to the synset file inside ``model_dir``.
        """
        if not label_lookup_path:
            label_lookup_path = os.path.join(
                model_dir, 'imagenet_2012_challenge_label_map_proto.pbtxt')
        if not uid_lookup_path:
            uid_lookup_path = os.path.join(
                model_dir, 'imagenet_synset_to_human_label_map.txt')
        self.node_lookup = self.load(label_lookup_path, uid_lookup_path)

    def load(self, label_lookup_path, uid_lookup_path):
        """Loads a human readable English name for each softmax node.

        Args:
          label_lookup_path: string UID to integer node ID.
          uid_lookup_path: string UID to human-readable string.

        Returns:
          dict from integer node ID to human-readable string.
        """
        if not tf.gfile.Exists(uid_lookup_path):
            tf.logging.fatal('File does not exist %s', uid_lookup_path)
        if not tf.gfile.Exists(label_lookup_path):
            tf.logging.fatal('File does not exist %s', label_lookup_path)

        # Loads mapping from string UID to human-readable string. The file is
        # read inside a ``with`` block so the handle is closed immediately.
        with tf.gfile.GFile(uid_lookup_path) as uid_file:
            proto_as_ascii_lines = uid_file.readlines()
        uid_to_human = {}
        p = re.compile(r'[n\d]*[ \S,]*')
        for line in proto_as_ascii_lines:
            # Match 0 is used as the UID and match 2 as the label (match 1 is
            # presumably the empty match at the separator -- confirm against
            # the file format).
            parsed_items = p.findall(line)
            uid = parsed_items[0]
            human_string = parsed_items[2]
            uid_to_human[uid] = human_string

        # Loads mapping from string UID to integer node ID.
        node_id_to_uid = {}
        with tf.gfile.GFile(label_lookup_path) as label_file:
            proto_as_ascii = label_file.readlines()
        for line in proto_as_ascii:
            if line.startswith('  target_class:'):
                target_class = int(line.split(': ')[1])
            if line.startswith('  target_class_string:'):
                target_class_string = line.split(': ')[1]
                # [1:-2] drops the first character and the last two
                # (presumably the surrounding quotes and the newline).
                node_id_to_uid[target_class] = target_class_string[1:-2]

        # Loads the final mapping of integer node ID to human-readable string
        node_id_to_name = {}
        for key, val in node_id_to_uid.items():
            if val not in uid_to_human:
                tf.logging.fatal('Failed to locate: %s', val)
            name = uid_to_human[val]
            node_id_to_name[key] = name

        return node_id_to_name

    def id_to_string(self, node_id):
        """Returns the label for ``node_id``, or '' when the ID is unknown."""
        return self.node_lookup.get(node_id, '')


def create_graph():
    """Imports the saved GraphDef file into the TensorFlow graph.

    The serialized graph is read from 'classify_image_graph_def.pb' inside
    ``model_dir``. Nothing is returned.
    """
    graph_path = os.path.join(model_dir, 'classify_image_graph_def.pb')
    with tf.gfile.FastGFile(graph_path, 'rb') as graph_file:
        parsed_graph = tf.GraphDef()
        parsed_graph.ParseFromString(graph_file.read())
        # name='' imports the nodes without adding a name prefix.
        tf.import_graph_def(parsed_graph, name='')


def run_inference_on_image(image, num_top_predictions):
    """Runs inference on an image.

    Args:
      image: Image file name.
      num_top_predictions: Number of highest-scoring labels to report.

    Returns:
      List of (human_string, score) tuples ordered from highest to lowest
      score. Each entry is also printed.
    """
    if not tf.gfile.Exists(image):
        tf.logging.fatal('File does not exist %s', image)
    # Read inside a ``with`` block so the file handle is closed right away.
    with tf.gfile.FastGFile(image, 'rb') as image_file:
        image_data = image_file.read()

    result = []
    with tf.Session() as sess:
        # Some useful tensors:
        # 'softmax:0': A tensor containing the normalized prediction across
        #   1000 labels.
        # 'pool_3:0': A tensor containing the next-to-last layer containing 2048
        #   float description of the image.
        # 'DecodeJpeg/contents:0': A tensor containing a string providing JPEG
        #   encoding of the image.
        # Runs the softmax tensor by feeding the image_data as input to the graph.
        softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
        predictions = sess.run(softmax_tensor,
                               {'DecodeJpeg/contents:0': image_data})
        predictions = np.squeeze(predictions)

        # Creates node ID --> English string lookup.
        node_lookup = NodeLookup()

        # Reverse first, then take the head: unlike slicing with
        # [-num_top_predictions:], this yields nothing (rather than every
        # label) when num_top_predictions is 0.
        top_k = predictions.argsort()[::-1][:num_top_predictions]
        for node_id in top_k:
            human_string = node_lookup.id_to_string(node_id)
            score = predictions[node_id]
            result.append((human_string, score))
            print('%s (score = %.5f)' % (human_string, score))
    return result


def maybe_download_and_extract(dest_directory):
    """Download the model tar file if it is missing, then extract it.

    Args:
      dest_directory: Directory that receives the archive and its contents.
        It is created when it does not exist yet.
    """
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # The total size can be reported as 0 or -1 when it is unknown;
            # skip the percentage then instead of dividing by it.
            if total_size > 0:
                sys.stdout.write('\r>> Downloading %s %.1f%%' % (
                    filename,
                    float(count * block_size) / float(total_size) * 100.0))
            else:
                sys.stdout.write('\r>> Downloading %s' % filename)
            sys.stdout.flush()

        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive, and DATA_URL is plain http -- confirm this is acceptable.
    with tarfile.open(filepath, 'r:gz') as archive:
        archive.extractall(dest_directory)


def init():
    """Prepares the module for inference.

    Makes sure the model files are present in ``model_dir`` and then imports
    the graph from them.
    """
    # Download first: create_graph() opens a file inside model_dir, which is
    # where the archive gets extracted.
    maybe_download_and_extract(model_dir)
    # Creates graph from saved GraphDef.
    create_graph()


def main(_):
    """Classifies the image named by --image_file and prints the top labels.

    Called through tf.app.run, which passes the remaining argv (ignored).
    Reads the module-level FLAGS set in the ``__main__`` block below.
    """
    global model_dir
    if not FLAGS.image_file:
        sys.exit('--image_file is required.')
    # Apply --model_dir: the other functions read the module-level variable.
    model_dir = FLAGS.model_dir
    maybe_download_and_extract(model_dir)
    create_graph()
    run_inference_on_image(FLAGS.image_file, FLAGS.num_top_predictions)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # classify_image_graph_def.pb:
    #   Binary representation of the GraphDef protocol buffer.
    # imagenet_synset_to_human_label_map.txt:
    #   Map from synset ID to a human readable string.
    # imagenet_2012_challenge_label_map_proto.pbtxt:
    #   Text representation of a protocol buffer mapping a label to synset ID.
    parser.add_argument(
        '--model_dir',
        type=str,
        default='/tmp/imagenet',
        help="""\
      Path to classify_image_graph_def.pb,
      imagenet_synset_to_human_label_map.txt, and
      imagenet_2012_challenge_label_map_proto.pbtxt.\
      """
    )
    parser.add_argument(
        '--image_file',
        type=str,
        default='',
        help='Absolute path to image file.'
    )
    parser.add_argument(
        '--num_top_predictions',
        type=int,
        default=5,
        help='Display this many predictions.'
    )
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

## playground.py
from datetime import datetime, timedelta
import glob
import json
import re
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
import tweepy
import PIL.Image
import PIL.ExifTags
import pandas as pd
from . import classify_image


# Display settings for DataFrames.
# NOTE(review): -1 looks like the legacy "no limit" value for max_colwidth;
# newer pandas expects None -- confirm against the pandas version in use.
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', None)

# Twitter API credentials used by get_tweets(); left blank in the source.
TWITTER_CONSUMER_KEY = ''
TWITTER_CONSUMER_SECRET = ''
TWITTER_ACCESS_TOKEN = ''
TWITTER_ACCESS_TOKEN_SECRET = ''
# Account whose timeline get_tweets() reads.
USER_ID = '21653573'
# Separator the flight tweets are split on to get their start and finish parts.
MARKER = '✈'


def get_tweets():
    """Return an iterator over the timeline tweets of ``USER_ID``.

    Authenticates with the module-level Twitter credentials and pages through
    the user timeline, requesting 200 tweets per page.
    """
    handler = tweepy.OAuthHandler(TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET)
    handler.set_access_token(TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET)
    client = tweepy.API(handler)
    timeline_options = {
        'user_id': USER_ID,
        'exclude_replies': 'false',
        'include_rts': 'false',
        'count': 200,
    }
    return tweepy.Cursor(client.user_timeline, **timeline_options).items()


# Load the timeline, keep the tweets inside the date window, and treat the
# ones written entirely in upper case as flight announcements.
all_tweets = pd.DataFrame(
    [(status.text, status.created_at) for status in get_tweets()],
    columns=['text', 'created_at'],
)
tweets_in_dates = all_tweets[
    (all_tweets['created_at'] > datetime(2018, 9, 8))
    & (all_tweets['created_at'] < datetime(2018, 9, 30))
]
flights_tweets = tweets_in_dates.loc[
    tweets_in_dates['text'].str.upper() == tweets_in_dates['text']
]

# The text before the first MARKER is the start, after the last the finish.
flights_tweets = flights_tweets.assign(
    start=lambda frame: frame['text'].str.split(MARKER).str[0],
).assign(
    finish=lambda frame: frame['text'].str.split(MARKER).str[-1],
)

flights = flights_tweets[['start', 'finish', 'created_at']] \
    .sort_values('created_at')


def get_iata_to_city():
    """Return a dict mapping IATA code to city, read from 'airports.json'.

    Airports whose 'iata' value is empty are left out.
    """
    with open('airports.json') as airports_file:
        airports = json.load(airports_file)

    mapping = {}
    for record in airports.values():
        code = record['iata']
        if code:
            mapping[code] = record['city']
    return mapping

iata_to_city = get_iata_to_city()
iata_to_city.update({'EZE': 'Buenos-Aires'})

# Replace both endpoint columns with city names; non-word characters are
# stripped from each code before the lookup.
flights = flights.assign(**{
    column: flights[column].apply(
        lambda raw: iata_to_city[re.sub(r'\W+', '', raw)])
    for column in ('start', 'finish')
})

# One row per flight: the departure city, when the previous flight was
# tweeted, and the time elapsed between the two tweets.
cities = flights.assign(
    arrived=flights['created_at'].shift(1),
    city=flights['start'],
    spent=flights['created_at'] - flights['created_at'].shift(1),
)[['city', 'spent', 'arrived']]
cities = cities.assign(left=cities['arrived'] + cities['spent'])[
    cities['spent'].dt.days > 0
]


# Axis values are divided by 10**9 and shown as a timedelta string.
formatter = tkr.FuncFormatter(
    lambda value, _: str(timedelta(seconds=value / 10 ** 9)))

(cities
 .plot(x='city', y='spent', kind='bar', legend=False, title='Cities')
 .yaxis
 .set_major_formatter(formatter))
plt.tight_layout()


def read_photos():
    """Yield ``(file name, capture time)`` for each JPEG under ``photos/``.

    The capture time is parsed from the EXIF 'DateTime' tag. Photos with no
    EXIF data or no 'DateTime' tag are skipped, since they cannot be placed
    on the timeline.

    Yields:
      Tuples of (path string, datetime).
    """
    for name in glob.glob('photos/*.jpg'):
        # The context manager closes the image file once EXIF has been read.
        with PIL.Image.open(name) as img:
            # _getexif() may return None when the file has no EXIF block.
            raw_exif = img._getexif() or {}
        exif = {
            PIL.ExifTags.TAGS[k]: v
            for k, v in raw_exif.items()
            if k in PIL.ExifTags.TAGS
        }
        taken = exif.get('DateTime')
        if not taken:
            continue
        yield name, datetime.strptime(taken, '%Y:%m:%d %H:%M:%S')


raw_photos = pd.DataFrame(list(read_photos()), columns=['name', 'created_at'])

# Pair every photo with every city stay through a constant join key, then
# keep the pairs where the photo was taken during the stay.
photos_cities = pd.merge(
    raw_photos.assign(key=0), cities.assign(key=0), how='outer')
photos = photos_cities[
    (photos_cities['created_at'] >= photos_cities['arrived'])
    & (photos_cities['created_at'] <= photos_cities['left'])
]

photos_by_city = (
    photos
    .groupby(by='city')
    .agg({'name': 'count'})
    .rename(columns={'name': 'photos'})
    .reset_index()
)

photos_by_city.plot(
    x='city', y='photos', kind='bar', title='Photos by city', legend=False)
plt.tight_layout()


classify_image.init()
# Classify every photo, keep only its best (label, score) pair, and spread
# the pair over two columns.
tags = (
    photos['name']
    .apply(lambda path: classify_image.run_inference_on_image(path, 1)[0])
    .apply(pd.Series)
)
tags.columns = ['tag', 'score']

tagged_photos = photos.copy()
tagged_photos[['tag', 'score']] = tags.apply(pd.Series)
# A label can list several comma-separated names; keep the first.
tagged_photos['tag'] = tagged_photos['tag'].apply(
    lambda label: label.split(', ')[0])

photos_by_tag = (
    tagged_photos[['tag', 'name']]
    .groupby(by='tag')
    .agg({'name': 'count'})
    .rename(columns={'name': 'photos'})
    .reset_index()
    .sort_values('photos', ascending=False)
    .head(10)
)

photos_by_tag.plot(
    x='tag', y='photos', kind='bar', legend=False, title='Popular tags')
plt.tight_layout()


# Keep the five most frequent tags and fold every other tag into 'other'.
popular_tags = photos_by_tag.head(5).tag
popular_tagged = tagged_photos[tagged_photos.tag.isin(popular_tags)]
not_popular_tagged = tagged_photos[~tagged_photos.tag.isin(popular_tags)].assign(
    tag='other')
# pd.concat stacks the two frames exactly as DataFrame.append did, and keeps
# working on pandas versions where DataFrame.append no longer exists.
by_tag_city = pd.concat([popular_tagged, not_popular_tagged]) \
    .groupby(by=['city', 'tag']) \
    .count()['name'] \
    .unstack(fill_value=0)


by_tag_city.plot(kind='bar', stacked=True)
plt.tight_layout()
	# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================

	"""Simple image classification with Inception.

	Run image classification with Inception trained on ImageNet 2012 Challenge data
	set.

	This program creates a graph from a saved GraphDef protocol buffer,
	and runs inference on an input JPEG image. It outputs human readable
	strings of the top 5 predictions along with their probabilities.

	Change the --image_file argument to any jpg image to compute a
	classification of that image.

	Please see the tutorial and website for a detailed description of how
	to use this script to perform image recognition.

	https://tensorflow.org/tutorials/image_recognition/
	"""

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	import argparse
	import os.path
	import re
	import sys
	import tarfile

	import numpy as np
	from six.moves import urllib
	import tensorflow as tf


	# Archive that maybe_download_and_extract() downloads and unpacks.
	# pylint: disable=line-too-long
	DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'


	# pylint: enable=line-too-long

	# Directory that NodeLookup and create_graph() read the model files from;
	# init() also passes it to maybe_download_and_extract() as the destination.
	model_dir = '/tmp/imagenet'


	class NodeLookup(object):
	    """Converts integer node ID's to human readable labels.

	    The table is built once in the constructor from two text files and kept
	    in ``self.node_lookup`` (dict: integer node ID -> label string).
	    """

	    def __init__(self,
	                 label_lookup_path=None,
	                 uid_lookup_path=None):
	        """Builds the lookup table.

	        Args:
	          label_lookup_path: file mapping string UID to integer node ID.
	            Falls back to the label-map file inside ``model_dir``.
	          uid_lookup_path: file mapping string UID to human-readable string.
	            Falls back to the synset file inside ``model_dir``.
	        """
	        if not label_lookup_path:
	            label_lookup_path = os.path.join(
	                model_dir, 'imagenet_2012_challenge_label_map_proto.pbtxt')
	        if not uid_lookup_path:
	            uid_lookup_path = os.path.join(
	                model_dir, 'imagenet_synset_to_human_label_map.txt')
	        self.node_lookup = self.load(label_lookup_path, uid_lookup_path)

	    def load(self, label_lookup_path, uid_lookup_path):
	        """Loads a human readable English name for each softmax node.

	        Args:
	          label_lookup_path: string UID to integer node ID.
	          uid_lookup_path: string UID to human-readable string.

	        Returns:
	          dict from integer node ID to human-readable string.
	        """
	        if not tf.gfile.Exists(uid_lookup_path):
	            tf.logging.fatal('File does not exist %s', uid_lookup_path)
	        if not tf.gfile.Exists(label_lookup_path):
	            tf.logging.fatal('File does not exist %s', label_lookup_path)

	        # Loads mapping from string UID to human-readable string. The file is
	        # read inside a ``with`` block so the handle is closed immediately.
	        with tf.gfile.GFile(uid_lookup_path) as uid_file:
	            proto_as_ascii_lines = uid_file.readlines()
	        uid_to_human = {}
	        # The '*' quantifiers are required: without them every match is
	        # only two characters long.
	        p = re.compile(r'[n\d]*[ \S,]*')
	        for line in proto_as_ascii_lines:
	            # Match 0 is used as the UID and match 2 as the label (match 1
	            # is presumably the empty match at the separator -- confirm
	            # against the file format).
	            parsed_items = p.findall(line)
	            uid = parsed_items[0]
	            human_string = parsed_items[2]
	            uid_to_human[uid] = human_string

	        # Loads mapping from string UID to integer node ID.
	        node_id_to_uid = {}
	        with tf.gfile.GFile(label_lookup_path) as label_file:
	            proto_as_ascii = label_file.readlines()
	        for line in proto_as_ascii:
	            # Two-space prefixes, matching the intact copy of this class
	            # earlier in the file.
	            if line.startswith('  target_class:'):
	                target_class = int(line.split(': ')[1])
	            if line.startswith('  target_class_string:'):
	                target_class_string = line.split(': ')[1]
	                # [1:-2] drops the first character and the last two
	                # (presumably the surrounding quotes and the newline).
	                node_id_to_uid[target_class] = target_class_string[1:-2]

	        # Loads the final mapping of integer node ID to human-readable string
	        node_id_to_name = {}
	        for key, val in node_id_to_uid.items():
	            if val not in uid_to_human:
	                tf.logging.fatal('Failed to locate: %s', val)
	            name = uid_to_human[val]
	            node_id_to_name[key] = name

	        return node_id_to_name

	    def id_to_string(self, node_id):
	        """Returns the label for ``node_id``, or '' when the ID is unknown."""
	        return self.node_lookup.get(node_id, '')


	def create_graph():
	    """Imports the saved GraphDef file into the TensorFlow graph.

	    The serialized graph is read from 'classify_image_graph_def.pb' inside
	    ``model_dir``. Nothing is returned.
	    """
	    # Creates graph from saved graph_def.pb.
	    with tf.gfile.FastGFile(os.path.join(
	            model_dir, 'classify_image_graph_def.pb'), 'rb') as f:
	        graph_def = tf.GraphDef()
	        graph_def.ParseFromString(f.read())
	        # name='' imports the nodes without adding a name prefix.
	        _ = tf.import_graph_def(graph_def, name='')


	def run_inference_on_image(image, num_top_predictions):
	    """Runs inference on an image.

	    Args:
	      image: Image file name.
	      num_top_predictions: Number of highest-scoring labels to report.

	    Returns:
	      List of (human_string, score) tuples ordered from highest to lowest
	      score. Each entry is also printed.
	    """
	    if not tf.gfile.Exists(image):
	        tf.logging.fatal('File does not exist %s', image)
	    # Read inside a ``with`` block so the file handle is closed right away.
	    with tf.gfile.FastGFile(image, 'rb') as image_file:
	        image_data = image_file.read()

	    result = []
	    with tf.Session() as sess:
	        # Some useful tensors:
	        # 'softmax:0': A tensor containing the normalized prediction across
	        #   1000 labels.
	        # 'pool_3:0': A tensor containing the next-to-last layer containing 2048
	        #   float description of the image.
	        # 'DecodeJpeg/contents:0': A tensor containing a string providing JPEG
	        #   encoding of the image.
	        # Runs the softmax tensor by feeding the image_data as input to the graph.
	        softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
	        predictions = sess.run(softmax_tensor,
	                               {'DecodeJpeg/contents:0': image_data})
	        predictions = np.squeeze(predictions)

	        # Creates node ID --> English string lookup.
	        node_lookup = NodeLookup()

	        # Reverse first, then take the head: unlike slicing with
	        # [-num_top_predictions:], this yields nothing (rather than every
	        # label) when num_top_predictions is 0.
	        top_k = predictions.argsort()[::-1][:num_top_predictions]
	        for node_id in top_k:
	            human_string = node_lookup.id_to_string(node_id)
	            score = predictions[node_id]
	            result.append((human_string, score))
	            print('%s (score = %.5f)' % (human_string, score))
	    return result


	def maybe_download_and_extract(dest_directory):
	    """Download the model tar file if it is missing, then extract it.

	    Args:
	      dest_directory: Directory that receives the archive and its contents.
	        It is created when it does not exist yet.
	    """
	    if not os.path.exists(dest_directory):
	        os.makedirs(dest_directory)
	    filename = DATA_URL.split('/')[-1]
	    filepath = os.path.join(dest_directory, filename)
	    if not os.path.exists(filepath):
	        def _progress(count, block_size, total_size):
	            # The total size can be reported as 0 or -1 when it is unknown;
	            # skip the percentage then instead of dividing by it.
	            if total_size > 0:
	                sys.stdout.write('\r>> Downloading %s %.1f%%' % (
	                    filename,
	                    float(count * block_size) / float(total_size) * 100.0))
	            else:
	                sys.stdout.write('\r>> Downloading %s' % filename)
	            sys.stdout.flush()

	        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
	        print()
	        statinfo = os.stat(filepath)
	        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
	    # NOTE(review): extractall() trusts the member paths stored in the
	    # archive, and DATA_URL is plain http -- confirm this is acceptable.
	    with tarfile.open(filepath, 'r:gz') as archive:
	        archive.extractall(dest_directory)



	def init():
	    """Prepares the module for inference.

	    Makes sure the model files are present in ``model_dir`` and then imports
	    the graph from them.
	    """
	    # Download first: create_graph() opens a file inside model_dir, which is
	    # where the archive gets extracted.
	    maybe_download_and_extract(model_dir)
	    # Creates graph from saved GraphDef.
	    create_graph()


	def main(_):
	    """Classifies the image named by --image_file and prints the top labels.

	    Called through tf.app.run, which passes the remaining argv (ignored).
	    Reads the module-level FLAGS set in the ``__main__`` block below.
	    """
	    global model_dir
	    if not FLAGS.image_file:
	        sys.exit('--image_file is required.')
	    # Apply --model_dir: the other functions read the module-level variable.
	    model_dir = FLAGS.model_dir
	    maybe_download_and_extract(model_dir)
	    create_graph()
	    run_inference_on_image(FLAGS.image_file, FLAGS.num_top_predictions)


	if __name__ == '__main__':
	    parser = argparse.ArgumentParser()
	    # classify_image_graph_def.pb:
	    #   Binary representation of the GraphDef protocol buffer.
	    # imagenet_synset_to_human_label_map.txt:
	    #   Map from synset ID to a human readable string.
	    # imagenet_2012_challenge_label_map_proto.pbtxt:
	    #   Text representation of a protocol buffer mapping a label to synset ID.
	    parser.add_argument(
	        '--model_dir',
	        type=str,
	        default='/tmp/imagenet',
	        help="""\
	      Path to classify_image_graph_def.pb,
	      imagenet_synset_to_human_label_map.txt, and
	      imagenet_2012_challenge_label_map_proto.pbtxt.\
	      """
	    )
	    parser.add_argument(
	        '--image_file',
	        type=str,
	        default='',
	        help='Absolute path to image file.'
	    )
	    parser.add_argument(
	        '--num_top_predictions',
	        type=int,
	        default=5,
	        help='Display this many predictions.'
	    )
	    FLAGS, unparsed = parser.parse_known_args()
	    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
	from datetime import datetime, timedelta
	import glob
	import json
	import re
	import matplotlib.pyplot as plt
	import matplotlib.ticker as tkr
	import tweepy
	import PIL.Image
	import PIL.ExifTags
	import pandas as pd
	from . import classify_image


	# Display settings for DataFrames.
	# NOTE(review): -1 looks like the legacy "no limit" value for max_colwidth;
	# newer pandas expects None -- confirm against the pandas version in use.
	pd.set_option('display.max_colwidth', -1)
	pd.set_option('display.max_columns', None)

	# Twitter API credentials used by get_tweets(); left blank in the source.
	TWITTER_CONSUMER_KEY = ''
	TWITTER_CONSUMER_SECRET = ''
	TWITTER_ACCESS_TOKEN = ''
	TWITTER_ACCESS_TOKEN_SECRET = ''
	# Account whose timeline get_tweets() reads.
	USER_ID = '21653573'
	# Separator the flight tweets are split on to get their start and finish parts.
	MARKER = '✈'


	def get_tweets():
	    """Return an iterator over the timeline tweets of ``USER_ID``.

	    Authenticates with the module-level Twitter credentials and pages through
	    the user timeline, requesting 200 tweets per page.
	    """
	    auth = tweepy.OAuthHandler(TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET)
	    auth.set_access_token(TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET)
	    api = tweepy.API(auth)
	    cursor = tweepy.Cursor(api.user_timeline,
	                           user_id=USER_ID,
	                           exclude_replies='false',
	                           include_rts='false',
	                           count=200)
	    return cursor.items()


	# Load the timeline, keep the tweets inside the date window, and treat the
	# ones written entirely in upper case as flight announcements.
	all_tweets = pd.DataFrame(
	    [(status.text, status.created_at) for status in get_tweets()],
	    columns=['text', 'created_at'],
	)
	tweets_in_dates = all_tweets[
	    (all_tweets['created_at'] > datetime(2018, 9, 8))
	    & (all_tweets['created_at'] < datetime(2018, 9, 30))
	]
	flights_tweets = tweets_in_dates.loc[
	    tweets_in_dates['text'].str.upper() == tweets_in_dates['text']
	]

	# The text before the first MARKER is the start, after the last the finish.
	flights_tweets = flights_tweets.assign(
	    start=lambda frame: frame['text'].str.split(MARKER).str[0],
	).assign(
	    finish=lambda frame: frame['text'].str.split(MARKER).str[-1],
	)

	flights = flights_tweets[['start', 'finish', 'created_at']] \
	    .sort_values('created_at')


	def get_iata_to_city():
	    """Return a dict mapping IATA code to city, read from 'airports.json'.

	    Airports whose 'iata' value is empty are left out.
	    """
	    with open('airports.json') as f:
	        data = json.load(f)
	        return {airport['iata']: airport['city']
	                for airport in data.values()
	                if airport['iata']}

	iata_to_city = get_iata_to_city()
	iata_to_city.update({'EZE': 'Buenos-Aires'})

	# Replace both endpoint columns with city names; non-word characters are
	# stripped from each code before the lookup.
	flights = flights.assign(**{
	    column: flights[column].apply(
	        lambda raw: iata_to_city[re.sub(r'\W+', '', raw)])
	    for column in ('start', 'finish')
	})

	# One row per flight: the departure city, when the previous flight was
	# tweeted, and the time elapsed between the two tweets.
	cities = flights.assign(
	    arrived=flights['created_at'].shift(1),
	    city=flights['start'],
	    spent=flights['created_at'] - flights['created_at'].shift(1),
	)[['city', 'spent', 'arrived']]
	cities = cities.assign(left=cities['arrived'] + cities['spent'])[
	    cities['spent'].dt.days > 0
	]


	# Axis values are divided by 10**9 and shown as a timedelta string.
	formatter = tkr.FuncFormatter(
	    lambda value, _: str(timedelta(seconds=value / 10 ** 9)))

	(cities
	 .plot(x='city', y='spent', kind='bar', legend=False, title='Cities')
	 .yaxis
	 .set_major_formatter(formatter))
	plt.tight_layout()



	def read_photos():
	    """Yield ``(file name, capture time)`` for each JPEG under ``photos/``.

	    The capture time is parsed from the EXIF 'DateTime' tag. Photos with no
	    EXIF data or no 'DateTime' tag are skipped, since they cannot be placed
	    on the timeline.

	    Yields:
	      Tuples of (path string, datetime).
	    """
	    for name in glob.glob('photos/*.jpg'):
	        # The context manager closes the image file once EXIF has been read.
	        with PIL.Image.open(name) as img:
	            # _getexif() may return None when the file has no EXIF block.
	            raw_exif = img._getexif() or {}
	        exif = {
	            PIL.ExifTags.TAGS[k]: v
	            for k, v in raw_exif.items()
	            if k in PIL.ExifTags.TAGS
	        }
	        taken = exif.get('DateTime')
	        if not taken:
	            continue
	        yield name, datetime.strptime(taken, '%Y:%m:%d %H:%M:%S')


	raw_photos = pd.DataFrame(list(read_photos()), columns=['name', 'created_at'])

	# Pair every photo with every city stay through a constant join key, then
	# keep the pairs where the photo was taken during the stay.
	photos_cities = pd.merge(
	    raw_photos.assign(key=0), cities.assign(key=0), how='outer')
	photos = photos_cities[
	    (photos_cities['created_at'] >= photos_cities['arrived'])
	    & (photos_cities['created_at'] <= photos_cities['left'])
	]

	photos_by_city = (
	    photos
	    .groupby(by='city')
	    .agg({'name': 'count'})
	    .rename(columns={'name': 'photos'})
	    .reset_index()
	)

	photos_by_city.plot(
	    x='city', y='photos', kind='bar', title='Photos by city', legend=False)
	plt.tight_layout()


	classify_image.init()
	# Classify every photo, keep only its best (label, score) pair, and spread
	# the pair over two columns.
	tags = (
	    photos['name']
	    .apply(lambda path: classify_image.run_inference_on_image(path, 1)[0])
	    .apply(pd.Series)
	)
	tags.columns = ['tag', 'score']

	tagged_photos = photos.copy()
	tagged_photos[['tag', 'score']] = tags.apply(pd.Series)
	# A label can list several comma-separated names; keep the first.
	tagged_photos['tag'] = tagged_photos['tag'].apply(
	    lambda label: label.split(', ')[0])

	photos_by_tag = (
	    tagged_photos[['tag', 'name']]
	    .groupby(by='tag')
	    .agg({'name': 'count'})
	    .rename(columns={'name': 'photos'})
	    .reset_index()
	    .sort_values('photos', ascending=False)
	    .head(10)
	)

	photos_by_tag.plot(
	    x='tag', y='photos', kind='bar', legend=False, title='Popular tags')
	plt.tight_layout()


	# Keep the five most frequent tags and fold every other tag into 'other'.
	popular_tags = photos_by_tag.head(5).tag
	popular_tagged = tagged_photos[tagged_photos.tag.isin(popular_tags)]
	not_popular_tagged = tagged_photos[~tagged_photos.tag.isin(popular_tags)].assign(
	    tag='other')
	# pd.concat stacks the two frames exactly as DataFrame.append did, and keeps
	# working on pandas versions where DataFrame.append no longer exists.
	by_tag_city = pd.concat([popular_tagged, not_popular_tagged]) \
	    .groupby(by=['city', 'tag']) \
	    .count()['name'] \
	    .unstack(fill_value=0)


	by_tag_city.plot(kind='bar', stacked=True)
	plt.tight_layout()