Skip to content

Instantly share code, notes, and snippets.

@tuxdna
Created May 9, 2016 10:23
Show Gist options
  • Save tuxdna/872beea8db5600a23360dac3caa207e3 to your computer and use it in GitHub Desktop.
Save tuxdna/872beea8db5600a23360dac3caa207e3 to your computer and use it in GitHub Desktop.
# Finding the top 20 tags on the Stack Overflow main site
import json
import pandas as pd
import numpy as np
import xmltodict
import matplotlib.pyplot as plt
# Parse Tags.xml (presumably the tag list from a Stack Overflow data dump --
# confirm the source) into a DataFrame with one record per <row> element.
# xmltodict exposes XML attributes as dict keys prefixed with '@', which is
# why the columns are named '@Count' and '@TagName'.
# The context manager guarantees the file handle is closed after reading,
# even if the read raises.
with open("Tags.xml") as f:
    all_data = f.read()
o = xmltodict.parse(all_data)
df = pd.DataFrame.from_dict(o['tags']['row'])
# Attribute values are parsed as strings; convert to int so the sort below
# orders numerically rather than lexicographically.
df[['counts']] = df[['@Count']].astype(int)
# Keep only the 20 most-used tags, highest count first.
df2 = df.sort_values(by=['counts'], ascending=False).head(20)[['counts', '@TagName']]
# Sample contents of df2 captured from an interactive IPython session.
# The leading number on each row is the DataFrame index, i.e. the row's
# position in the parsed file; the figures reflect the data at capture time.
"""
In [53]: df2
counts @TagName
2 1067078 javascript
11 1025688 java
6 918586 c#
4 885422 php
703 800779 android
422 712360 jquery
10 542985 python
1 511091 html
7 431790 c++
19333 414394 ios
14 380535 mysql
3 372444 css
15 319001 sql
56 282582 asp.net
3199 253474 objective-c
2327 235532 ruby-on-rails
0 227675 .net
86 210953 iphone
5 210835 c
64 170559 arrays
"""
# Draw the selected tag counts as a bar chart and save it to plot.png.
# Series.as_matrix() was removed in pandas 1.0 and raises AttributeError on
# current versions; .values returns the same underlying ndarray and matches
# how the labels are read two lines below.
counts = df2['counts'].values
# One integer bar position per tag: 0, 1, ..., len(counts) - 1.
x = np.arange(len(counts))
labels = df2['@TagName'].values
fig = plt.figure()
fig.set_size_inches(15, 10.5)
# Axes rectangle is [left, bottom, width, height] in figure-fraction units,
# leaving a 10% margin on every side.
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
ax.set_xlabel("Tags")
ax.set_ylabel("Counts")
ax.bar(x, counts, align='center')
# Put one tick under each bar and label it with the tag name.
ax.set_xticks(x)
ax.set_xticklabels(labels)
fig.show()
fig.savefig('plot.png', format='png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment