public
Last active

Diagnosing Memory Leaks in Matplotlib

  • Download Gist
test_oo.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
# Object-oriented API
#
# Memory usage (iteration, object count, memory size)
# 100 5637 1562216
# 200 5529 1491528
# 300 5422 1426264
# 400 5758 1587376
# 500 5422 1426288
# 600 5416 1440456
# 700 5610 1515056
# 800 5422 1426032
# 900 5530 1493264
#
# 241.35 real 239.52 user 1.38 sys
 
import matplotlib as mpl
mpl.use('Agg')
 
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
 
import numpy as np
 
import guppy
 
heapy = guppy.hpy()

# Open the log in text mode ('w', not 'wb'): the loop writes str, which a
# binary-mode file rejects on Python 3.  The with-block also guarantees the
# log is closed if plotting raises part-way through the run.
with open('memory_oo.txt', 'w') as mem:

    # Set the heapy reference point after the log file exists, so later
    # heap() snapshots are taken relative to this state.
    heapy.setref()

    for i in range(1000):

        # Every 100 iterations, log "iteration object-count memory-size".
        if i % 100 == 0:
            h = heapy.heap()
            mem.write('%i %s %s\n' % (i, h.count, h.size))

        # Build and save one figure per iteration using only the
        # object-oriented API (Figure + Agg canvas, no pyplot).
        fig = Figure()
        canvas = FigureCanvas(fig)
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(np.random.random(10), np.random.random(10))
        canvas.print_figure('test.png')
test_pyplot.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
 
# Original
#
# Memory usage (iteration, object count, memory size)
# 100 431941 134768552
# 200 862736 269479552
# 300 1295682 405239168
# 400 1726603 539967136
# 500 2157948 674784872
# 600 2589067 809598080
# 700 3020848 944697920
# 800 3451516 1079398704
# 900 3884074 1214923736
#
# 2542.85 real 2535.49 user 6.66 sys
 
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
 
import numpy as np
 
import guppy
 
heapy = guppy.hpy()

# Open the log in text mode ('w', not 'wb'): the loop writes str, which a
# binary-mode file rejects on Python 3.  The with-block also guarantees the
# log is closed if plotting raises part-way through the run.
with open('memory_pyplot.txt', 'w') as mem:

    # Set the heapy reference point after the log file exists, so later
    # heap() snapshots are taken relative to this state.
    heapy.setref()

    for i in range(1000):

        # Every 100 iterations, log "iteration object-count memory-size".
        if i % 100 == 0:
            h = heapy.heap()
            mem.write('%i %s %s\n' % (i, h.count, h.size))

        # Baseline case: the pyplot figure is deliberately never cleared or
        # closed, so this script records the growth being diagnosed.
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(np.random.random(10), np.random.random(10))
        fig.savefig('test.png')

        # Push logged measurements to disk as the (long) run progresses.
        mem.flush()
test_pyplot_clf.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
# Added fig.clf()
#
# Memory usage (iteration, object count, memory size)
# 100 24523 7963408
# 200 43692 14487880
# 300 63681 21294464
# 400 82909 27825696
# 500 102409 34451408
# 600 121637 40974992
# 700 141545 47740552
# 800 160909 54312208
# 900 180409 60949200
#
# 454.47 real 450.69 user 3.20 sys
 
 
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
 
import numpy as np
 
import guppy
 
heapy = guppy.hpy()

# Open the log in text mode ('w', not 'wb'): the loop writes str, which a
# binary-mode file rejects on Python 3.  The with-block also guarantees the
# log is closed if plotting raises part-way through the run.
with open('memory_pyplot_clf.txt', 'w') as mem:

    # Set the heapy reference point after the log file exists, so later
    # heap() snapshots are taken relative to this state.
    heapy.setref()

    for i in range(1000):

        # Every 100 iterations, log "iteration object-count memory-size".
        if i % 100 == 0:
            h = heapy.heap()
            mem.write('%i %s %s\n' % (i, h.count, h.size))

        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(np.random.random(10), np.random.random(10))
        fig.savefig('test.png')

        # Variant under test: clear the figure's contents after saving.
        # The figure itself is not closed.
        fig.clf()
test_pyplot_close.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
# Added plt.close()
#
# Memory usage (iteration, object count, memory size)
# 100 5461 1464216
# 200 5683 1569464
# 300 5610 1527120
# 400 5798 1617432
# 500 5320 1366000
# 600 5610 1530224
# 700 5724 1579600
# 800 5610 1527040
# 900 5616 1514112
#
# 271.73 real 270.46 user 0.77 sys
 
 
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
 
import numpy as np
 
import guppy
 
heapy = guppy.hpy()

# Open the log in text mode ('w', not 'wb'): the loop writes str, which a
# binary-mode file rejects on Python 3.  The with-block also guarantees the
# log is closed if plotting raises part-way through the run.
with open('memory_pyplot_close.txt', 'w') as mem:

    # Set the heapy reference point after the log file exists, so later
    # heap() snapshots are taken relative to this state.
    heapy.setref()

    for i in range(1000):

        # Every 100 iterations, log "iteration object-count memory-size".
        if i % 100 == 0:
            h = heapy.heap()
            mem.write('%i %s %s\n' % (i, h.count, h.size))

        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(np.random.random(10), np.random.random(10))
        fig.savefig('test.png')

        # Variant under test: close the current pyplot figure after saving.
        plt.close()

The bottom line is, use the object-oriented interface to Matplotlib - no memory leak, and best performance!

so... is it plt.close() as opposed to doing no closing at all that really reduces the memory leak? And in test_oo.... it's the use of the object-oriented interface in the sense of grabbing all commands direct from matplotlib rather than using pylab/pyplot? Hmm... that will make me reconsider quite a lot of code.

Yes to both questions, and it was a real surprise to me too, so I'll probably be using the OO interface in future for new scripts!

I think what's going on is that when using pyplot, Matplotlib must keep an internal reference to the Figure instance, which is why the figure is not freed simply by dropping the script's own reference to it (i.e. one reference to it still exists), and an additional call to plt.close() is needed (note: not fig.close()). In the OO interface, on the other hand, I think that fig must be the only reference to the Figure instance, and therefore the memory is reclaimed as you would expect in a Python script.

@astrofrog I just came across this via Google and can confirm that your plt.close() solution works in Matplotlib 1.1. Thanks for posting this!

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.