# Script to extract images from ppt, pps and pptx files
#-*- coding: utf-8 -*-
"""
Copyright (c) 2011, Jhonathan Sneider Salguero Villa (http://www.novatoz.com/)
Script to extract images from ppt, pptx and pps files.
Requires:
- pywin32
- Microsoft PowerPoint 2007
"""
import base64
import os
import sys
import tempfile
import zipfile

# pywin32 is the only third-party requirement. The standard-library imports
# come first so that ``sys`` is already bound when the failure path below
# calls sys.exit().
try:
    import pythoncom
    import win32com.client as win32
except ImportError:
    print("Debe instalar pywin32 http://sourceforge.net/projects/pywin32/")
    sys.exit(1)

# Directory that receives the temporary .pptx copies produced by get_ppt().
TEMPDIR = tempfile.gettempdir()
class PPTProblem(Exception):
    """Error raised when a presentation file cannot be opened."""
def random_string(length):
    """Return random text built from *length* bytes of OS entropy.

    The bytes are base32-encoded, so the result only contains ``A-Z``,
    ``2-7`` and, when *length* is not a multiple of 5, ``=`` padding.

    The value is always a native ``str``: on Python 3 ``b32encode`` returns
    ``bytes``, which would otherwise end up formatted as ``b'...'`` inside
    generated file names.
    """
    token = base64.b32encode(os.urandom(length))
    if not isinstance(token, str):
        token = token.decode("ascii")
    return token
def get_file_name(ext):
    """Return a random file name that does not exist yet.

    *ext* is the desired extension. It may be given with or without its
    leading dot (``"png"`` or ``".png"``, the latter being what
    ``os.path.splitext`` produces); either way the result contains exactly
    one dot before the extension. An empty *ext* yields a bare name.

    The name is relative, so existence is checked against the current
    working directory.
    """
    ext = ext.lstrip(".")
    while True:
        name = random_string(10)
        if ext:
            name = "{0}.{1}".format(name, ext)
        # Keep drawing until the candidate does not collide with a file
        # that is already present.
        if not os.path.exists(name):
            return name
# Adapted from http://code.activestate.com/recipes/308035/
def _convert_to_pptx(source, target):
    """Ask PowerPoint (through COM) to save *source* as *target*."""
    pythoncom.CoInitializeEx(pythoncom.COINIT_APARTMENTTHREADED)
    try:
        pp = win32.DispatchEx('Powerpoint.Application')
        try:
            # Positional flags are ReadOnly, Untitled, WithWindow in the
            # PowerPoint object model, i.e. open read-write and windowless.
            ppfile = pp.Presentations.Open(source, False, False, False)
            try:
                ppfile.SaveAs(target)
            finally:
                ppfile.Close()
                del ppfile
        finally:
            # Only shut PowerPoint down when nothing else is open in it, so
            # an instance the user may already be working in is left alone.
            if pp.Presentations.Count == 0:
                pp.Quit()
            # Drop the COM reference before COM is uninitialised.
            del pp
    finally:
        pythoncom.CoUninitialize()


def get_ppt(file):
    """Return the presentation *file* opened as a ``zipfile.ZipFile``.

    A ``.pptx`` file (any letter case) is already a zip archive and is
    opened directly. Any other file is first opened with PowerPoint and
    saved as ``<name>.pptx`` inside ``TEMPDIR``; that copy is what gets
    opened.

    Raises:
        PPTProblem: if the file cannot be converted or opened as a zip.
    """
    name, ext = os.path.splitext(os.path.basename(file))
    try:
        # splitext() keeps the leading dot, so compare against ".pptx".
        if ext.lower() == ".pptx":
            return zipfile.ZipFile(file)
        converted = os.path.join(TEMPDIR, name + ".pptx")
        _convert_to_pptx(file, converted)
        return zipfile.ZipFile(converted)
    except Exception as error:
        # Exception (not a bare except) lets Ctrl-C and SystemExit through;
        # the cause is appended so the real failure is not hidden.
        raise PPTProblem("can't open {0}: {1!r}".format(file, error))
def extract_images(zip):
    """Copy every image of an opened presentation into the current directory.

    *zip* is the ``zipfile.ZipFile`` of a .pptx file. Each member stored
    directly under ``ppt/media`` is written to a new, randomly named file
    (see ``get_file_name``) that keeps the member's extension.
    """
    for member in zip.namelist():
        folder, filename = os.path.split(member)
        # Images live in ppt/media; an empty file name is the directory
        # entry itself and has no data to extract.
        if folder != "ppt/media" or not filename:
            continue
        # splitext() returns the extension with its dot, while
        # get_file_name() inserts the dot itself.
        extension = os.path.splitext(filename)[1].lstrip(".")
        source = zip.open(member)
        try:
            with open(get_file_name(extension), 'wb') as target:
                # Copy in 64 KiB chunks so large media is never fully
                # loaded into memory.
                while True:
                    data = source.read(1024 * 64)
                    if not data:
                        break
                    target.write(data)
        finally:
            source.close()
if __name__ == '__main__':
    # Usage: script.py <presentation>. Images are written to the current
    # working directory.
    if len(sys.argv) < 2:
        sys.stderr.write("Uso: {0} <archivo.ppt|.pps|.pptx>\n".format(
            os.path.basename(sys.argv[0])))
        sys.exit(2)
    try:
        presentation = get_ppt(os.path.abspath(sys.argv[1]))
    except PPTProblem as problem:
        # Report the failure as a message instead of a traceback.
        sys.stderr.write("{0}\n".format(problem))
        sys.exit(1)
    try:
        extract_images(presentation)
    finally:
        presentation.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment