Created
November 16, 2011 21:48
-
-
Save sney2002/1371551 to your computer and use it in GitHub Desktop.
Script para extraer imágenes de archivos ppt, pps y pptx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Copyright (c) 2011, Jhonathan Sneider Salguero Villa (http://www.novatoz.com/)

Script to extract images from ppt, pptx and pps files.

Requires:
    - pywin32
    - Microsoft PowerPoint 2007
"""
import base64
import os
import sys
import tempfile
import zipfile

# pywin32 provides both modules below; sys is imported above so the
# fallback can actually call sys.exit when the package is missing.
try:
    import pythoncom
    import win32com.client as win32
except ImportError:
    print("Debe instalar pywin32 http://sourceforge.net/projects/pywin32/")
    sys.exit(1)

# Directory that receives the .pptx copy saved by PowerPoint for
# presentations that are not already in .pptx format.
TEMPDIR = tempfile.gettempdir()
class PPTProblem(Exception):
    """Raised when a presentation file cannot be opened."""
def random_string(length):
    """Return random base32 text encoded from ``length`` random bytes.

    The result is always the interpreter's native ``str`` type, so it can
    be formatted straight into a file name.
    """
    token = base64.b32encode(os.urandom(length))
    # On Python 3 b32encode returns bytes; formatting those into a name
    # would yield "b'...'". Base32 output is pure ASCII, so decoding is safe.
    if not isinstance(token, str):
        token = token.decode("ascii")
    return token
def get_file_name(ext):
    """Return a random file name that does not exist in the current directory.

    ext -- file extension, with or without its leading dot ("png" and
           ".png" both produce "<random>.png").
    """
    # os.path.splitext() hands back extensions with the dot included;
    # strip it so the name does not end up as "<random>..png".
    suffix = ext.lstrip(".")
    while True:
        name = "{0}.{1}".format(random_string(10), suffix)
        if not os.path.exists(name):
            return name
# Adapted from http://code.activestate.com/recipes/308035/
def get_ppt(file):
    """Return the presentation ``file`` opened as a zip archive.

    A .pptx file is opened directly. Any other file is opened with
    PowerPoint through COM, saved as ``<TEMPDIR>/<name>.pptx`` and that
    copy is opened instead.

    file -- path of the presentation; the caller in this script passes an
            absolute path.

    Returns a ``zipfile.ZipFile``.
    Raises ``PPTProblem`` if the file cannot be opened or converted.
    """
    try:
        name, ext = os.path.splitext(os.path.basename(file))
        # splitext() keeps the leading dot, so compare against ".pptx".
        if ext.lower() == ".pptx":
            return zipfile.ZipFile(file)

        converted = os.path.join(TEMPDIR, name + ".pptx")
        pythoncom.CoInitializeEx(pythoncom.COINIT_APARTMENTTHREADED)
        pp = ppfile = None
        try:
            pp = win32.DispatchEx('Powerpoint.Application')
            # Positional arguments after the path: ReadOnly, Untitled,
            # WithWindow -- all disabled.
            ppfile = pp.Presentations.Open(file, False, False, False)
            ppfile.SaveAs(converted)
        finally:
            try:
                if ppfile is not None:
                    ppfile.Close()
            finally:
                # Drop the COM references before tearing COM down, as the
                # original success path did.
                ppfile = None
                pp = None
                pythoncom.CoUninitialize()
        return zipfile.ZipFile(converted)
    except Exception:
        raise PPTProblem("can't open {0}".format(file))
def extract_images(zip):
    """Copy every file stored under ``ppt/media`` in ``zip`` to disk.

    Each file is written under a new name obtained from get_file_name(),
    keeping the extension of the archive member.

    zip -- an open ``zipfile.ZipFile`` of a .pptx package.
    """
    for member in zip.namelist():
        folder, base = os.path.split(member)
        # Images are stored in ppt/media.
        if folder != "ppt/media":
            continue
        # splitext() returns the extension with its leading dot, while
        # get_file_name() inserts the dot itself.
        ext = os.path.splitext(base)[1].lstrip(".")
        source = zip.open(member)
        try:
            with open(get_file_name(ext), 'wb') as target:
                # Copy in 64 KiB chunks so large media never sits in memory.
                while True:
                    data = source.read(1024 * 64)
                    if not data:
                        break
                    target.write(data)
        finally:
            source.close()
if __name__ == '__main__':
    # Script entry point: the first argument is the presentation to process.
    if len(sys.argv) < 2:
        sys.exit("usage: {0} <file.ppt | file.pps | file.pptx>".format(sys.argv[0]))
    package = get_ppt(os.path.abspath(sys.argv[1]))
    try:
        extract_images(package)
    finally:
        # Release the archive handle even if extraction fails.
        package.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment