Skip to content

Instantly share code, notes, and snippets.

@sney2002
Created November 16, 2011 21:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sney2002/1371551 to your computer and use it in GitHub Desktop.
Save sney2002/1371551 to your computer and use it in GitHub Desktop.
Script para extraer imágenes de archivos ppt, pps y pptx
#-*- coding: utf-8 -*-
"""
Copyright (c) 2011, Jhonathan Sneider Salguero Villa (http://www.novatoz.com/)
script para extraer imágenes de archivos ppt, pptx y pps
Requiere:
- pywin32
- Microsoft PowerPoint 2007
"""
import base64
import os
import sys
import tempfile
import zipfile

# pywin32 is required by this script (see the module docstring). The standard
# library imports come first so that ``sys`` is already bound when the
# failure path below calls ``sys.exit``.
try:
    import pythoncom
    import win32com.client as win32
except ImportError:
    print("Debe instalar pywin32 http://sourceforge.net/projects/pywin32/")
    sys.exit(1)

# Directory where converted .pptx copies of ppt/pps files are written.
TEMPDIR = tempfile.gettempdir()
class PPTProblem(Exception):
    """Error raised when a presentation file cannot be opened."""
def random_string(length):
    """Return random text derived from ``length`` random bytes.

    The bytes are read from ``os.urandom`` and base32-encoded, so the result
    is longer than ``length`` characters and may end with ``=`` padding.

    Args:
        length: number of random bytes to draw.

    Returns:
        The base32 encoding of the random bytes as a native ``str``.
    """
    token = base64.b32encode(os.urandom(length))
    # On Python 3 b32encode returns bytes; formatting bytes into a file name
    # would embed the "b'...'" repr, so normalise to the native text type.
    # On Python 2 the value is already ``str`` and is returned unchanged.
    if not isinstance(token, str):
        token = token.decode("ascii")
    return token
def get_file_name(ext):
    """Return a file name that does not yet exist in the current directory.

    Args:
        ext: file extension, with or without a leading dot (``"png"`` and
            ``".png"`` are treated the same).

    Returns:
        A name of the form ``<random>.<ext>`` for which ``os.path.exists``
        was false at the time of the check.
    """
    # Callers commonly pass os.path.splitext() output, which keeps the leading
    # dot; strip it so the result is "NAME.png" rather than "NAME..png".
    ext = ext.lstrip(".")
    while True:
        name = "{0}.{1}".format(random_string(10), ext)
        if not os.path.exists(name):
            return name
# Adaptado de http://code.activestate.com/recipes/308035/
def get_ppt(file):
    """Return the presentation at ``file`` opened as a zip archive.

    A ``.pptx`` file is already a zip archive and is opened directly. Any
    other file is opened with PowerPoint through COM, saved as
    ``<TEMPDIR>/<name>.pptx`` and that copy is opened instead.

    Args:
        file: path to the ppt, pps or pptx file.

    Returns:
        A ``zipfile.ZipFile`` for the (possibly converted) presentation.

    Raises:
        PPTProblem: if the file cannot be opened or converted.
    """
    try:
        name, ext = os.path.splitext(os.path.basename(file))
        # splitext keeps the leading dot, so compare against ".pptx".
        if ext.lower() == ".pptx":
            return zipfile.ZipFile(file)

        converted = os.path.join(TEMPDIR, name + ".pptx")
        pythoncom.CoInitializeEx(pythoncom.COINIT_APARTMENTTHREADED)
        try:
            pp = win32.DispatchEx('Powerpoint.Application')
            ppfile = pp.Presentations.Open(file, False, False, False)
            try:
                ppfile.SaveAs(converted)
            finally:
                # Close the presentation even when the conversion fails.
                ppfile.Close()
        finally:
            # Drop the COM references before uninitialising COM.
            ppfile = pp = None
            pythoncom.CoUninitialize()
        return zipfile.ZipFile(converted)
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate.
        raise PPTProblem("can't open {0}".format(file))
def extract_images(zip):
    """Copy every file under ``ppt/media`` in the archive to the current directory.

    Each file is written under a fresh name obtained from ``get_file_name``,
    keeping the original extension.

    Args:
        zip: an open ``zipfile.ZipFile`` for a pptx presentation.
    """
    for member in zip.namelist():
        folder, filename = os.path.split(member)
        # Images are stored under ppt/media.
        if folder != "ppt/media":
            continue
        # splitext keeps the leading dot and get_file_name inserts its own
        # separator, so pass the bare extension.
        ext = os.path.splitext(filename)[1].lstrip(".")
        target = get_file_name(ext)
        # Context managers close both streams even if a read or write fails.
        with zip.open(member) as source:
            with open(target, 'wb') as output:
                while True:
                    data = source.read(1024 * 64)
                    if not data:
                        break
                    output.write(data)
if __name__ == '__main__':
    # Script entry point: extract the images of the presentation named on the
    # command line into the current directory.
    if len(sys.argv) < 2:
        sys.stderr.write(
            "usage: {0} <file.ppt|file.pps|file.pptx>\n".format(sys.argv[0]))
        sys.exit(2)
    presentation = get_ppt(os.path.abspath(sys.argv[1]))
    try:
        extract_images(presentation)
    finally:
        # Release the archive handle whether or not extraction succeeded.
        presentation.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment