Skip to content

Instantly share code, notes, and snippets.

Created June 15, 2017 05:09
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save tanaikech/825f4b848a8cbff7018f71d33399e99b to your computer and use it in GitHub Desktop.
Save tanaikech/825f4b848a8cbff7018f71d33399e99b to your computer and use it in GitHub Desktop.
Converting PDF to TXT

Converting PDF to TXT

This is a sample script for converting a PDF file to a TXT file. 2 steps are required for this.

  1. Upload a PDF file as a Google Document
  2. Download a Google Document as a TXT file

In this sample, Python Quickstart is used. The detail information is Please read "Step 1: Turn on the Drive API" and "Step 2: Install the Google Client Library".

from __future__ import print_function
import httplib2
import os
import io

from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
from apiclient.http import MediaFileUpload, MediaIoBaseDownload

    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Drive API Python Quickstart'

def get_credentials():
    credential_path = os.path.join("./", 'drive-python-quickstart.json')
    store = Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        if flags:
            credentials = tools.run_flow(flow, store, flags)
        else:  # Needed only for compatibility with Python 2.6
            credentials =, store)
        print('Storing credentials to ' + credential_path)
    return credentials

def main():
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service ='drive', 'v3', http=http)

    pdffile = 'sample.pdf'
    txtfile = 'sample.txt'

    mime = 'application/'
    res = service.files().create(
            'name': pdffile,
            'mimeType': mime
        media_body=MediaFileUpload(pdffile, mimetype=mime, resumable=True)

    dl = MediaIoBaseDownload(
        io.FileIO(txtfile, 'wb'),
        service.files().export_media(fileId=res['id'], mimeType="text/plain")
    done = False
    while done is False:
        status, done = dl.next_chunk()

if __name__ == '__main__':
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment