Skip to content

Instantly share code, notes, and snippets.

View vinayak-mehta's full-sized avatar
🤕
Recovering

Vinayak Mehta vinayak-mehta

🤕
Recovering
View GitHub Profile
@vinayak-mehta
vinayak-mehta / disease_outbreaks_camelot.ipynb
Last active November 5, 2023 18:54
A Jupyter notebook showing how Camelot can be used to extract tables from PDFs scraped from the IDSP website.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import os
import sys
import json
import hmac
import hashlib
import traceback
from binascii import b2a_hex
from datetime import datetime, timezone
import zmq
import sys
from jupyter_client import KernelManager
try:
manager = KernelManager()
manager.start_kernel()
client = manager.client()
> Dependencies.exe -imports "C:\Users\Vinayak Mehta\pdftopng-package-data\pdftopng.cp38-win_amd64.pyd"
[-] Import listing for file : C:\Users\Vinayak Mehta\pdftopng-package-data\pdftopng.cp38-win_amd64.pyd
Import from module MSVCP140.dll :
Function ?_Random_device@std@@YAIXZ
Function ?__ExceptionPtrCreate@@YAXPEAX@Z
Function ?__ExceptionPtrCopy@@YAXPEAXPEBX@Z
Function ?_Xout_of_range@std@@YAXPEBD@Z
Function ?__ExceptionPtrAssign@@YAXPEAXPEBX@Z
Function ?_Xlength_error@std@@YAXPEBD@Z
Function ?__ExceptionPtrToBool@@YA_NPEBX@Z
> Dependencies.exe -imports "C:\Users\Vinayak Mehta\pdftopng-dll-mangling\pdftopng.cp38-win_amd64.pyd"
[-] Import listing for file : C:\Users\Vinayak Mehta\pdftopng-dll-mangling\pdftopng.cp38-win_amd64.pyd
Import from module MSVCP140.dll :
Function ?_Random_device@std@@YAIXZ
Function ?__ExceptionPtrCreate@@YAXPEAX@Z
Function ?__ExceptionPtrCopy@@YAXPEAXPEBX@Z
Function ?_Xout_of_range@std@@YAXPEBD@Z
Function ?__ExceptionPtrAssign@@YAXPEAXPEBX@Z
Function ?_Xlength_error@std@@YAXPEBD@Z
Function ?__ExceptionPtrToBool@@YA_NPEBX@Z
// Dependencies.exe -imports -json "C:\Users\Vinayak Mehta\pdftopng-package-data\pdftopng.cp38-win_amd64.pyd"
{
"Imports": [
{
"Flags": 0,
"Name": "MSVCP140.dll",
"NumberOfEntries": 38,
"ImportList": [
{
"Hint": 605,
// Dependencies.exe -imports -json "C:\Users\Vinayak Mehta\pdftopng-dll-mangling\pdftopng.cp38-win_amd64.pyd"
{
"Imports": [
{
"Flags": 0,
"Name": "MSVCP140.dll",
"NumberOfEntries": 38,
"ImportList": [
{
"Hint": 605,
@vinayak-mehta
vinayak-mehta / pdf2png.txt
Created September 6, 2020 11:59 — forked from zooba/pdf2png.txt
Step-by-step conversion of a PDF page to PNG using WinRT
Python 3.7.8 (tags/v3.7.8:4b47a5b6ba, Jun 28 2020, 10:03:53) [MSC v.1916 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import os, time
>>> PDF_FILENAME = input("Path to PDF: ")
>>> OUT_FILE = os.path.abspath(input("Path to output PNG: "))
>>>
>>> import winrt.windows.data.pdf as PDF
>>> from winrt.windows.storage import StorageFile
>>> op = StorageFile.get_file_from_path_async(PDF_FILENAME)
>>> time.sleep(0.5) # should really await, but this is easier
s = [" ", " ", " ", " ", " ", " ", " ", " ", " "]
def render_grid(s):
grid = "\n-----------------\n".join(
[
f" {s[0]} | {s[1]} | {s[2]}\n (1) | (2) | (3)",
f" {s[3]} | {s[4]} | {s[5]}\n (4) | (5) | (6)",
f" {s[6]} | {s[7]} | {s[8]}\n (7) | (8) | (9)"
]
@vinayak-mehta
vinayak-mehta / pdf_table_extract.py
Created September 22, 2018 11:54
A Python 2 script that extracts tables from a PDF file using pdf-table-extract and saves them as CSV files in the current working directory.
#!/usr/bin/env python
"""
Usage: python pdf_table_extract.py <filename>
"""
import os
import sys
import pandas as pd
import pdftableextract as pdf