HVS vinovator

## pdfTextMiner.py
# pdfTextMiner.py
# Python 2.7.6
# For Python 3.x use pdfminer3k module
# These links have useful information on components of the program
# https://euske.github.io/pdfminer/programming.html
# http://denis.papathanasiou.org/posts/2010.08.04.post.html


''' Important classes to remember
PDFParser - fetches data from pdf file

## portScanner.py
# python 2.7.6.
# portScanner.py

import socket
from datetime import datetime
import sys

# Here we are scanning your own terminal
# Replace this with gethostbyname("host") to scan a remote host

## timeZoneExplorer.py
# Python 2.7.6
# timeZoneExplorer.py

from pytz import timezone, common_timezones # import all_timezones for more exhaustive list
from datetime import datetime
import os

# The log file is created in the current working directory: "." is a
# relative path, so it resolves against wherever the script is launched
# from, which is the script's own folder only when it is run from there.
my_path = "."
# Pass the components separately so os.path.join inserts the platform's
# separator itself instead of receiving one pre-concatenated string.
log_path = os.path.join(my_path, "loc_log.txt")

## jsonToCsv2.py
# jsonToCSV.py
# Python 2.7.6

'''
Place all the json payloads as separate text files in base folder
Program will extract each payload and generate single csv file
csv file will have key value pairs in separate columns
'''

import json

## forbes2kMiner.py
# forbes2kMiner.py
# Python 3.4


"""
Extracts the Forbes Global 2000 list of companies and exports it into a CSV file
Since Forbes is a JS rendered site, selenium is used to mimic user action
BeautifulSoup is used to scrape html content
Since selenium is used, Firefox is needed as webdriver
"""

## persistListOfDicts.py
# persistListOfDicts.py
# Python 2.7.6


import json
import os
import pickle # To persist each dict

json_path = "./JSON"


## Logger.py
# Logger.py
# Python2.7.6
# For more details - https://docs.python.org/3/howto/logging.html#logging-basic-tutorial
# logging.error - just displays the error message
# logging.exception - displays the stack trace along with the error message

import logging # For logs
import sys # To read parameters from command line

# Define the format of the logging

## persistListOfDicts1.py
# persistListOfDicts.py
# Python 2.7.6


import json
import os

json_path = "./JSON"

# Write dicts into a pickle file each

## DemergePDF.py
#Python 2.7.6
#DemergePDF.py
#Gets raw_input of 1 PDF file name from user and demerges it into 2

import PyPDF2
import os

def getFileNameFromUser (file, path):
	pdf_file_name = raw_input("Enter {0} name: ".format(file))
	if pdf_file_name in os.listdir(path):

## CombinePDF_Py2.py
#Python 2.7.6
#CombinePDF_Py2.py
#Gets raw_inputs of 2 PDF file names from user and combines them into 1

import PyPDF2
import os

def getFileNameFromUser (file, path):
	pdf_file_name = raw_input("Enter {0} name: ".format(file))
	if pdf_file_name in os.listdir(path):
	# pdfTextMiner.py
	# Python 2.7.6
	# For Python 3.x use pdfminer3k module
	# These links have useful information on components of the program
	# https://euske.github.io/pdfminer/programming.html
	# http://denis.papathanasiou.org/posts/2010.08.04.post.html


	''' Important classes to remember
	PDFParser - fetches data from pdf file
	# python 2.7.6.
	# portScanner.py

	import socket
	from datetime import datetime
	import sys

	# Here we are scanning your own terminal
	# Replace this with gethostbyname("host") to scan a remote host
	# Python 2.7.6
	# timeZoneExplorer.py

	from pytz import timezone, common_timezones # import all_timezones for more exhaustive list
	from datetime import datetime
	import os

	# Log file will be created in the current working directory ("." is relative to where the script is run)
	my_path = "."
	log_path = os.path.join(my_path + "/" + "loc_log.txt")
	# jsonToCSV.py
	# Python 2.7.6

	'''
	Place all the json payloads as separate text files in base folder
	Program will extract each payload and generate single csv file
	csv file will have key value pairs in separate columns
	'''

	import json
	# forbes2kMiner.py
	# Python 3.4


	"""
	Extracts the Forbes Global 2000 list of companies and imports into a CSV file
	Since Forbes is a JS rendered site, selenium is used to mimic user action
	BeautifulSoup is used to scrape html content
	Since selenium is used, Firefox is needed as webdriver
	"""
	# persistListOfDicts.py
	# Python 2.7.6


	import json
	import os
	import pickle # To persist each dict

	json_path = "./JSON"
	# Logger.py
	# Python2.7.6
	# For more details - https://docs.python.org/3/howto/logging.html#logging-basic-tutorial
	# logging.error - just displays the error message
	# logging.exception - displays the stack trace along with the error message

	import logging # For logs
	import sys # To read parameters from command line

	# Define the format of the logging
	#Python 2.7.6
	#DemergePDF.py
	#Gets raw_input of 1 PDF file name from user and demerges it into 2

	import PyPDF2
	import os

	def getFileNameFromUser (file, path):
	pdf_file_name = raw_input("Enter {0} name: ".format(file))
	if pdf_file_name in os.listdir(path):
	#Python 2.7.6
	#CombinePDF_Py2.py
	#Gets raw_inputs of 2 PDF file names from user and combines them into 1

	import PyPDF2
	import os

	def getFileNameFromUser (file, path):
	pdf_file_name = raw_input("Enter {0} name: ".format(file))
	if pdf_file_name in os.listdir(path):