Skip to content

Instantly share code, notes, and snippets.

@JSONOrona
Last active August 29, 2015 13:56
Show Gist options
  • Save JSONOrona/9278048 to your computer and use it in GitHub Desktop.
Save JSONOrona/9278048 to your computer and use it in GitHub Desktop.
ATG Dynamo console web scraper
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'Jason V. Orona'
__version__ = 'prototype'
'''
Example:
./getSched.py --server=myserver --port=8080
==== Scheduled services for myserver@8080 ====
|job_id|last_time|next_time|times_run|description|thread_method|repeating|schedule_type|schedule|
|28|Wed Mar 05 12:30:34 CST 2014|Wed Mar 05 12:31:34 CST 2014|356|Updates index of all documents in the Nucleus docroot|reused|true|atg.service.scheduler.PeriodicSchedule|every 1 minute in 1 minute without catch up|
|31|Wed Mar 05 12:30:36 CST 2014|Wed Mar 05 12:31:36 CST 2014|357|Monitors Request Threads|reused|true|atg.service.scheduler.PeriodicSchedule|every 1 minute without catch up|
'''
# Built-in modules
import argparse
import base64
import os
import re
import urllib2

# Third-party modules
from BeautifulSoup import BeautifulSoup
# Global variables
# Credentials for the Dynamo admin console.  They are read from the
# DYNAMO_USERNAME / DYNAMO_PASSWORD environment variables when those are
# set, so real credentials do not have to be committed to source control;
# the original placeholder values remain the fallback.
username = os.environ.get('DYNAMO_USERNAME', 'supersecretuser')
password = os.environ.get('DYNAMO_PASSWORD', 'supersecretpass')
# Functions specific to this program
def get_args(argv=None):
    '''
    Parse the command-line options of the schedule scraper.

    argv -- optional list of argument strings to parse; when None,
            argparse falls back to sys.argv[1:].

    Returns a (server, port, keyword) tuple: server is the host name
    string, port is a list of port strings, and keyword is the keyword
    string or None when -k/--keyword was not supplied.

    Ports may be given comma-separated ("-p 8080,8081"), space-separated
    ("-p 8080 8081"), or as a mix of both.  The parser exits with a usage
    error when no non-empty port value remains after splitting.
    '''
    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description='Script retrieves schedules from dynamo console')
    # Add arguments
    parser.add_argument(
        '-s', '--server', type=str, help='Server name', required=True)
    parser.add_argument(
        '-p', '--port', type=str, help='Port number', required=True,
        nargs='+')
    parser.add_argument(
        '-k', '--keyword', type=str, help='Keyword search', required=False,
        default=None)
    args = parser.parse_args(argv)

    # nargs='+' produces a list of values; split every one of them on
    # commas (not just the first) and drop empty pieces such as the one
    # left behind by a trailing comma.
    port = [
        piece.strip()
        for value in args.port
        for piece in value.split(',')
        if piece.strip()
    ]
    if not port:
        parser.error('no port number given')

    return args.server, port, args.keyword
def get_http(username, password, url):
    '''
    Request url with HTTP Basic authentication and print its first table.

    username, password -- credentials sent in the Authorization header.
    url                -- address of the page to request.

    The first <table> element of the response is read.  Its first row is
    skipped, and each remaining row is printed as one pipe-delimited line
    built from a fixed selection of its <td> cells.  Rows that do not
    have enough cells are skipped, and a cell without a single text value
    is printed as an empty field.

    If the request fails with urllib2.HTTPError, the response headers and
    the WWW-Authenticate header value are printed instead.  Any other
    exception propagates to the caller.  Returns None.
    '''
    # Indexes of the <td> cells to print, in output order: job_id,
    # last_time, next_time, times_run, description, thread_method,
    # repeating, schedule_type, schedule.
    columns = (1, 3, 4, 5, 8, 9, 10, 11, 12)

    request = urllib2.Request(url)
    # b64encode emits no line breaks, so nothing has to be stripped.
    credentials = base64.b64encode('%s:%s' % (username, password))
    request.add_header("Authorization", "Basic %s" % credentials)

    # Only the request itself can raise HTTPError, so keep the try small.
    try:
        soup = BeautifulSoup(urllib2.urlopen(request))
    except urllib2.HTTPError as e:
        print(e.headers)
        print(e.headers.get('WWW-Authenticate'))
        return

    table = soup.find('table')
    if table is None:
        print("No schedule table found at %s" % url)
        return

    # Format html table into a pipe delimited table
    for row in table.findAll('tr')[1:]:
        col = row.findAll('td')
        if len(col) <= max(columns):
            # Not a full job row; there is nothing to index into.
            continue
        # .string is None when a cell is empty or holds nested markup;
        # join() needs strings, so fall back to an empty field.
        record = [col[i].string or '' for i in columns]
        print("|%s|" % "|".join(record))
#def search(keyword):
# for line in resp.readlines():
# if re.search(keyword, line, re.I):
# print line
def main():
    '''
    Main program: print the scheduled services for each requested port.

    Reads server, port list and keyword from the command line via
    get_args(), then for every port prints a banner and a column header
    line and calls get_http() on that port's Scheduler page using the
    module-level username and password.
    '''
    # Get arguments
    server, port, keyword = get_args()
    # Loop through ports list and output scheduled services
    for p in port:
        url = 'http://%s:%s/dyn/admin/nucleus/atg/dynamo/service/Scheduler/' % (
            server, p)
        print("\n==== Scheduled services for %s@%s ====\n" % (server, p))
        # The header starts with '|' like every data row printed by
        # get_http(), so header and rows have the same number of fields.
        print('|job_id|last_time|next_time|times_run|description|'
              'thread_method|repeating|schedule_type|schedule|')
        get_http(username, password, url)
        print("")
    # TODO: keyword is parsed but not used yet; keyword filtering of the
    # output has not been implemented.
# Run the scraper only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment