Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import subprocess
import time
import xml.dom.minidom
import os.path
import os
import datetime
import xml.etree.ElementTree as ET
import glob
#if clearXmlSave_location is set to True, deletes all the files in the XmlSave_location folder first, giving a clean start; this will slow things down
def clearXmlSaveLocation(clearXmlSave_location):
    """Delete the files in the XML save folder when requested.

    Args:
        clearXmlSave_location: truthy to wipe the folder, falsy to leave it
            untouched.

    Reads the module-level ``XmlSave_location`` string and removes every
    regular file matched by the glob ``XmlSave_location + "*"``.
    Sub-directories are skipped: ``os.remove`` raises on a directory, which
    would otherwise abort the clean-up part way through.
    """
    if not clearXmlSave_location:
        return
    for xmlFile in glob.glob(XmlSave_location + "*"):
        if os.path.isfile(xmlFile):
            os.remove(xmlFile)
#creates general search query to enter to subprocess call - this call then saves the XML file
#expects a quite specific call
def generateSearchQuery(object_type, search_terms, return_fields, search_type):
    """Build the path-and-query part of an Aconex search request.

    Args:
        object_type: "document", "inbox" or "outbox". Any other value prints a
            warning and contributes an empty prefix.
        search_terms: list of search expressions, e.g.
            ["attribute3:P027c*", "(docno:tbs* OR docno:tsb*)"]; they are
            combined with " AND ".
        return_fields: list of field names to return, e.g. ["docno", "title"].
        search_type: "Full", or a string containing
            "Number_Limited&search_result_size" (e.g.
            "Number_Limited&search_result_size=25"). Any other value prints a
            warning and contributes no search_type parameter.

    Returns:
        The concatenated string
        ``<prefix>search_query=...&return_fields=...&<search type>``.
    """
    prefix_by_object_type = {
        "document": "register?&",
        "inbox": "mail?mail_box=Inbox&",
        "outbox": "mail?mail_box=Sentbox&",
    }
    object_type_text = prefix_by_object_type.get(object_type)
    if object_type_text is None:
        object_type_text = ""
        print("Invalid object type: must be either 'document' or 'inbox' or 'outbox'.")

    # join() also copes with an empty list, where indexing [0] would raise.
    search_text = "search_query=" + " AND ".join(search_terms) + "&"

    return_fields_text = "return_fields=" + ",".join(return_fields) + "&"

    if search_type == "Full":
        search_type_text = "search_type=Full"
    elif "Number_Limited&search_result_size" in search_type:
        search_type_text = "search_type=" + search_type
    else:
        # Mirror the invalid-object-type handling: warn and carry on with an
        # empty fragment instead of failing on an unbound local.
        search_type_text = ""
        print("Invalid search type entered, must be 'Full' or 'Number_Limited&search_result_size=N'.")

    return object_type_text + search_text + return_fields_text + search_type_text
#generates and returns path to xml file of document metadata, trims and adds data to document metadata
#adds ApprovalStatus and ApprovalStatusReason to document metadata if a clear signal is part of the metadata (e.g. status changed to status A)
#expects a 19 digit number, e.g. 1348828088510064790
def _ensureEventChild(parent, tag):
    """Return the first direct child of *parent* named *tag*, creating it if absent."""
    child = parent.find(tag)
    if child is None:
        child = ET.SubElement(parent, tag)
    return child


def generateDocumentMetadata(DocumentID):
    """Download (if needed) and annotate the event log XML of one document.

    The file ``<XmlSave_location><DocumentID>documentID.xml`` is fetched via
    the external API executable when it does not exist yet. Every
    ``CDEventLog`` element is then processed:

    * events whose ``EventType`` is "View" are removed;
    * ``ApprovalStatus`` / ``ApprovalStatusReason`` children are set
      (defaults "Unknown" / "None") from the text of the ``Event`` child;
    * when the event text contains "TRANSMIT", the text between the first
      pair of parentheses is stored in a ``TransmitNumber`` child.

    Existing annotation children are reused, so re-processing an already
    annotated file does not duplicate tags.

    Args:
        DocumentID: document tracking number, expected to be 19 digits
            (e.g. 1348828088510064790); str or int.

    Returns:
        Path of the annotated XML file.
    """
    # Work with a string throughout so integer IDs can be concatenated.
    document_id = str(DocumentID)
    if len(document_id) != 19:
        print("Document tracking number " + document_id + " is not valid, not 19 characters.")
    if not document_id.isdigit():
        print("Document tracking number " + document_id + " is not valid, not numeric.")

    xml_file_name = document_id + "documentID.xml"
    xml_path = XmlSave_location + xml_file_name
    if not os.path.isfile(xml_path):
        event_log_query = "register/" + document_id + "/eventlog"
        call_command = [AconexTestAPI1_location, AconexProject_location + event_log_query,
                        XmlSave_location, xml_file_name]
        subprocess.call(call_command)

    tree = ET.parse(xml_path)
    root = tree.getroot()
    # ElementTree has no parent pointers; map each element to its parent so an
    # event can be removed wherever it sits in the tree.
    parent_of = dict((child, parent) for parent in root.iter() for child in parent)

    for item in root.findall('.//CDEventLog'):
        event_type = item.get('EventType')
        if event_type == "View":
            parent_of[item].remove(item)  # element not interesting, delete
            continue

        event_element = item.find('Event')
        detailed_event_type = ""
        if event_element is not None and event_element.text:
            detailed_event_type = event_element.text

        status_tag = _ensureEventChild(item, 'ApprovalStatus')
        status_tag.text = "Unknown"
        status_tag_reason = _ensureEventChild(item, 'ApprovalStatusReason')
        status_tag_reason.text = "None"

        if "TRANSMIT" in detailed_event_type:
            # The transmit number is the text between the first "(" and ")".
            parts = detailed_event_type.replace("(", ")").split(")")
            if len(parts) > 1:
                _ensureEventChild(item, 'TransmitNumber').text = parts[1]

        if event_type == "Send":
            if "REJCTN" in detailed_event_type:
                status_tag.text = "Not approved"
                status_tag_reason.text = "REJCTN mail type"
        elif event_type == "Update":
            # Checks run in order; when several match, the last one wins.
            if "Status A" in detailed_event_type:
                status_tag.text = "Approved"
                status_tag_reason.text = "Updated to Status A"
            if "Code 1" in detailed_event_type:
                status_tag.text = "Approved"
                status_tag_reason.text = "Updated to Code 1"
            if "Code 5" in detailed_event_type:
                # Only the status is set for Code 5; the reason is left as is.
                status_tag.text = "Approved"
            if "Status B" in detailed_event_type:
                status_tag.text = "Not approved"
                status_tag_reason.text = "Updated to Status B"
            # Any text containing "Status Changed" is excluded from this check.
            if "Status C" in detailed_event_type and "Status Changed" not in detailed_event_type:
                status_tag.text = "Not approved"
                status_tag_reason.text = "Updated to Status c"
            if "Code 2" in detailed_event_type:
                status_tag.text = "Not approved"
                status_tag_reason.text = "Updated to code 2"
            if "Code 3" in detailed_event_type:
                status_tag.text = "Not approved"
                status_tag_reason.text = "Updated to Code 3"
            if "Draft" in detailed_event_type:
                status_tag.text = "Not approved"
                status_tag_reason.text = "Updated to Draft"
        else:
            print("Unknown event type...")

    # save the altered xml file
    tree.write(xml_path)
    return xml_path
#generates and returns path to xml file of mail metadata
#expects a 9 digit number, e.g. 290640151
def generateMailMetadata(MailID):
    """Ensure the metadata XML of one mail item exists locally and return its path.

    The file ``<XmlSave_location><MailID>mailid.xml`` is downloaded through
    the external API executable only when it does not exist yet, because the
    program later edits these files and a re-download would lose the edits.
    Straight after a download the top-level ``ApprovalStatus`` tag is removed
    if present: it refers to Aconex processing the mail approval, not the
    document approval, and is confusing later.

    Args:
        MailID: mail identifier, expected to be 9 digits (e.g. 290640151);
            str or int. ``None`` and the sentinel ``-1`` are rejected.

    Returns:
        Path of the mail XML file, or "" when MailID is None or -1.
    """
    if MailID is None:
        print('Mail ID provided is none')
        return ""
    # Work with a string throughout so integer IDs can be concatenated.
    mail_id = str(MailID)
    if mail_id == "-1":
        print("Mail ID has remained unchanged at -1, see the function that called this function")
        # -1 is the "lookup failed" sentinel; there is nothing to download.
        return ""
    if len(mail_id) != 9:
        print("MailID number " + mail_id + " is not valid, not 9 characters.")
    if not mail_id.isdigit():
        print("MailID number " + mail_id + " is not valid, not numeric.")

    xml_file_name = mail_id + "mailid.xml"
    xml_path = XmlSave_location + xml_file_name
    if not os.path.isfile(xml_path):
        call_command = [AconexTestAPI1_location, AconexProject_location + "mail/" + mail_id,
                        XmlSave_location, xml_file_name]
        subprocess.call(call_command)
        # Strip Aconex's own ApprovalStatus only from a fresh download, and
        # only when it is there: removing a missing element raises.
        tree = ET.parse(xml_path)
        root = tree.getroot()
        aconex_status = root.find('ApprovalStatus')
        if aconex_status is not None:
            root.remove(aconex_status)
            tree.write(xml_path)
    return xml_path
#generates xml file given a mail number
#expects a mail number, e.g. "CS JV-TRANSMIT-000054", outputs a mail ID
#as a side effect, downloads and saves the xml metadata of that mail
def generateMailIDfromMailNumber(MailNumber):
    """Look up the mail ID that belongs to a mail number.

    The search result is kept in ``<XmlSave_location><MailNumber>.xml``. When
    that file already exists it is reused; otherwise the inbox is searched
    and, if that yields no result, the outbox. The mail ID is read from the
    ``MailId`` attribute of the ``Mail`` element in both cases.

    As a side effect of a fresh search with exactly one hit, the metadata XML
    of that mail is downloaded via ``generateMailMetadata``.

    Args:
        MailNumber: mail number such as "CS JV-TRANSMIT-000054".

    Returns:
        The mail ID string, or -1 when the search did not produce exactly one
        usable result.
    """
    xml_file_name = MailNumber + ".xml"
    xml_path = XmlSave_location + xml_file_name

    if os.path.isfile(xml_path):
        freshly_searched = False
        root = ET.parse(xml_path).getroot()
    else:
        freshly_searched = True
        # Aconex has issues processing the spaces, replace with a wildcard,
        # this is a workaround.
        MailNumber_nospaces = MailNumber.replace(' ', '?')
        # Try the inbox first; falling through to the outbox is fairly rare.
        for mail_box in ("inbox", "outbox"):
            search_query = generateSearchQuery(mail_box, ['docno:' + MailNumber_nospaces], ['docno'], "Full")
            call_command = [AconexTestAPI1_location, AconexProject_location + search_query,
                            XmlSave_location, xml_file_name]
            subprocess.call(call_command)
            root = ET.parse(xml_path).getroot()
            if int(root.get('TotalResults')) != 0:
                break

    numberofMailResults = int(root.get('TotalResults'))
    mail_id = -1  # gives a bad result if errors occur
    if numberofMailResults == 1:
        mail_element = root.find('.//Mail')
        if mail_element is None:
            print("Searching mail number " + MailNumber + " reported 1 result but no Mail element was found.")
        else:
            mail_id = mail_element.get('MailId')
            if freshly_searched:
                generateMailMetadata(mail_id)
    elif numberofMailResults == 0:
        print("Searching mail number " + MailNumber + " produced 0 results, some kind of error. No results returned")
    else:
        print("Searching mail number " + MailNumber + " produced " + str(numberofMailResults) + " results, some kind of error")
    return mail_id
#returns list of mail ids for related mail items
#expects a 9 digit number, e.g. 290640151
#as a side effect, downloads the metadata for each related mail item, and adds the reply meta-data to each mail xml
def generateRelatedMailIDs(MailID):
    """Return the mail IDs of every mail item in the same thread as MailID.

    The thread ID is read from the ``ThreadId`` tag of the mail's metadata
    XML, then the thread listing is downloaded to
    ``<XmlSave_location><threadID>threadID.xml``.

    As a side effect, the metadata XML of each mail in the thread is
    downloaded (via ``generateMailMetadata``) and gets two tags set from the
    thread listing: ``NumberOfReplies`` (count of ``Mail`` elements nested
    below that mail) and ``MailType`` (text of its ``ReplyType`` child).

    Args:
        MailID: mail identifier, expected to be 9 digits (e.g. 290640151).

    Returns:
        List of mail ID strings found in the thread; empty when no metadata
        file could be obtained for MailID.
    """
    MailIDXMLPath = generateMailMetadata(MailID)
    if not MailIDXMLPath:
        # generateMailMetadata signals an unusable ID with an empty path.
        return []
    threadID = ET.parse(MailIDXMLPath).getroot().find('ThreadId').text

    # N.B. Search query very straightforward, no need for separate function
    thread_file_name = threadID + "threadID.xml"
    call_command = [AconexTestAPI1_location, AconexProject_location + "mail/" + threadID + "/thread",
                    XmlSave_location, thread_file_name]
    subprocess.call(call_command)
    thread_root = ET.parse(XmlSave_location + thread_file_name).getroot()

    relatedMailIDs = []
    # Walk the Mail elements directly instead of re-finding each one by XPath:
    # iter() also yields the root element, which a './/Mail' search cannot match.
    for mailThreadItem in thread_root.iter('Mail'):
        relatedMailID = mailThreadItem.get('MailId')
        if relatedMailID is None:
            continue
        relatedMailIDs.append(relatedMailID)

        relatedMailXMLPath = generateMailMetadata(relatedMailID)
        if not relatedMailXMLPath:
            continue

        reply_type_element = mailThreadItem.find('ReplyType')
        replyType = reply_type_element.text if reply_type_element is not None else None
        # iter() includes the element itself, hence the -1.
        numberofreplies = sum(1 for _ in mailThreadItem.iter('Mail')) - 1

        # add the reply type and number of replies to the mailid.xml file for
        # use later in determining approval status
        tree = ET.parse(relatedMailXMLPath)
        root = tree.getroot()
        replies_tag = root.find('NumberOfReplies')
        if replies_tag is None:
            replies_tag = ET.SubElement(root, 'NumberOfReplies')
        mail_type_tag = root.find('MailType')
        if mail_type_tag is None:
            mail_type_tag = ET.SubElement(root, 'MailType')
        replies_tag.text = str(numberofreplies)
        mail_type_tag.text = replyType
        tree.write(relatedMailXMLPath)
    return relatedMailIDs
#Searches through document metadata xml to find every transmit number, and returns all the mail numbers in threads with those transmittals
#Searches through all mail that includes the document number in either the subject or body of the message
#expects a path to an existing document metadata xml file, document ID (19 digit number), document number (e.g. 1EW02-CSJ-DS-DES-S003-000110)
#returns a list of Mail IDs
#as a side effect, downloads any outstanding mail xmls
def findAllRelatedMail(documentMetadataPath, DocumentID, DocumentNumber):
    """Collect the IDs of all mail related to one document.

    Two sources are combined:

    1. Inbox and outbox mail whose subject or body (``corrdata``) contains the
       document number. The threads of those mails are deliberately not
       followed. The raw search results are saved as
       ``<DocumentID>relatedmailinbox.xml`` / ``...outbox.xml``.
    2. Every ``TransmitNumber`` in the document metadata XML: the mail number
       is resolved to a mail ID and all mails of its thread are added.

    As a side effect, the metadata XML of the mails found is downloaded.

    Args:
        documentMetadataPath: path to an existing document metadata XML file.
        DocumentID: document ID (19 digit number), used in result file names.
        DocumentNumber: document number, e.g. 1EW02-CSJ-DS-DES-S003-000110.

    Returns:
        List of mail IDs without duplicates, in order of first appearance.
    """
    document_id = str(DocumentID)
    collected_ids = []

    number_filter = '(subject:' + DocumentNumber + ' OR corrdata:' + DocumentNumber + ')'
    for mail_box in ("inbox", "outbox"):
        result_file_name = document_id + "relatedmail" + mail_box + ".xml"
        search_query = generateSearchQuery(mail_box, [number_filter], ['docno'], "Full")
        call_command = [AconexTestAPI1_location, AconexProject_location + search_query,
                        XmlSave_location, result_file_name]
        subprocess.call(call_command)
        search_results = ET.parse(XmlSave_location + result_file_name).getroot().find('SearchResults')
        if search_results is None:
            continue
        for item in search_results.findall('Mail'):
            mail_id = item.get('MailId')
            if mail_id is not None:
                collected_ids.append(mail_id)

    # download all the xml files for these mails if needed
    for mailID in collected_ids:
        generateMailMetadata(mailID)

    document_root = ET.parse(documentMetadataPath).getroot()
    for transmit_number in document_root.findall('.//TransmitNumber'):
        if not transmit_number.text:
            continue
        MailID = generateMailIDfromMailNumber(transmit_number.text)
        # -1 / None mean the mail number could not be resolved; there is no
        # thread to follow in that case.
        if MailID is None or MailID == -1:
            continue
        collected_ids.extend(generateRelatedMailIDs(MailID))

    # remove duplicates while keeping first-seen order
    seen = set()
    relatedMailIDslist = []
    for mail_id in collected_ids:
        if mail_id not in seen:
            seen.add(mail_id)
            relatedMailIDslist.append(mail_id)
    return relatedMailIDslist
#given a mail item, uses its data to decide if the mail is approved or not approved and gives a reason
#expects a single mail ID (9 digit number)
#returns path to xml file for the mail, this xml file includes "ApprovalStatus" and "ApprovalStatusReason" tags
def classifyMailItem(mailID, DocumentId, DocumentNumber):
    """Copy a document event's approval verdict onto the matching mail XML.

    The mail's number (``MailNo``) is looked up among the ``TransmitNumber``
    tags of the document's event log. When exactly one ``CDEventLog`` matches,
    its ``ApprovalStatus`` and ``ApprovalStatusReason`` are written into the
    mail XML (tags are created when missing; a reason not yet recorded is
    appended to an existing reason) and the mail XML is saved.

    Args:
        mailID: mail ID (9 digit number) whose ``<mailID>mailid.xml`` exists.
        DocumentId: document ID whose ``<DocumentId>documentID.xml`` exists.
        DocumentNumber: document number; only printed at present.

    Returns:
        Path of the mail XML file.
    """
    print(mailID)
    print(DocumentId)
    print(DocumentNumber)

    mail_xml_path = XmlSave_location + str(mailID) + "mailid.xml"
    document_xml_path = XmlSave_location + str(DocumentId) + "documentID.xml"

    # get the mail number from the mailID file
    mail_tree = ET.parse(mail_xml_path)
    mail_root = mail_tree.getroot()
    mailNumber = mail_root.find('MailNo').text

    # search the document xml for that mail number
    document_root = ET.parse(document_xml_path).getroot()
    # N.B. do not leave spaces around the = sign
    matching_events = document_root.findall('.//CDEventLog[TransmitNumber="' + mailNumber + '"]')

    if len(matching_events) == 1:
        event = matching_events[0]
        ApprovalStatus = event.find('ApprovalStatus').text
        ApprovalStatusReason = event.find('ApprovalStatusReason').text
        print("approval status" + ApprovalStatus)
        print("approval status reason" + ApprovalStatusReason)

        # Use a dedicated variable per tag so the status and the reason can
        # never be written to the wrong element.
        status_tag = mail_root.find('ApprovalStatus')
        if status_tag is None:
            status_tag = ET.SubElement(mail_root, 'ApprovalStatus')
        status_tag.text = ApprovalStatus

        reason_tag = mail_root.find('ApprovalStatusReason')
        if reason_tag is None:
            reason_tag = ET.SubElement(mail_root, 'ApprovalStatusReason')
            reason_tag.text = ApprovalStatusReason
        else:
            existing_reason = reason_tag.text or ""
            # Append, but do not repeat a reason recorded by an earlier call.
            if ApprovalStatusReason not in existing_reason:
                reason_tag.text = existing_reason + ApprovalStatusReason

        # Persist the change; without this the classification is lost.
        mail_tree.write(mail_xml_path)
    elif len(matching_events) > 1:
        print(str(len(matching_events)) + "mails found with mail number " + str(mailNumber) + ", very unexpected.")
    else:
        print("element has length " + str(len(matching_events)) + ", reasonably expected.")
    print("")

    # ---- Design notes for future signals (not implemented yet) ----
    # Idea: keep an approval status confidence score; if it ends at more than
    # 60, say approved, less than -60, no approval.
    # add ApprovalStatus and ApprovalStatusReason tags to the xml file
    # try to interpret if mail item is an approval or rejection
    # signal 1: do the changes to the document include a 'Status 1' or 'Approved' status?
    #   import any existing Approval status tags from the document Id xml file
    #   this is pretty final, set confidence +100
    # signal 2: is the mail type a rejection? - but to what documents exactly does it respond to?
    #   import any existing Approval status tags from the document Id xml file
    #   this is pretty final, can set ApprovalStatus -100
    # signal 2: who sent the document?
    #   build into a ML model
    #   if sent by CSJV +10
    #   if not sent by CSJV -10
    # signal 3: has the mail been marked 'closed out'?
    #   pull out of 'Status tag' of the mail
    #   build into a ML model, but ignore for now
    #   not sure about the exact logic that can be applied here, leave for now
    # signal 3: does the response contain the word 'accepted' or 'approved' in its text, then likely to be approved
    #   need a robust way of separating the first mail out of the chain
    #   then an ML model or something less sophisticated?
    #   reasonably strong evidence, +30 if true
    # signal 3b: does the filename of the attached document contain the word 'accepted' or 'approved' or 'signed', then likely to be approved
    #   fairly strong evidence, +40 if its true
    #
    # Disabled draft of the text heuristic (kept for reference):
    #   x = root.find('MailData').text #the text in the response
    #   y = root.find('.//FileName') #the filename - not all mails will have an attached document
    #   if y == None:
    #       y = "abc" #This is a short term fudge, to fix
    #   else:
    #       y = y.text
    #   if x != None:
    #       if 'from' in x:
    #           x = x.split('From:')[0]
    #       if (('approv' in x or 'accept' in x or 'Approv' in x or 'Accept' in x or 'approv' in y or 'accept' in y) and ('for approv' not in x)):
    #           print "Approval very probably granted on " + root.find('MailNo').text
    #       #signal 4: does the response contain the word 'amend' in its text, unlikely to be approved
    #       elif ('marked' in x or 'marked' in y or 'to be addressed' in x):
    #           print "Very probably no approval on " + root.find('MailNo').text
    #       else:
    #           print "Probably no approval on " + root.find('MailNo').text
    #
    # signal 4: from the thread information, has the message been responded to?
    #   if the last piece of mail, then some evidence of approval, +10
    # signal 5:
    #   does the message look like document control responding to a DNR?
    #   i.e. from document control, containing the document number in the body of the message, DNR is in the thread (especially message following a DNR)
    # signal 6:
    #   does the message look like document control signalling an upload to eB?
    #   a mail number with INT is in the body of the text, the mail number an INT, does person who sent it have <JobTitle> "Document Controller"?
    # signal 7:
    #   does the message look like document control coming back from eB?
    #   does the transmit have both the document and a comments sheet, does the person who sent it have <JobTitle> "Document Controller"?
    return mail_xml_path
#key inputs for the program to function
# Module-level configuration. The functions in this file read these names as
# globals, so they must keep exactly these spellings.
AconexTestAPI1_location = r"C:/Users/david/Documents/Visual Studio 2015/Projects/AxonexTestAPI1/AxonexTestAPI1/bin/Debug/AxonexTestAPI1.exe"
AconexProject_location = "https://uk1.aconex.co.uk/api/projects/268441385/"
SearchQuery = ""
XmlSave_location = "D:/AllAconexMail/"
# True clears the folder of any existing xml files to give a fresh start;
# otherwise they stay.
clearXmlSave_location = True
clearXmlSaveLocation(clearXmlSave_location)

# The overarching xml: where the numbers of every document returned in the
# search are stored. The file is named after the current timestamp.
XmlSaveFileName = "%s.xml" % datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')

# The overarching search query, e.g. 'find every TBS in package 27c' or
# 'find TSB-000180'. Alternative queries are kept here for reference:
#SearchQuery = generateSearchQuery("document",["attribute3:P027c*","(docno:TBS* OR docno:TSB*)"],["docno","title","statusid"],"Number_Limited&search_result_size=25")
#SearchQuery = generateSearchQuery("document",["attribute3:P027c*","(docno:TBS-000152)"],["docno","title","statusid"],"Number_Limited&search_result_size=25")
SearchQuery = generateSearchQuery(
    "document",
    ["attribute3:P027c*", "docno:1EW02-CSJ-HS-MST-S0003-*"],
    ["docno", "title", "statusid"],
    "Full",
)
call_command = [
    AconexTestAPI1_location,
    AconexProject_location + SearchQuery,
    XmlSave_location,
    XmlSaveFileName,
]
subprocess.call(call_command)

# Read the search result back and walk every document it lists.
tree1 = ET.parse(XmlSave_location + XmlSaveFileName)
root1 = tree1.getroot()
searchResults = root1.find('SearchResults')
for child in searchResults:
    # NOTE: DocumentId is deliberately a module-level name; it is also read
    # as a global elsewhere in this file.
    DocumentId = child.attrib['DocumentId']
    DocumentNumber = child.find('DocumentNumber').text
    #print DocumentNumber + " (" + child.find('Title').text + ")"
    documentMetadataPath = generateDocumentMetadata(DocumentId)

    # Find the mail ids of every mail related to this document.
    relatedMailIDslist = findAllRelatedMail(documentMetadataPath, DocumentId, DocumentNumber)

    # Go through every mail collected (classification currently disabled).
    for mailID in relatedMailIDslist:
        pass
        #classifyMailItem(mailID,DocumentId,DocumentNumber)

    # Ideas not implemented yet:
    #  - go through every event in the document ID, follow onto mail when a
    #    mail is available
    #  - other places mail items give clues, e.g. from document control to
    #    say issued; if issued, that guarantees CSJV should have signed it
    #  - if given code 5 or code 1 then CSJV will have signed it
    #  - search using mailid to find thread id and related mail items
    print("")
    print("")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.