Last active
August 29, 2015 13:57
-
-
Save gpickin/9406303 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!---
	Incremental mail-log importer.

	Reads /pathtomylog/#thelogname# line by line and copies up to `batchsize`
	lines per request into ind_log, skipping rows that are already stored.
	Progress is kept in ind_control (one row per filename):
		dte_good      - timestamp of the FIRST line of the file when it was last
		                processed; a different first-line timestamp means the
		                file is treated as new and the import restarts at row 1
		int_good_end  - number of the last line already handled
		int_endoffile - 1 when the previous run reached the end of the file

	Relies on parseDateFromLine(), which returns the line's timestamp and also
	leaves the prefix-stripped line text in variables.theline.

	NOTE(review): thelogname is an unscoped cfparam, so it may be suppliable
	from the request (URL/form) - confirm, and restrict it to a bare file name
	if so, because it is used to build a file path below.
--->
<cfsetting requesttimeout="600">
<cfparam name="thelogname" default="clean_maillogH.log">
<cfinclude template="stopwatch.cfm">
<cfset ds = "dev_bktools">
<cfset stopwatch = makeStopwatch()>
<cfoutput>
<cfset stopwatch.start("Begin timing")>
<cfquery name="ind_control" datasource="#ds#">
	select dte_good, int_good_end, int_endoffile
	from ind_control
	where filename = <cfqueryparam value="#thelogname#" cfsqltype="cf_sql_varchar">
</cfquery>
<cfset batchsize = 50000>
<cfset thecounter = 1>
<cfloop file="/pathtomylog/#thelogname#" index="thelog">
	<cfif thecounter eq 1>
		<!--- The first line's timestamp identifies this version of the file. It is kept
		      in its own variable so per-line dates parsed below cannot overwrite it. --->
		<cfset filedate = parseDateFromLine(thelog)>
		<cfif ind_control.dte_good neq filedate>
			<!--- Different (or never seen) file: start from the top. --->
			<cfset startrow = 1>
		<cfelse>
			<!--- Same file as last time: resume after the last handled line. --->
			<cfset startrow = ind_control.int_good_end + 1>
			<cfif ind_control.int_endoffile is 1>
				<!--- The previous run already reached the end; this run only checks for growth. --->
				<cfset endoffile_found = 1>
			</cfif>
		</cfif>
		<cfset endrow = startrow + batchsize - 1>
	</cfif>
	<cfif thecounter gte startrow and thecounter lte endrow and not structkeyexists(variables, "endoffile_found")>
		<!--- Sets variables.theline as a side effect; both queries below read it. --->
		<cfset linedate = parseDateFromLine(thelog)>
		<cfquery name="logCheck" datasource="#ds#">
			select id
			from ind_log
			where dte_log = <cfqueryparam value="#linedate#" cfsqltype="cf_sql_timestamp">
			and txt_log = <cfqueryparam value="#theline#" cfsqltype="cf_sql_longvarchar">
		</cfquery>
		<cfif logCheck.recordcount eq 0>
			<cfquery datasource="#ds#">
				insert into ind_log (dte_log, txt_log)
				values (
					<cfqueryparam value="#linedate#" cfsqltype="cf_sql_timestamp">,
					<cfqueryparam value="#theline#" cfsqltype="cf_sql_longvarchar">
				)
			</cfquery>
		<cfelse>
			<p>Duplicate found</p>
		</cfif>
	</cfif>
	<!--- Keep counting past endrow: the total line count is needed to detect end of file. --->
	<cfset thecounter = thecounter + 1>
</cfloop>
#thecounter#
<cfset stopwatch.lap("Loop and Save Data")>
<!--- thecounter was incremented once after the final line, so the file holds thecounter - 1 lines. --->
<cfset linesread = thecounter - 1>
<!--- An empty file never sets filedate/startrow/endrow, so there is nothing to record. --->
<cfif linesread gt 0>
	<cfif endrow gte linesread>
		<!--- The batch window reached (or passed) the last line: clamp and flag end of file. --->
		<cfset endrow = linesread>
		<cfset endoffile = 1>
	<cfelse>
		<cfset endoffile = 0>
	</cfif>
	<cfif structkeyexists(variables, "endoffile_found") and linesread gt ind_control.int_good_end>
		<!--- Nothing was imported this run, but the file has grown since the last one:
		      clear the end-of-file flag so the next run picks up the new lines. --->
		<cfset endoffile = 0>
		<cfset endrow = ind_control.int_good_end>
	</cfif>
	<cfquery datasource="#ds#">
		update ind_control
		set dte_good = <cfqueryparam value="#filedate#" cfsqltype="cf_sql_timestamp">,
			int_good_end = <cfqueryparam value="#endrow#" cfsqltype="cf_sql_integer">,
			int_endoffile = <cfqueryparam value="#endoffile#" cfsqltype="cf_sql_integer">
		where filename = <cfqueryparam value="#thelogname#" cfsqltype="cf_sql_varchar">
	</cfquery>
</cfif>
<cfset stopwatch.lap("Saved to Control File")>
<cfdump var="#stopWatch.getTimeline()#">
<!---
	parseDateFromLine
	Extracts the leading timestamp from one mail-log line and returns it as a
	date/time value.

	@thelog  One raw line from the log file. If it starts with "maillog", everything
	         up to and including the first ":" is discarded first (presumably a
	         "filename:" prefix added when the logs were concatenated - confirm).

	Returns the timestamp that precedes " independence " (or, failing that,
	" new-independence ") in the line. The log text carries no year, so the
	current year is assumed; if that yields a moment in the future the date is
	moved back one year.

	Side effect: the prefix-stripped line is stored in variables.theline because
	the calling template reads it when writing the row to ind_log.

	Throws MailLog.UnrecognizedLine when neither marker is found after a timestamp.
--->
<cffunction name="parseDateFromLine" output="false">
	<cfargument name="thelog" required="yes">
	<!--- Function-local working values; declared up front so they never leak into the page scope. --->
	<cfset var markerpos = 0>
	<cfset var stamp = "">
	<cfset var parsed = "">
	<cfif left(arguments.thelog,7) eq "maillog">
		<!--- Keep only what follows the first colon. --->
		<cfset variables.theline = right(arguments.thelog, ( len(arguments.thelog)-find(":", arguments.thelog) ) )>
	<cfelse>
		<cfset variables.theline = arguments.thelog>
	</cfif>
	<!--- The timestamp is whatever comes before the first recognised marker. --->
	<cfset markerpos = find(' independence ', variables.theline)>
	<cfif markerpos eq 0>
		<cfset markerpos = find(' new-independence ', variables.theline)>
	</cfif>
	<!--- markerpos 0 = no marker, 1 = marker with nothing before it; either way there is no timestamp to read. --->
	<cfif markerpos lte 1>
		<cfthrow type="MailLog.UnrecognizedLine" message="Cannot locate a timestamp in the log line: no text found before ' independence ' or ' new-independence '." detail="#variables.theline#">
	</cfif>
	<cfset stamp = left(variables.theline, markerpos-1)>
	<cfset parsed = parsedatetime(dateformat(now(), "yyyy") & " " & stamp)>
	<cfif parsed gt now()>
		<!--- A future date means the entry was written late last year. --->
		<cfset parsed = dateadd("yyyy", -1, parsed)>
	</cfif>
	<cfreturn parsed>
</cffunction>
</cfoutput> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment