Created
October 5, 2013 14:45
-
-
Save molekilla/6841788 to your computer and use it in GitHub Desktop.
Parser C# Para Ana.gob.pa (casi seudocodigo)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using Cobuys.DataWriters;
using Cobuys.WebRobot;
using HttpLightClient;
using log4net;
using log4net.Config;
namespace Cobuys.WebRobots.AduanaPanama | |
{ | |
public class AduanaRobot : IWebRobot | |
{ | |
// ---- Search term generation ----
// Single-character strings that are combined into the three-character search terms.
private List<string> LetterDictionary = new List<string>();
// Candidate alphabets; only Numbers is read by the code in this class.
private string Letters = "abcdefghijklmnoprqstuvxyz0123456789";
private string Numbers = "0123456789";
// Every search term that is submitted to the site, in submission order.
private List<string> SearchList = new List<string>();

// ---- Logging ----
private static ILog Log = LogManager.GetLogger(typeof(AduanaRobot));

// ---- Values not referenced by the code in this class ----
private const int ABOUTUS_PAGE = 1;
private const int TRUSTEDPROFILE_PAGE = 2;
private const int PRODUCTS_PAGE = 3;
private string[] years;
private string ENCODING = "UTF-8";

// ---- HTTP ----
// Command used to fetch result pages, and the request templates registered on it.
private HttpCommand command = null;
private List<HttpSettings> steps = new List<HttpSettings>();

// ---- Output ----
private IDataWriter dataWriter;
private AduanaDatabase dataContext;

// es-ES culture passed to DateTime.Parse when reading the import date of a row.
private System.IFormatProvider SpanishDateFormat = new System.Globalization.CultureInfo("es-ES", true);
/// <summary>
/// Gets or sets the data writer assigned to this robot.
/// </summary>
public IDataWriter DataWriter
{
    get { return dataWriter; }
    set { dataWriter = value; }
}
/// <summary>
/// Prepares the robot for a run: opens or creates the output database, builds the list
/// of search terms and creates the HTTP command.
/// </summary>
/// <remarks>
/// Reads the <c>AduanaPanama.DatabaseOutput</c> and <c>RobotIsServerDatabase</c> app settings.
/// <c>RobotIsServerDatabase</c> must hold a valid boolean: <c>bool.Parse</c> throws when the
/// key is missing or malformed. Calling this method more than once rebuilds the search
/// list from scratch instead of appending to it.
/// </remarks>
public void LoadRobotStepsFromConfiguration()
{
    string databaseOutput = ConfigurationManager.AppSettings["AduanaPanama.DatabaseOutput"];
    dataContext = new AduanaDatabase(databaseOutput);

    bool isServerDB = bool.Parse(ConfigurationManager.AppSettings["RobotIsServerDatabase"]);
    if (isServerDB)
    {
        dataContext.Connection.Open();
    }
    else
    {
        PrepareLocalDatabase();
    }

    BuildSearchList();
    CreateHttpCommand();
}

/// <summary>
/// Creates the local database when it does not exist; otherwise asks on the console
/// whether to reset it and recreates it unless the answer is "n".
/// </summary>
private void PrepareLocalDatabase()
{
    Console.WriteLine("Starting database...");
    if (!dataContext.DatabaseExists())
    {
        dataContext.CreateDatabase();
        Console.WriteLine("Database created succesfully.");
        return;
    }

    Console.Write("Reset database ? (Y/N):");
    // Only an explicit "n"/"N" keeps the existing data; every other key resets the database.
    bool keepDatabase = Console.ReadKey(true).KeyChar.ToString().ToLowerInvariant() == "n";
    Console.Write(keepDatabase ? "n" : "y");
    Console.WriteLine();
    if (!keepDatabase)
    {
        dataContext.DeleteDatabase();
        dataContext.CreateDatabase();
    }
}

/// <summary>
/// Fills <c>SearchList</c> with every three-character combination of the characters
/// in <c>Numbers</c> ("000" through "999").
/// </summary>
private void BuildSearchList()
{
    // Start from empty lists so a repeated call does not duplicate search terms.
    LetterDictionary.Clear();
    SearchList.Clear();

    foreach (char ch in Numbers)
    {
        LetterDictionary.Add(ch.ToString());
    }

    foreach (string letterOne in LetterDictionary)
    {
        foreach (string letterTwo in LetterDictionary)
        {
            foreach (string letterThree in LetterDictionary)
            {
                SearchList.Add(letterOne + letterTwo + letterThree);
            }
        }
    }
}
/// <summary>
/// Creates a fresh HTTP client and command, applies the connection settings from
/// configuration and registers the single query step used by the robot.
/// </summary>
/// <remarks>
/// <c>RobotConnectionLimit</c> and <c>RobotMaxServicePoints</c> are used only when both
/// parse as integers; otherwise the built-in defaults (30, two minutes, 15) apply.
/// <c>RobotKeepAlive</c> defaults to <c>false</c> when missing or malformed.
/// </remarks>
private void CreateHttpCommand()
{
    // start http client
    HttpWebClient<HttpCommand> httpClient = new HttpWebClient<HttpCommand>();

    int connectionLimit;
    int maxServicePoints;
    bool hasConnectionLimit = Int32.TryParse(
        ConfigurationManager.AppSettings["RobotConnectionLimit"], out connectionLimit);
    bool hasMaxServicePoints = Int32.TryParse(
        ConfigurationManager.AppSettings["RobotMaxServicePoints"], out maxServicePoints);

    if (hasConnectionLimit && hasMaxServicePoints)
    {
        int idleTime = 1;
        httpClient.SetServicePointSettings(connectionLimit, idleTime * 60 * 1000, maxServicePoints);
    }
    else
    {
        // Missing or malformed configuration: fall back to the built-in defaults.
        httpClient.SetServicePointSettings(30, 2 * 60 * 1000, 15);
    }

    command = httpClient.CreateHttpCommand();

    // TryParse leaves keepAlive false when the setting is missing or not a boolean.
    bool keepAlive;
    bool.TryParse(ConfigurationManager.AppSettings["RobotKeepAlive"], out keepAlive);
    command.KeepAlive = keepAlive;

    // load links
    // The list is a field that outlives the command; clear it so that re-creating the
    // command does not register the same step again.
    steps.Clear();
    steps.Add(
        new HttpSettings
        {
            // {0} = search term (importador), {1} = page number (np)
            QueryMask = "http://www.ana.gob.pa/aduana/index.php?" +
                "ruc=&importador={0}&tipo_oper=I&puerto=&arancel=&mercancia=&cantresxpag=100&pag=formprin&Accion_Consultar=Consultar&np={1}"
        }
    );
    command.RegisterSteps(steps);
}
/// <summary>
/// Pre-processing hook handed to the HTTP command; returns the downloaded text unchanged.
/// </summary>
/// <param name="data">Raw text to pre-process.</param>
/// <returns>The same <paramref name="data"/> instance.</returns>
public string Preparser(string data)
{
    string unchanged = data;
    return unchanged;
}
/// <summary>
/// Extracts an integer from text by keeping only its digit characters.
/// </summary>
/// <param name="quantity">Text containing the number; may be null.</param>
/// <returns>
/// The integer formed by the digits of <paramref name="quantity"/>, or 0 (with a debug
/// log entry) when there are no digits, the value does not fit in an <c>int</c>, or the
/// input is null.
/// </returns>
private int GetQuantity(string quantity)
{
    // NOTE(review): every non-digit is dropped, including a decimal point, so "12.5"
    // yields 125 - confirm the source values are always whole numbers.
    StringBuilder digits = new StringBuilder();
    if (quantity != null)
    {
        foreach (char ch in quantity)
        {
            if (Char.IsDigit(ch))
            {
                digits.Append(ch);
            }
        }
    }

    int parsed;
    if (Int32.TryParse(digits.ToString(), NumberStyles.None, CultureInfo.InvariantCulture, out parsed))
    {
        return parsed;
    }

    Log.Debug("Quantity: " + quantity);
    return 0;
}
/// <summary>
/// Extracts a decimal amount from text by keeping only digits and '.' characters.
/// </summary>
/// <param name="currency">Text containing the amount; may be null.</param>
/// <returns>
/// The parsed amount, with '.' always read as the decimal separator regardless of the
/// machine culture; 0 (with a debug log entry) when the remaining text is not a valid
/// number or the input is null.
/// </returns>
private decimal GetDecimal(string currency)
{
    System.Diagnostics.Debug.WriteLine("decimal: " + currency);

    StringBuilder kept = new StringBuilder();
    if (currency != null)
    {
        foreach (char ch in currency)
        {
            if (Char.IsDigit(ch) || ch == '.')
            {
                kept.Append(ch);
            }
        }
    }

    // Leading and trailing dots are stripped before parsing - presumably to drop dots
    // that belong to a currency prefix or suffix rather than to the number; confirm
    // against real page data.
    string candidate = kept.ToString().Trim('.');

    // Parse with the invariant culture: under a culture whose group separator is '.'
    // (e.g. es-ES) the default parse would read "1234.56" as 123456.
    decimal parsed;
    if (Decimal.TryParse(candidate, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out parsed))
    {
        return parsed;
    }

    Log.Debug("Decimal: " + currency);
    return 0;
}
/// <summary>
/// Checks whether a detail with the same tariff fraction, total and import date is
/// already stored.
/// </summary>
/// <param name="context">Data context to query.</param>
/// <param name="empresa">Company the detail belongs to; not used by the lookup.</param>
/// <param name="detail">Detail whose values are looked up.</param>
/// <returns><c>true</c> when at least one matching detail exists.</returns>
private bool HasDetail(AduanaDatabase context, Empresa empresa, Detail detail)
{
    // NOTE(review): the match ignores the owning company, so equal values recorded for
    // two different companies count as the same detail - confirm this is intended.
    // Any() asks only whether a match exists instead of counting every match.
    return context.Details.Any(d =>
        d.FraccionArancelaria == detail.FraccionArancelaria &&
        d.Total == detail.Total &&
        d.ImportDate == detail.ImportDate);
}
/// <summary>
/// Stores every data row of a result table: the importing company (when not yet known)
/// and the import detail (when not already stored).
/// </summary>
/// <param name="datos">Elements of the result table; rows are the TR descendants whose
/// CLASS attribute is present and empty.</param>
/// <remarks>
/// Changes are submitted once per row, so rows saved before a failure stay saved. A row
/// that does not have the expected cell layout throws and aborts the remaining rows.
/// </remarks>
private void SaveImport(IEnumerable<XElement> datos)
{
    string databaseOutput = ConfigurationManager.AppSettings["AduanaPanama.DatabaseOutput"];

    // Dispose the context (and its connection) when the page has been processed.
    using (AduanaDatabase context = new AduanaDatabase(databaseOutput))
    {
        var rows = from tr in datos.Descendants("TR")
                   let cssClass = tr.Attribute("CLASS")
                   where cssClass != null && cssClass.Value.Length == 0
                   select tr;

        foreach (XElement row in rows)
        {
            // key values for row: date, company (RUC + name), detail
            List<XElement> cells = row.Descendants("TD").ToList();
            string dateData = cells[0].Value;
            XElement rucData = cells[1];
            XElement detailData = cells[2];

            Detail detail = ReadDetail(detailData);

            Empresa empresa = new Empresa
            {
                ImportDate = DateTime.Parse(dateData, SpanishDateFormat),
                LastUpdated = DateTime.Now,
                RUC = rucData.Descendants("SPAN").ElementAt(0).Value.Replace("RUC: ", string.Empty),
            };
            empresa.Name = rucData.Value.Replace("RUC: " + empresa.RUC, string.Empty).Replace("#", "Ñ");

            // One lookup answers both "is it known?" and "which entity owns the detail?".
            Empresa existingEmpresa = (from emp in context.Empresas
                                       where emp.RUC == empresa.RUC
                                       select emp).FirstOrDefault();
            bool isNewEmpresa = existingEmpresa == null;
            Empresa owner = isNewEmpresa ? empresa : existingEmpresa;
            if (isNewEmpresa)
            {
                context.Empresas.InsertOnSubmit(empresa);
            }

            detail.ImportDate = empresa.ImportDate;
            detail.ParentLinkID = empresa.RUC.ToString();
            if (!HasDetail(context, empresa, detail))
            {
                owner.Details.Add(detail);
                context.Details.InsertOnSubmit(detail);
                Console.WriteLine("Detail: " + detail.Descripcion + " added");
            }
            else
            {
                Console.WriteLine("Detail: " + detail.Descripcion + " already added");
            }
            Console.WriteLine(empresa.RUC + " " + empresa.Name + (isNewEmpresa ? " added" : " already in database"));

            context.SubmitChanges();

            int empresaCount = context.Empresas.Count();
            Log.Info("Count Aduana - Empresas:" + empresaCount.ToString());
            int detailCount = context.Details.Count();
            Log.Info("Count Aduana - Details:" + detailCount.ToString());
        }
    }
}

/// <summary>
/// Builds a <c>Detail</c> from the detail cell of a row. Values are read from the
/// odd-numbered TD descendants (1, 3, 5, ...); the even-numbered ones are skipped.
/// </summary>
private Detail ReadDetail(XElement detailData)
{
    // Materialize once; positional access on the deferred sequence would re-walk it each time.
    List<XElement> items = detailData.Descendants("TD").ToList();

    Detail detail = new Detail();
    detail.Puerto = items[1].Value;
    detail.FraccionArancelaria = items[3].Value;
    detail.Cantidad = items[5].Value;
    detail.PesoNeto = GetQuantity(items[7].Value);
    detail.PesoBruto = GetQuantity(items[9].Value);
    detail.ValorFOB = GetDecimal(items[11].Value);
    detail.ValorFlete = GetDecimal(items[13].Value);
    detail.ValorSeguro = GetDecimal(items[15].Value);
    detail.ValorCIF = GetDecimal(items[17].Value);
    detail.ImpuestoImportacion = GetDecimal(items[19].Value);
    detail.ImpuestoITBM = GetDecimal(items[21].Value);
    detail.ImpuestoPetroleo = GetDecimal(items[23].Value);
    detail.ImpuestoISC = GetDecimal(items[25].Value);
    detail.Total = GetDecimal(items[27].Value);

    // The first child element carries the origin; its first character is dropped.
    detail.Procedencia = ((XElement)detailData.FirstNode).Value.Substring(1);
    // The third child node is used as the description.
    detail.Descripcion = detailData.FirstNode.NextNode.NextNode.ToString().Trim();
    return detail;
}
/// <summary>
/// Discards the current HTTP command and builds a new one via <c>CreateHttpCommand</c>.
/// </summary>
private void ResetHttpCommand()
{
    // Drop the old command first so it is not left in place if creation fails.
    command = null;

    CreateHttpCommand();
}
/// <summary>
/// Runs every search term for each five-day window from the configured start date up
/// to now, saving the rows of every result page.
/// </summary>
/// <remarks>
/// <c>AduanaPanama.StartFromDate</c> must be in <c>yyyy-MM-dd</c> format; otherwise the
/// method reports the problem and returns. A search is recorded as done (and skipped on
/// later runs) only when all of its pages were downloaded; a search that keeps failing
/// is left unrecorded so it is attempted again next time. The date window is part of the
/// checkpoint key only - the request itself carries just the search term and page number.
/// </remarks>
public void MainAduana()
{
    DateTime fromDate;
    bool hasStartDate = DateTime.TryParseExact(
        ConfigurationManager.AppSettings["AduanaPanama.StartFromDate"],
        "yyyy-MM-dd",
        null,
        DateTimeStyles.None,
        out fromDate);
    if (!hasStartDate)
    {
        Console.WriteLine("AduanaPanama.StartFromDate configuration key is not in the format of 'yyyy-MM-dd'. Exiting application...");
        Log.Info("AduanaPanama.StartFromDate configuration key is not in the format of 'yyyy-MM-dd'. Exiting application...");
        return;
    }

    DateTime toDate = fromDate.AddDays(5);
    do
    {
        foreach (string letter in SearchList)
        {
            string key = "DateSequence_" + letter + fromDate.ToFileTime() + toDate.ToFileTime();
            if (dataContext.HasConsultaID(key))
            {
                Console.WriteLine("Data from " + fromDate.ToString() + " to " + toDate.ToString() + " for search '" + letter + "' already in database.");
                continue;
            }

            if (DownloadSearch(letter))
            {
                dataContext.SaveConsultaID(key);
            }
            else
            {
                // Leave the key unrecorded so the search is picked up again on the next run.
                Log.Warn("Aduana - search '" + letter + "' did not complete and was not marked as done");
            }
        }

        fromDate = toDate;
        toDate = fromDate.AddDays(5);
    } while (fromDate < DateTime.Now);
}

/// <summary>
/// Downloads and saves every result page for one search term.
/// </summary>
/// <param name="letter">Search term placed in the query.</param>
/// <returns>
/// <c>true</c> when paging reached the end of the results; <c>false</c> when more than
/// five attempts failed.
/// </returns>
private bool DownloadSearch(string letter)
{
    const int MaxRetries = 5;
    int retries = 0;
    int pageIndex = 0;

    while (true)
    {
        try
        {
            XDocument document = command.GetDocumentForUrl(0, Preparser, letter, pageIndex.ToString());
            if (document == null)
            {
                // No document: stop paging rather than requesting the same page forever.
                return true;
            }

            List<XElement> table = document.DescendantsFromAttributeWithValue("TABLE", "ID", "tablaDatos").ToList();
            // A missing table, or a table with exactly one row, is treated as "no more results".
            if (table.Count == 0 || table.Descendants("TR").Count() == 1)
            {
                return true;
            }

            SaveImport(table);
            pageIndex++;
        }
        catch (Exception ex)
        {
            // The same page is requested again on the next iteration.
            retries++;
            Log.Warn("Aduana - request failed for search '" + letter + "', page " + pageIndex.ToString(), ex);
            Console.WriteLine("Retry " + retries.ToString() + " ... ");
            if (retries > MaxRetries)
            {
                return false;
            }
        }
    }
}
/// <summary>
/// Entry point: configures logging and runs <c>MainAduana</c>, running it again after a
/// failure until it completes or 25 non-web exceptions have been logged.
/// </summary>
public void Start()
{
    XmlConfigurator.Configure();

    int failures = 0;
    while (true)
    {
        try
        {
            MainAduana();
            Console.WriteLine("Press ENTER to exit. Aduana downloaded completed");
            Console.ReadKey();
            return;
        }
        catch (System.Net.WebException webex)
        {
            // Web errors are logged and do not count towards the failure limit.
            // NOTE(review): a persistent WebException therefore repeats without bound.
            Log.Debug("Aduana - Ignored Error", webex);
        }
        catch (Exception ex)
        {
            Log.Error("Aduana", ex);
            failures++;
        }

        if (failures >= 25)
        {
            return;
        }
    }
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment