Skip to content

Instantly share code, notes, and snippets.

@molekilla
Created October 5, 2013 14:45
Show Gist options
  • Save molekilla/6841788 to your computer and use it in GitHub Desktop.
Save molekilla/6841788 to your computer and use it in GitHub Desktop.
Parser C# Para Ana.gob.pa (casi seudocodigo)
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HttpLightClient;
using log4net;
using System.Configuration;
using log4net.Config;
using System.Xml.Linq;
using Cobuys.WebRobot;
using Cobuys.DataWriters;
namespace Cobuys.WebRobots.AduanaPanama
{
public class AduanaRobot : IWebRobot
{
// Single-character strings that are combined into search terms; filled from Numbers
// in LoadRobotStepsFromConfiguration.
private List<string> LetterDictionary = new List<string>();
// NOTE(review): not referenced in the visible code. The alphabet part reads "...oprqstuvxyz":
// 'r' and 'q' are swapped and 'w' is absent — confirm before using it.
private string Letters = "abcdefghijklmnoprqstuvxyz0123456789";
// Characters used to build the three-character search terms.
private string Numbers = "0123456789";
// log4net logger for this class.
private static ILog Log = LogManager.GetLogger(typeof(AduanaRobot));
// NOTE(review): the three page constants below are not referenced in the visible code.
const int ABOUTUS_PAGE = 1;
const int TRUSTEDPROFILE_PAGE = 2;
const int PRODUCTS_PAGE = 3;
// NOTE(review): never assigned or read in the visible code.
string[] years;
// NOTE(review): not referenced in the visible code.
string ENCODING = "UTF-8";
// HTTP command used to download result pages; (re)created by CreateHttpCommand.
HttpCommand command = null;
// Backing field of the DataWriter property.
private IDataWriter dataWriter;
// Database context created in LoadRobotStepsFromConfiguration; MainAduana uses it to
// check and record which searches were already processed.
AduanaDatabase dataContext;
// Request definitions registered on the HTTP command in CreateHttpCommand.
List<HttpSettings> steps = new List<HttpSettings>();
// Every three-character combination of LetterDictionary entries; iterated by MainAduana.
List<string> SearchList = new List<string>();
// es-ES culture used by SaveImport to parse the date cell of each result row.
System.IFormatProvider SpanishDateFormat = new System.Globalization.CultureInfo("es-ES", true);
/// <summary>
/// Gets or sets the data writer stored in the <c>dataWriter</c> field.
/// </summary>
public IDataWriter DataWriter
{
    get { return dataWriter; }
    set { dataWriter = value; }
}
/// <summary>
/// Prepares the robot for a run: opens (or creates / resets) the output database,
/// builds the list of three-character search terms and creates the HTTP command.
/// </summary>
/// <remarks>
/// Reads the <c>AduanaPanama.DatabaseOutput</c> and <c>RobotIsServerDatabase</c> app settings.
/// When the database is not a server database and already exists, the user is asked on the
/// console whether to reset it; every key other than 'n' / 'N' deletes and recreates it.
/// The method may be called more than once: the search-term lists are rebuilt from scratch
/// each time instead of being appended to.
/// </remarks>
public void LoadRobotStepsFromConfiguration()
{
    // create database
    string DATABASE_OUTPUT = ConfigurationManager.AppSettings["AduanaPanama.DatabaseOutput"];
    dataContext = new AduanaDatabase(DATABASE_OUTPUT);
    bool isServerDB = bool.Parse(ConfigurationManager.AppSettings["RobotIsServerDatabase"]);
    if (isServerDB)
    {
        dataContext.Connection.Open();
    }
    else
    {
        Console.WriteLine("Starting database...");
        if (!dataContext.DatabaseExists())
        {
            dataContext.CreateDatabase();
            Console.WriteLine("Database created succesfully.");
        }
        else
        {
            Console.Write("Reset database ? (Y/N):");
            // Only an explicit 'n' keeps the existing data; the pressed choice is echoed back.
            bool doDelete = Console.ReadKey(true).KeyChar.ToString().ToLowerInvariant() != "n";
            Console.Write(doDelete ? "y" : "n");
            Console.WriteLine();
            if (doDelete)
            {
                dataContext.DeleteDatabase();
                dataContext.CreateDatabase();
            }
        }
    }

    // Start from empty lists: appending on a repeated call would duplicate every entry
    // and multiply the number of generated search terms.
    LetterDictionary.Clear();
    SearchList.Clear();
    foreach (char ch in Numbers)
    {
        LetterDictionary.Add(ch.ToString());
    }

    // Every ordered combination of three entries becomes one search term.
    foreach (string letterOne in LetterDictionary)
    {
        foreach (string letterTwo in LetterDictionary)
        {
            foreach (string letterThree in LetterDictionary)
            {
                SearchList.Add(letterOne + letterTwo + letterThree);
            }
        }
    }

    CreateHttpCommand();
}
/// <summary>
/// Creates the HTTP client and command used for downloading, applies the connection
/// settings from configuration and registers the single search request definition.
/// </summary>
/// <remarks>
/// Falls back to fixed defaults when <c>RobotConnectionLimit</c>, <c>RobotMaxServicePoints</c>
/// or <c>RobotKeepAlive</c> cannot be read or applied; the cause is logged at debug level.
/// The step list is rebuilt on every call so that calling this again (see ResetHttpCommand)
/// does not register the same request twice.
/// </remarks>
private void CreateHttpCommand()
{
    // start http client
    HttpWebClient<HttpCommand> httpClient = new HttpWebClient<HttpCommand>();
    try
    {
        int idleTime = 1;
        int connectionLimit = Int32.Parse(ConfigurationManager.AppSettings["RobotConnectionLimit"]);
        int maxServicePoints = Int32.Parse(ConfigurationManager.AppSettings["RobotMaxServicePoints"]);
        httpClient.SetServicePointSettings(connectionLimit, idleTime * 60 * 1000, maxServicePoints);
    }
    catch (Exception ex)
    {
        Log.Debug("Aduana - service point settings not usable, falling back to defaults", ex);
        httpClient.SetServicePointSettings(30, 2 * 60 * 1000, 15);
    }

    command = httpClient.CreateHttpCommand();
    try
    {
        command.KeepAlive = bool.Parse(ConfigurationManager.AppSettings["RobotKeepAlive"]);
    }
    catch (Exception ex)
    {
        Log.Debug("Aduana - RobotKeepAlive not usable, falling back to false", ex);
        command.KeepAlive = false;
    }

    // load links
    // A fresh list is assigned instead of adding to the old one, so repeated calls
    // register exactly one step.
    steps = new List<HttpSettings>
    {
        new HttpSettings
        {
            // {0} = search term, {1} = page index (the arguments MainAduana passes).
            QueryMask = "http://www.ana.gob.pa/aduana/index.php?" +
                // Alternative date-range query, kept for reference:
                //"calendario_desde={2}&calendario_hasta={3}&ruc=&importador=&arancel={0}&mercancia=&cantresxpag=100&pag=formprin&Accion_Consultar=Consultar&np={1}"
                "ruc=&importador={0}&tipo_oper=I&puerto=&arancel=&mercancia=&cantresxpag=100&pag=formprin&Accion_Consultar=Consultar&np={1}"
        }
    };
    command.RegisterSteps(steps);
}
/// <summary>
/// Pre-parse hook handed to the HTTP command when a page is requested.
/// It performs no transformation.
/// </summary>
/// <param name="data">Text received by the hook.</param>
/// <returns>The same <paramref name="data"/> reference, unchanged.</returns>
public string Preparser(string data)
{
    return data;
}
/// <summary>
/// Extracts an integer from a table cell by keeping only its digit characters.
/// </summary>
/// <param name="quantity">Raw cell text; may be null or contain non-digit characters.</param>
/// <returns>
/// The number formed by the digits of <paramref name="quantity"/>, or 0 when there are no
/// digits, the value does not fit in an <see cref="Int32"/>, or the input is null
/// (the raw text is then logged at debug level).
/// </returns>
private int GetQuantity(string quantity)
{
    // NOTE(review): separators are dropped, so "12.5" becomes 125. Callers use this for
    // PesoNeto / PesoBruto — confirm the source never reports fractional weights.
    StringBuilder digits = new StringBuilder();
    if (quantity != null)
    {
        foreach (char ch in quantity)
        {
            if (Char.IsDigit(ch))
            {
                digits.Append(ch);
            }
        }
    }

    // TryParse instead of catching exceptions: blank cells are an expected input,
    // and a catch-all would also hide unrelated failures.
    int result;
    if (Int32.TryParse(
            digits.ToString(),
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out result))
    {
        return result;
    }

    Log.Debug("Quantity: " + quantity);
    return 0;
}
/// <summary>
/// Extracts a decimal amount from a table cell by keeping only digits and '.' characters.
/// </summary>
/// <param name="currency">Raw cell text; may be null or contain symbols and separators.</param>
/// <returns>
/// The parsed amount, with '.' always treated as the decimal separator regardless of the
/// machine's culture; 0 when nothing parseable remains (the raw text is then logged at
/// debug level).
/// </returns>
private decimal GetDecimal(string currency)
{
    System.Diagnostics.Debug.WriteLine("decimal: " + currency);

    StringBuilder cleaned = new StringBuilder();
    if (currency != null)
    {
        foreach (char ch in currency)
        {
            if (Char.IsDigit(ch) || ch == '.')
            {
                cleaned.Append(ch);
            }
        }
    }

    // Leading/trailing dots are removed before parsing.
    // NOTE(review): presumably this strips the dot of a currency prefix such as "B/." — confirm.
    string text = cleaned.ToString().Trim('.');

    // Parse with the invariant culture: with the current culture, a machine configured for
    // es-ES would read '.' as a group separator and turn "1234.56" into 123456.
    decimal result;
    if (Decimal.TryParse(
            text,
            System.Globalization.NumberStyles.AllowDecimalPoint,
            System.Globalization.CultureInfo.InvariantCulture,
            out result))
    {
        return result;
    }

    Log.Debug("Decimal: " + currency);
    return 0;
}
/// <summary>
/// Tells whether an equivalent detail row is already stored for the given company.
/// </summary>
/// <param name="context">Database context to query.</param>
/// <param name="empresa">Company that owns the detail; its RUC scopes the lookup.</param>
/// <param name="detail">Detail to look for.</param>
/// <returns>
/// True when a stored detail of the same company has the same tariff fraction, total and
/// import date.
/// </returns>
private bool HasDetail(AduanaDatabase context, Empresa empresa, Detail detail)
{
    // Scope the duplicate check to the owning company. Without it, a detail of a different
    // company with the same tariff fraction, total and date was treated as already stored
    // and silently skipped.
    // NOTE(review): assumes ParentLinkID is a mapped column holding the owner's RUC, which is
    // what SaveImport assigns to it — confirm against the Detail mapping.
    string ruc = empresa.RUC;

    // Any() lets the provider stop at the first match instead of counting every row.
    return context.Details.Any(d =>
        d.ParentLinkID == ruc &&
        d.FraccionArancelaria == detail.FraccionArancelaria &&
        d.Total == detail.Total &&
        d.ImportDate == detail.ImportDate);
}
/// <summary>
/// Stores the import rows of one result page: one <c>Empresa</c> per distinct RUC and one
/// <c>Detail</c> per row that is not already stored.
/// </summary>
/// <param name="datos">Elements containing the result table; every descendant TR whose
/// CLASS attribute is present and empty is treated as a data row.</param>
/// <remarks>
/// Changes are submitted row by row, so rows saved before a failure stay saved. Row cells
/// are read by position; a row with fewer cells than expected throws.
/// </remarks>
private void SaveImport(IEnumerable<XElement> datos)
{
    string DATABASE_OUTPUT = ConfigurationManager.AppSettings["AduanaPanama.DatabaseOutput"];

    // Dispose the context when done; one is created for every page.
    // NOTE(review): assumes AduanaDatabase is IDisposable (it exposes the LINQ to SQL
    // DataContext members used here) — confirm.
    using (AduanaDatabase context = new AduanaDatabase(DATABASE_OUTPUT))
    {
        var rows = from tr in datos.Descendants("TR")
                   where tr.Attribute("CLASS") != null && tr.Attribute("CLASS").Value.Length == 0
                   select tr;

        foreach (var row in rows)
        {
            // key values for row
            // Materialise once: indexing a lazy Descendants() sequence re-walks the tree each time.
            List<XElement> cells = row.Descendants("TD").ToList();
            string dateData = cells[0].Value;
            XElement rucData = cells[1];
            XElement detailData = cells[2];

            // The values are read from every second TD of the detail cell, starting at index 1.
            List<XElement> detailItems = detailData.Descendants("TD").ToList();
            Detail detail = new Detail();
            detail.Puerto = detailItems[1].Value;
            detail.FraccionArancelaria = detailItems[3].Value;
            detail.Cantidad = detailItems[5].Value;
            detail.PesoNeto = GetQuantity(detailItems[7].Value);
            detail.PesoBruto = GetQuantity(detailItems[9].Value);
            detail.ValorFOB = GetDecimal(detailItems[11].Value);
            detail.ValorFlete = GetDecimal(detailItems[13].Value);
            detail.ValorSeguro = GetDecimal(detailItems[15].Value);
            detail.ValorCIF = GetDecimal(detailItems[17].Value);
            detail.ImpuestoImportacion = GetDecimal(detailItems[19].Value);
            detail.ImpuestoITBM = GetDecimal(detailItems[21].Value);
            detail.ImpuestoPetroleo = GetDecimal(detailItems[23].Value);
            detail.ImpuestoISC = GetDecimal(detailItems[25].Value);
            detail.Total = GetDecimal(detailItems[27].Value);

            // First child element carries the origin (its first character is skipped);
            // the node two positions after it is used as the description.
            detail.Procedencia = ((XElement)detailData.FirstNode).Value.Substring(1);
            detail.Descripcion = detailData.FirstNode.NextNode.NextNode.ToString().Trim();

            Empresa empresa = new Empresa
            {
                ImportDate = DateTime.Parse(dateData, SpanishDateFormat),
                LastUpdated = DateTime.Now,
                RUC = rucData.Descendants("SPAN").ElementAt(0).Value.Replace("RUC: ", string.Empty),
            };
            // The company name is the cell text without the "RUC: ..." part; '#' stands in for 'Ñ'.
            empresa.Name = rucData.Value.Replace("RUC: " + empresa.RUC, string.Empty).Replace("#", "Ñ");

            // One lookup decides both whether the company exists and which entity owns the detail.
            string ruc = empresa.RUC;
            Empresa owner = context.Empresas.FirstOrDefault(emp => emp.RUC == ruc);
            bool isNewEmpresa = owner == null;
            if (isNewEmpresa)
            {
                context.Empresas.InsertOnSubmit(empresa);
                owner = empresa;
            }

            detail.ImportDate = empresa.ImportDate;
            detail.ParentLinkID = empresa.RUC.ToString();
            if (!HasDetail(context, empresa, detail))
            {
                owner.Details.Add(detail);
                context.Details.InsertOnSubmit(detail);
                Console.WriteLine("Detail: " + detail.Descripcion + " added");
            }
            else
            {
                Console.WriteLine("Detail: " + detail.Descripcion + " already added");
            }

            Console.WriteLine(empresa.RUC + " " + empresa.Name + (isNewEmpresa ? " added" : " already in database"));
            context.SubmitChanges();
        }

        // Totals are logged once per page; counting both tables after every row
        // issued two extra full-table queries per row.
        int count = context.Empresas.Count();
        Log.Info("Count Aduana - Empresas:" + count.ToString());
        int count1 = context.Details.Count();
        Log.Info("Count Aduana - Details:" + count1.ToString());
    }
}
/// <summary>
/// Drops the current HTTP command and builds a new one through CreateHttpCommand.
/// </summary>
private void ResetHttpCommand()
{
    // Clear the reference first so a failure while recreating does not leave the old command in place.
    command = null;

    CreateHttpCommand();
}
/// <summary>
/// Runs the download: for every five-day window from the configured start date up to now,
/// and for every search term, downloads all result pages and stores them.
/// </summary>
/// <remarks>
/// Reads <c>AduanaPanama.StartFromDate</c> (format yyyy-MM-dd) and returns immediately, after
/// reporting it, when the value is missing or malformed. A window/term pair that was already
/// recorded as consulted is skipped. A pair whose download exhausted its retries is NOT
/// recorded, so it is attempted again the next time the same pair comes up.
/// NOTE(review): the date window only feeds the consulted-key; the request itself is built
/// from the search term and page index.
/// </remarks>
public void MainAduana()
{
    DateTime fromDate;
    // Invariant culture: the configured value is machine-readable and must not depend on the
    // calendar or culture of the machine running the robot.
    if (!DateTime.TryParseExact(
            ConfigurationManager.AppSettings["AduanaPanama.StartFromDate"],
            "yyyy-MM-dd",
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out fromDate))
    {
        Console.WriteLine("AduanaPanama.StartFromDate configuration key is not in the format of 'yyyy-MM-dd'. Exiting application...");
        Log.Info("AduanaPanama.StartFromDate configuration key is not in the format of 'yyyy-MM-dd'. Exiting application...");
        return;
    }

    DateTime toDate = fromDate.AddDays(5);
    do
    {
        foreach (string letter in SearchList)
        {
            string key = "DateSequence_" + letter + fromDate.ToFileTime() + toDate.ToFileTime();
            if (dataContext.HasConsultaID(key))
            {
                Console.WriteLine("Data from " + fromDate.ToString() + " to " + toDate.ToString() + " for search '" + letter + "' already in database.");
                continue;
            }

            if (DownloadAllPages(letter))
            {
                dataContext.SaveConsultaID(key);
            }
            else
            {
                // Leaving the key unrecorded keeps a failed search from being treated as done.
                Log.Info("Aduana - search '" + letter + "' gave up after repeated failures; not marked as consulted");
            }
        }

        fromDate = toDate;
        toDate = fromDate.AddDays(5);
    } while (fromDate < DateTime.Now);
}

/// <summary>
/// Downloads and stores every result page for one search term.
/// </summary>
/// <param name="letter">Search term sent as the first request argument.</param>
/// <returns>
/// True when paging ended normally (no document, no result table, or a table with a single
/// TR); false when more than five attempts failed.
/// </returns>
private bool DownloadAllPages(string letter)
{
    const int MaxRetries = 5;
    int pageIndex = 0;
    int retries = 0;

    while (true)
    {
        try
        {
            XDocument document = command.GetDocumentForUrl(0, Preparser, letter, pageIndex.ToString());
            if (document == null)
            {
                // Stop instead of requesting the same page forever.
                return true;
            }

            IEnumerable<XElement> table = document.DescendantsFromAttributeWithValue("TABLE", "ID", "tablaDatos");
            if (!table.Any() || table.Descendants("TR").Count() == 1)
            {
                // No table, or a table with a single TR: nothing (more) to save.
                return true;
            }

            SaveImport(table);
            pageIndex++;
        }
        catch (Exception ex)
        {
            // Retry the same page, including the very first one; the failure budget is
            // shared by all pages of this search.
            retries++;
            Console.WriteLine("Retry " + retries.ToString() + " ... ");
            Log.Debug("Aduana - search '" + letter + "' page " + pageIndex.ToString() + " failed", ex);
            if (retries > MaxRetries)
            {
                return false;
            }
        }
    }
}
/// <summary>
/// Entry point of the robot: configures logging, runs MainAduana until it completes and
/// then waits for a key press.
/// </summary>
/// <remarks>
/// A <see cref="System.Net.WebException"/> is logged at debug level and retried without
/// counting towards the limit; any other exception is logged as an error and counts towards
/// a limit of 25 attempts. A short pause separates attempts so a persistent failure does not
/// turn into a tight loop. The completion prompt runs outside the retry scope, so a console
/// failure there cannot trigger another full download.
/// </remarks>
public void Start()
{
    const int RetryDelayMilliseconds = 5000;

    XmlConfigurator.Configure();

    bool completed = false;
    int retries = 0;
    while (!completed && retries < 25)
    {
        try
        {
            MainAduana();
            completed = true;
        }
        catch (System.Net.WebException webex)
        {
            // Deliberately not counted: network errors are retried for as long as they last.
            Log.Debug("Aduana - Ignored Error", webex);
        }
        catch (Exception ex)
        {
            Log.Error("Aduana", ex);
            retries++;
        }

        if (!completed && retries < 25)
        {
            System.Threading.Thread.Sleep(RetryDelayMilliseconds);
        }
    }

    if (completed)
    {
        Console.WriteLine("Press ENTER to exit. Aduana downloaded completed");
        Console.ReadKey();
    }
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment