TNHSAesop/SearchEngineCrawlPsudeo.cs

## SearchEngineCrawlPsudeo.cs
            // Main crawl loop: runs forever, handling exactly one crawl target per iteration.
            while (true)
            {
                // retrieve the next crawl target from a database or other persistent data store
                string url = GetNextCrawlTarget();

                // A URL needs to be crawled when it has never been indexed, OR when it has already
                // been indexed but has changed since the last crawl. The earlier form of this check
                // required both conditions to be false, so an indexed URL that had changed was
                // skipped instead of being re-crawled.
                // Because || short-circuits, the update check only runs for already-indexed URLs.
                // NOTE(review): assumes URLUpdatedSinceLastCrawl returns true when the document
                // changed after its last crawl - confirm against the helper's implementation.
                bool needsCrawl = !URLAlreadyIndexed(url) || URLUpdatedSinceLastCrawl(url);
                if (!needsCrawl)
                {
                    // already indexed and unchanged: nothing to do, move on to the next target
                    continue;
                }

                // crawl the url; any links present within the document are extracted and returned
                // as a list of new potential crawl targets
                List<string> potentialCrawlTargetsList = CrawlURL(url);

                // filter the potential crawl targets: ones that already exist are discarded, the
                // rest are kept for addition to the data store's processing queue
                List<string> newCrawlTargetsList = DoCrawlTargetsExist(potentialCrawlTargetsList);

                // only touch the crawl queue when there is something new to add
                if (newCrawlTargetsList.Count > 0)
                {
                    AddCrawlTargets(newCrawlTargetsList);
                }

                // now that new potential crawl targets have been extracted, send the URL over to
                // the indexing queue; the loop then naturally proceeds to the next iteration
                IndexURL(url);
            }


// visit us on the web at https://tortoiseandharesoftware.com/
	// Main crawl loop: runs forever, handling exactly one crawl target per iteration.
	while (true)
	{
		// retrieve the next crawl target from a database or other persistent data store
		string url = GetNextCrawlTarget();

		// A URL needs to be crawled when it has never been indexed, OR when it has already
		// been indexed but has changed since the last crawl. The earlier form of this check
		// required both conditions to be false, so an indexed URL that had changed was
		// skipped instead of being re-crawled.
		// Because || short-circuits, the update check only runs for already-indexed URLs.
		// NOTE(review): assumes URLUpdatedSinceLastCrawl returns true when the document
		// changed after its last crawl - confirm against the helper's implementation.
		bool needsCrawl = !URLAlreadyIndexed(url) || URLUpdatedSinceLastCrawl(url);
		if (!needsCrawl)
		{
			// already indexed and unchanged: nothing to do, move on to the next target
			continue;
		}

		// crawl the url; any links present within the document are extracted and returned
		// as a list of new potential crawl targets
		List<string> potentialCrawlTargetsList = CrawlURL(url);

		// filter the potential crawl targets: ones that already exist are discarded, the
		// rest are kept for addition to the data store's processing queue
		List<string> newCrawlTargetsList = DoCrawlTargetsExist(potentialCrawlTargetsList);

		// only touch the crawl queue when there is something new to add
		if (newCrawlTargetsList.Count > 0)
		{
			AddCrawlTargets(newCrawlTargetsList);
		}

		// now that new potential crawl targets have been extracted, send the URL over to
		// the indexing queue; the loop then naturally proceeds to the next iteration
		IndexURL(url);
	}


	// visit us on the web at https://tortoiseandharesoftware.com/