Skip to content

Instantly share code, notes, and snippets.

@UdaraAlwis
Last active November 24, 2019 10:37
Show Gist options
  • Save UdaraAlwis/419d88bc62e65f86866a452a6d8c9e73 to your computer and use it in GitHub Desktop.
Save UdaraAlwis/419d88bc62e65f86866a452a6d8c9e73 to your computer and use it in GitHub Desktop.
Scrape off the list of Field IDs from Google Forms.
// Imports you might need! ;)
//using HtmlAgilityPack;
//using System;
//using System.Collections.Generic;
//using System.Linq;
//using System.Threading.Tasks;
/// <summary>
/// Loads the given Google Forms page and collects the distinct field IDs found on it.
/// A field ID is the value of the "name" attribute of an "input" or "textarea" element
/// whose name contains "entry." and does not contain "_sentinel".
/// </summary>
/// <param name="yourGoogleFormsUrl">URL of the Google Forms page to load.</param>
/// <returns>
/// The unique field IDs, in the order in which they are first encountered in the document.
/// </returns>
/// <remarks>Every field ID that is returned is also written to the console.</remarks>
private static async Task<List<string>> ScrapeOffListOfFieldIdsFromGoogleFormsAsync(string yourGoogleFormsUrl)
{
    HtmlWeb web = new HtmlWeb();
    var htmlDoc = await web.LoadFromWebAsync(yourGoogleFormsUrl);

    // Only "input" and "textarea" elements are treated as form fields
    var fields = new[] { "input", "textarea" };

    // Read the "name" attribute once per element, then keep only the names that
    // look like field IDs ("entry." marker) and are not the "_sentinel" helper
    // elements rendered for checkbox fields.
    var candidateFieldIds = htmlDoc.DocumentNode.Descendants()
        .Where(x => fields.Contains(x.Name))
        .Select(x => x.GetAttributeValue("name", ""))
        .Where(name => name.Contains("entry.") && !name.Contains("_sentinel"));

    // De-duplicate on the field ID itself rather than on the element's markup:
    // several elements can share one name while differing in other attributes,
    // and comparing their OuterHtml would let the same ID through more than once.
    var seenFieldIds = new HashSet<string>();
    var fieldIdList = new List<string>();
    foreach (var fieldId in candidateFieldIds)
    {
        // HashSet.Add returns false when the ID was already collected
        if (!seenFieldIds.Add(fieldId))
        {
            continue;
        }

        fieldIdList.Add(fieldId);
        Console.WriteLine(fieldId);
    }

    return fieldIdList;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment