Skip to content

Instantly share code, notes, and snippets.

@fernando-almeida
Created November 18, 2022 16:01
Show Gist options
  • Save fernando-almeida/7a8ec71d39fdf48615297fa73091a8f3 to your computer and use it in GitHub Desktop.
Save fernando-almeida/7a8ec71d39fdf48615297fa73091a8f3 to your computer and use it in GitHub Desktop.
Extract Links From Google Document
using System.Collections.Generic;
using Google.Apis.Docs;
using Google.Apis.Docs.v1;
using Google.Apis.Docs.v1.Data;
using System.Data.Common;
using System.Xml.Linq;
using Newtonsoft.Json;
using Google.Apis.Auth.OAuth2;
using Google.Apis.Util.Store;
using Google.Apis.Requests;
// Collects the link objects referenced by a single paragraph.
//
// For each paragraph element, in order:
//   - a text run whose style carries a link yields that Link;
//   - a rich link element yields that RichLink;
//   - a footnote reference yields every link found in the referenced footnote's content;
//   - anything else yields nothing.
//
// document:  the document that owns the paragraph; used to resolve footnote ids.
// paragraph: the paragraph to scan.
// Returns a lazily evaluated sequence of Link / RichLink objects.
IEnumerable<IDirectResponseSchema> GenerateLinksFromParagraph(Document document, Paragraph paragraph)
{
    // A paragraph without an element list is treated as empty instead of crashing SelectMany.
    IEnumerable<ParagraphElement> elements = paragraph.Elements ?? Enumerable.Empty<ParagraphElement>();

    return elements.SelectMany(element =>
    {
        if (element.TextRun?.TextStyle?.Link is { } link)
        {
            return new IDirectResponseSchema[] { link };
        }

        if (element.RichLink is { } richLink)
        {
            return new IDirectResponseSchema[] { richLink };
        }

        if (element.FootnoteReference is { } footnoteReference)
        {
            // The footnotes map may be absent and the id may be missing or unknown;
            // in all of those cases there is simply nothing to report for this reference.
            if (document.Footnotes is null
                || footnoteReference.FootnoteId is null
                || !document.Footnotes.TryGetValue(footnoteReference.FootnoteId, out var footnote)
                || footnote?.Content is null)
            {
                return Enumerable.Empty<IDirectResponseSchema>();
            }

            return footnote.Content.SelectMany(
                structuralElement => GenerateLinksFromStructuralElement(document, structuralElement));
        }

        return Enumerable.Empty<IDirectResponseSchema>();
    });
}
// Collects the links of a table by visiting its rows, then each row's cells,
// then every structural element stored in each cell, in document order.
//
// document: the document that owns the table; forwarded to the element walker.
// table:    the table to scan.
// Returns a lazily evaluated sequence of the links found in the table.
IEnumerable<IDirectResponseSchema> GenerateLinksFromTable(Document document, Table table)
{
    // Links contributed by the content of one cell.
    IEnumerable<IDirectResponseSchema> LinksInCell(TableCell cell)
    {
        return cell.Content.SelectMany(child => GenerateLinksFromStructuralElement(document, child));
    }

    // Links contributed by all cells of one row.
    IEnumerable<IDirectResponseSchema> LinksInRow(TableRow row)
    {
        return row.TableCells.SelectMany(cell => LinksInCell(cell));
    }

    return table.TableRows.SelectMany(row => LinksInRow(row));
}
// Dispatches on what a structural element holds: a paragraph is scanned for links,
// otherwise a table is scanned for links, and any other element yields nothing.
// When both are set, the paragraph takes priority.
//
// document: the document that owns the element; forwarded to the paragraph/table scanners.
// element:  the structural element to inspect.
// Returns the links found in the element, or an empty sequence.
IEnumerable<IDirectResponseSchema> GenerateLinksFromStructuralElement(Document document, StructuralElement element)
{
    return (element.Paragraph, element.Table) switch
    {
        ({ } paragraph, _) => GenerateLinksFromParagraph(document, paragraph),
        (_, { } table) => GenerateLinksFromTable(document, table),
        _ => Enumerable.Empty<IDirectResponseSchema>(),
    };
}
// Walks the structural elements of the document body and yields every link found in them.
//
// document: the document whose body is scanned.
// Returns a lazily evaluated sequence of the links found, in body order.
IEnumerable<IDirectResponseSchema> GenerateLinksFromDocument(Document document)
{
    return from bodyElement in document.Body.Content
           from found in GenerateLinksFromStructuralElement(document, bodyElement)
           select found;
}
// Authorizes against Google with the OAuth client secret file, downloads the configured
// document and writes one console line per link found in it.
//
// Configuration is read from environment variables:
//   GOOGLE_DOCUMENT_ID             - id of the document to fetch
//   GOOGLE_CLIENT_SECRET_FILE_PATH - path of the OAuth client secret file
//   GOOGLE_USER                    - user passed to the authorization broker
//
// cancellationToken: cancels the authorization and the document download.
// Throws InvalidOperationException when one of the environment variables is missing or blank.
async Task TextShowDocumentLinks(CancellationToken cancellationToken = default)
{
    // Fail fast with a message that names the missing variable, instead of letting a
    // null value surface later as an unrelated argument error.
    static string RequireEnvironmentVariable(string name)
    {
        var value = Environment.GetEnvironmentVariable(name);
        if (string.IsNullOrWhiteSpace(value))
        {
            throw new InvalidOperationException($"Environment variable '{name}' must be set.");
        }

        return value;
    }

    var docId = RequireEnvironmentVariable("GOOGLE_DOCUMENT_ID");
    var clientSecretFilePath = RequireEnvironmentVariable("GOOGLE_CLIENT_SECRET_FILE_PATH");
    var user = RequireEnvironmentVariable("GOOGLE_USER");

    UserCredential credential;
    // The secret file is only needed while authorizing, so it is closed right afterwards.
    using (var stream = new FileStream(clientSecretFilePath, FileMode.Open, FileAccess.Read))
    {
        credential = await GoogleWebAuthorizationBroker.AuthorizeAsync(
            GoogleClientSecrets.FromStream(stream).Secrets,
            new[] { DocsService.Scope.Documents },
            user,
            cancellationToken);
    }

    var initializer = new DocsService.Initializer
    {
        ApplicationName = "Links Replacer",
        HttpClientInitializer = credential
    };

    // The service is disposable; release it when the method finishes.
    using var docsService = new DocsService(initializer);

    // Forward the token so the download can be cancelled as well.
    var doc = await docsService.Documents.Get(docId).ExecuteAsync(cancellationToken);

    foreach (var item in GenerateLinksFromDocument(doc))
    {
        if (item is RichLink richLink)
        {
            // RichLinkProperties may be absent; print empty values rather than crash.
            Console.WriteLine($"Title={richLink.RichLinkProperties?.Title} Uri={richLink.RichLinkProperties?.Uri}");
        }
        else if (item is Link link)
        {
            Console.WriteLine($"Uri={link.Url}");
        }
    }
}
// Script entry point: list the document's links using the default cancellation token.
await TextShowDocumentLinks();
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the console program that lists the links of a Google document. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net7.0</TargetFramework>
<!-- The program uses Enumerable, FileStream, Task and CancellationToken without explicit using directives, so it relies on implicit usings. -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- Google Docs API client library (DocsService and the Document data model used by the program). -->
<PackageReference Include="Google.Apis.Docs.v1" Version="1.57.0.2833" />
</ItemGroup>
</Project>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment