Skip to content

Instantly share code, notes, and snippets.

@jechlin
Last active April 21, 2022 07:32
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jechlin/6d3b4b8105fc4d2bf1ae6d9c22c64923 to your computer and use it in GitHub Desktop.
Save jechlin/6d3b4b8105fc4d2bf1ae6d9c22c64923 to your computer and use it in GitHub Desktop.
package com.onresolve.examples.xhtml
import com.atlassian.confluence.content.render.xhtml.*
import com.atlassian.confluence.content.render.xhtml.migration.UrlResourceIdentifier
import com.atlassian.confluence.content.render.xhtml.model.resource.DefaultEmbeddedImage
import com.atlassian.confluence.content.render.xhtml.model.resource.identifiers.AttachmentResourceIdentifier
import com.atlassian.confluence.content.render.xhtml.model.resource.identifiers.ResourceIdentifier
import com.atlassian.confluence.content.render.xhtml.storage.embed.StorageEmbeddedImageMarshaller
import com.atlassian.confluence.content.render.xhtml.storage.embed.StorageEmbeddedImageUnmarshaller
import com.atlassian.confluence.content.render.xhtml.storage.resource.identifiers.DelegatingResourceIdentifierMarshaller
import com.atlassian.confluence.core.Modification
import com.atlassian.confluence.pages.Attachment
import com.atlassian.confluence.pages.AttachmentManager
import com.atlassian.confluence.pages.Page
import com.atlassian.confluence.pages.PageManager
import com.atlassian.confluence.renderer.PageContext
import com.atlassian.confluence.xhtml.api.EmbeddedImage
import com.atlassian.confluence.xhtml.api.XhtmlContent
import com.atlassian.gzipfilter.org.apache.commons.lang.StringUtils
import com.atlassian.sal.api.component.ComponentLocator
import com.atlassian.spring.container.ContainerManager
import com.onresolve.scriptrunner.canned.confluence.utils.CQLSearchUtils
import com.onresolve.scriptrunner.canned.confluence.xhtml.Rewriter
import com.onresolve.scriptrunner.canned.confluence.xhtml.XhtmlRewriter
import groovy.util.logging.Log4j
import groovyx.net.http.ContentType
import groovyx.net.http.HTTPBuilder
import groovyx.net.http.URIBuilder
import org.apache.commons.io.IOUtils
import javax.xml.stream.XMLEventReader
import javax.xml.stream.XMLEventWriter
import javax.xml.stream.XMLStreamException
/**
 * Script that finds pages via a CQL query, runs every embedded-image element of
 * each page body through {@link DownloadingImageMarshaller}, and saves a new page
 * version when that marshaller reports that it downloaded at least one image.
 *
 * Configure {@link #urlPrefix} and {@link #cqlQuery} below before running.
 */
@Log4j
class ImageRewriter extends Script {

    /* CONFIG */

    /**
     * Will only download images if the URLs start with the following string
     */
    public String urlPrefix = "https://c.tadst.com"

    /**
     * CQL query defining which pages to update
     */
    public String cqlQuery = "type = Page and space = DEVON"

    /* END CONFIG */

    // Collaborators are looked up from the container. Declaration order matters:
    // the marshaller and the transformer below are built from the earlier fields.
    def pageManager = ComponentLocator.getComponent(PageManager)
    def xhtmlContent = ComponentLocator.getComponent(XhtmlContent)
    def imageUnmarshaller = ComponentLocator.getComponent(StorageEmbeddedImageUnmarshaller)
    def xmlEventReaderFactory = ContainerManager.getComponent("xmlEventReaderFactory") as XmlEventReaderFactory
    def xmlStreamWriterTemplate = ComponentLocator.getComponent(XmlStreamWriterTemplate)
    def marshallingRegistry = ComponentLocator.getComponent(MarshallingRegistry)
    def resourceIdentifierMarshaller = ComponentLocator.getComponent(DelegatingResourceIdentifierMarshaller)
    def rewritingImageMarshaller = new DownloadingImageMarshaller(xmlStreamWriterTemplate, resourceIdentifierMarshaller, marshallingRegistry, this)
    def unmarshalMarshalFragmentTransformer = new UnmarshalMarshalFragmentTransformer(imageUnmarshaller, rewritingImageMarshaller)

    /**
     * Applies the image rewriter to a piece of storage-format content.
     *
     * @param content the storage-format body to process
     * @param context conversion context handed to the fragment transformer
     * @return whatever {@code XhtmlRewriter.applyRewriter} produces for the content
     */
    String rewrite(String content, ConversionContext context) {
        def rewriter = new Rewriter() {
            @Override
            boolean shouldRewrite(XMLEventReader reader) throws XMLStreamException {
                // peek() yields null once the stream is exhausted, so guard before dereferencing.
                def event = reader.peek()
                return event != null &&
                        event.isStartElement() &&
                        event.asStartElement().getName() == StorageEmbeddedImageUnmarshaller.IMAGE_ELEMENT
            }

            @Override
            void rewrite(XMLEventReader reader, XMLEventWriter writer) throws XMLStreamException {
                // Unmarshal the image element and marshal it again through the downloading marshaller.
                def streamable = unmarshalMarshalFragmentTransformer.transform(reader, unmarshalMarshalFragmentTransformer, context)
                def stringWriter = new StringWriter()
                streamable.writeTo(stringWriter)

                // Re-parse the marshalled fragment so its events can be appended to the output.
                def fragmentEventReader = xmlEventReaderFactory.createStorageXmlEventReader(new StringReader(stringWriter.toString()))
                try {
                    writer.add(fragmentEventReader)
                } finally {
                    fragmentEventReader.close()
                }
            }
        }
        return XhtmlRewriter.applyRewriter(content, rewriter)
    }

    /**
     * Rewrites the body of one page and, only if the marshaller flagged a download,
     * saves the result as a new page version.
     *
     * @param page the page to process
     */
    void rewriteAndUpdate(Page page) {
        // The flag lives on the shared marshaller, so reset it before each page.
        rewritingImageMarshaller.attachmentsDownloaded = false
        def context = new DefaultConversionContext(new PageContext(page))
        def newContent = rewrite(page.bodyAsString, context)

        if (!rewritingImageMarshaller.attachmentsDownloaded) {
            return
        }

        log.debug("rewriting page: ${page}")
        pageManager.saveNewVersion(page, new Modification<Page>() {
            @Override
            void modify(Page content) {
                content.setBodyAsString(newContent)
                content.setVersionComment("Images migrated to confluence")
            }
        })
    }

    /**
     * Runs {@link #rewriteAndUpdate} for every hit of the given CQL query.
     * A failure on one hit is logged and does not stop the remaining hits.
     *
     * @param cql the CQL query selecting the pages to process
     */
    void updateMultiple(String cql) {
        def hits = CQLSearchUtils.searchForContent(cql)
        hits.each { hit ->
            try {
                rewriteAndUpdate(hit as Page)
            } catch (Exception e) {
                // Covers cast failures for hits that are not pages as well as rewrite/save errors.
                log.error("Failed to migrate images for search hit: ${hit}", e)
            }
        }
    }

    /**
     * Script entry point: processes all pages matching {@link #cqlQuery}.
     *
     * @return a fixed message pointing at the logs
     */
    @Override
    Object run() {
        updateMultiple(cqlQuery)
        return "See logs for details"
    }
}
/**
 * Image marshaller that, for images referenced by a URL starting with the
 * rewriter's {@code urlPrefix}, downloads the image, stores it as an attachment
 * on the page being converted, and marshals an attachment-based image in place of
 * the URL-based one. Every other image, and every failure case, is marshalled
 * unchanged by the superclass.
 */
@Log4j
class DownloadingImageMarshaller extends StorageEmbeddedImageMarshaller {

    private final ImageRewriter rewriter

    /**
     * Set to true whenever an image has been downloaded and replaced. This class never
     * resets it; callers are expected to clear it before each run they want to observe.
     */
    public boolean attachmentsDownloaded = false

    DownloadingImageMarshaller(XmlStreamWriterTemplate xmlStreamWriterTemplate, Marshaller<ResourceIdentifier> resourceIdentifierMarshaller, MarshallingRegistry registry, ImageRewriter rewriter) {
        super(xmlStreamWriterTemplate, resourceIdentifierMarshaller, registry)
        this.rewriter = rewriter
    }

    /**
     * Marshals an embedded image, migrating it to a page attachment when it is a
     * URL image under the configured prefix.
     *
     * @param embeddedImage the image to marshal
     * @param conversionContext context whose entity is the page receiving the attachment
     * @return the marshalled image; the original image is marshalled as-is when it is not
     *         eligible, has no usable file name, already exists as an attachment, or fails to download
     * @throws XhtmlException propagated from the superclass marshaller
     */
    @Override
    Streamable marshal(EmbeddedImage embeddedImage, ConversionContext conversionContext) throws XhtmlException {
        def ri = embeddedImage.getResourceIdentifier()
        if (!(ri instanceof UrlResourceIdentifier) || !ri.url?.startsWith(rewriter.urlPrefix)) {
            return super.marshal(embeddedImage, conversionContext)
        }

        String url = ri.getUrl()

        // The attachment name is the last path segment of the image URL.
        String filename
        try {
            filename = StringUtils.substringAfterLast(new URIBuilder(url).getPath(), "/")
        } catch (URISyntaxException e) {
            // A malformed URL must not abort the rewrite of the whole page.
            log.warn("Ignoring image with malformed URL ${url}: ${e.message}")
            return super.marshal(embeddedImage, conversionContext)
        }
        if (!filename) {
            log.warn("No image found for ${conversionContext.pageContext.entity.idAsString}")
            return super.marshal(embeddedImage, conversionContext)
        }

        def page = conversionContext.getEntity()
        if (!(page instanceof Page)) {
            // Attachments are only created on pages; leave images of other content untouched.
            log.warn("Not migrating ${url}: conversion context entity is not a page: ${page}")
            return super.marshal(embeddedImage, conversionContext)
        }

        def attachmentManager = ComponentLocator.getComponent(AttachmentManager)
        def attachments = attachmentManager.getLatestVersionsOfAttachments(page)
        if (filename in attachments*.fileName) {
            // NOTE(review): the image keeps pointing at the external URL here, so a second
            // reference to an already-downloaded file is not rewritten — confirm this is intended.
            log.debug("Already have this attachment")
            return super.marshal(embeddedImage, conversionContext)
        }

        // Created only once a download is actually needed, and always shut down afterwards
        // so the underlying HTTP client resources are released.
        def httpBuilder = new HTTPBuilder(rewriter.urlPrefix)
        httpBuilder.auth.basic("username", "password") // todo: set user name and password for basic auth if required
        try {
            httpBuilder.get(uri: url, contentType: ContentType.BINARY) { resp, InputStream inputStream ->
                // should write to a file, might get OoM here
                def baos = new ByteArrayOutputStream()
                IOUtils.copy(inputStream, baos)
                def attachment = new Attachment(filename, resp.headers.'Content-Type' as String, baos.size(), "", true)
                attachment.setContainer(page)
                attachmentManager.saveAttachment(attachment, null, new ByteArrayInputStream(baos.toByteArray()))
            }
        }
        catch (any) {
            // Pass the throwable along so the stack trace is not lost.
            log.error("Failed to download ${url} on page: $page : ${any.message}", any)
            return super.marshal(embeddedImage, conversionContext)
        }
        finally {
            httpBuilder.shutdown()
        }

        // Build the replacement image pointing at the new attachment and carry over
        // the presentation attributes of the original.
        // NOTE(review): only the attributes listed below are copied — confirm no others are needed.
        def newImage = new DefaultEmbeddedImage(new AttachmentResourceIdentifier(filename))
        newImage.with {
            width = embeddedImage.width
            height = embeddedImage.height
            source = embeddedImage.source
            alignment = embeddedImage.alignment
            alternativeText = embeddedImage.alternativeText
            hspace = embeddedImage.hspace
            vspace = embeddedImage.vspace
            thumbnail = embeddedImage.thumbnail
        }
        attachmentsDownloaded = true
        return super.marshal(newImage, conversionContext)
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment