Skip to content

Instantly share code, notes, and snippets.

@yak1ex
Last active June 5, 2019 11:52
Show Gist options
  • Save yak1ex/01e1c868508e95e8b4634f9b738cfcd7 to your computer and use it in GitHub Desktop.
Save yak1ex/01e1c868508e95e8b4634f9b738cfcd7 to your computer and use it in GitHub Desktop.
Issue report for buildbot

env

  • Python version: 3.7.2
  • Buildbot version: 2.3.1
  • Twisted version: 19.2.0

SVNPoller

`svn info` may return URLs that contain %-encoded characters. It is therefore safer to call urllib.parse.unquote() on root in determine_prefix. When invoking svn, it may be preferable to call urllib.parse.quote() on the URL, as the SVN BuildStep does.

BuildStep

master.cfg

factory.addStep(steps.SVN(repourl='file://somehost/自動/', mode='incremental')) # non latin-1
factory.addStep(steps.SVN(repourl='file://somehost/%E8%87%AA%E5%8B%95/', mode='incremental')) # non latin-1 %-encoding

A Subversion URL may contain UTF-8 characters, so I think it is not appropriate to pass "latin-1" explicitly to quote(). Even when the URL is %-encoded, urlunquote() decodes the %-encoded string, which may yield UTF-8 (non-latin-1) characters; quote() then tries to encode the result explicitly as "latin-1", which fails. I'm not sure of the exact context in which _sourcedirIsUpdatable() is called. I guess it is triggered on the invocations that follow the first checkout, so you may need to run a builder more than once to reproduce the behavior.

File "c:\bb\master\sandbox\lib\site-packages\buildbot\steps\source\svn.py", line 453, in svnUriCanonicalize path = quote(urlunquote(path)) File "c:\bb\master\sandbox\lib\site-packages\buildbot\steps\source\svn.py", line 425, in quote return urlquote(uri, "!$&'()*+,-./:=@_~", encoding="latin-1")

_checkout() uses raw self.repourl. This is different from update behavior.

  • Non latin-1 repourl: fails on the first checkout and on subsequent updates
  • Non latin-1 %-encoded repourl: succeeds on the first checkout and fails on subsequent updates

traceback

Traceback (most recent call last):
  File "c:\bb\master\sandbox\lib\site-packages\twisted\internet\defer.py", line 654, in _runCallbacks
    current.result = callback(current.result, *args, **kw)
  File "c:\bb\master\sandbox\lib\site-packages\twisted\internet\defer.py", line 1475, in gotResult
    _inlineCallbacks(r, g, status)
  File "c:\bb\master\sandbox\lib\site-packages\twisted\internet\defer.py", line 1416, in _inlineCallbacks
    result = result.throwExceptionIntoGenerator(g)
  File "c:\bb\master\sandbox\lib\site-packages\twisted\python\failure.py", line 512, in throwExceptionIntoGenerator
    return g.throw(self.type, self.value, self.tb)
--- <exception caught here> ---
  File "c:\bb\master\sandbox\lib\site-packages\buildbot\process\buildstep.py", line 566, in startStep
    self.results = yield self.run()
  File "c:\bb\master\sandbox\lib\site-packages\twisted\internet\defer.py", line 1418, in _inlineCallbacks
    result = g.send(result)
  File "c:\bb\master\sandbox\lib\site-packages\buildbot\process\buildstep.py", line 696, in run
    results = yield self._start_deferred
  File "c:\bb\master\sandbox\lib\site-packages\twisted\internet\defer.py", line 1416, in _inlineCallbacks
    result = result.throwExceptionIntoGenerator(g)
  File "c:\bb\master\sandbox\lib\site-packages\twisted\python\failure.py", line 512, in throwExceptionIntoGenerator
    return g.throw(self.type, self.value, self.tb)
  File "c:\bb\master\sandbox\lib\site-packages\buildbot\steps\source\svn.py", line 121, in mode_full
    updatable = yield self._sourcedirIsUpdatable()
  File "c:\bb\master\sandbox\lib\site-packages\twisted\internet\defer.py", line 1418, in _inlineCallbacks
    result = g.send(result)
  File "c:\bb\master\sandbox\lib\site-packages\buildbot\steps\source\svn.py", line 286, in _sourcedirIsUpdatable
    return extractedurl == self.svnUriCanonicalize(self.repourl)
  File "c:\bb\master\sandbox\lib\site-packages\buildbot\steps\source\svn.py", line 453, in svnUriCanonicalize
    path = quote(urlunquote(path))
  File "c:\bb\master\sandbox\lib\site-packages\buildbot\steps\source\svn.py", line 425, in quote
    return urlquote(uri, "!$&'()*+,-./:=@_~", encoding="latin-1")
  File "C:\usr\local\Anaconda3\lib\urllib\parse.py", line 801, in quote
    string = string.encode(encoding, errors)
builtins.UnicodeEncodeError: 'latin-1' codec can't encode characters in position 1-9: ordinal not in range(256)
# This file is part of Buildbot. Buildbot is free software: you can
# redistribute it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Copyright Buildbot Team Members
import re
import xml.dom.minidom
import xml.parsers.expat
from urllib.parse import quote as urlquote
from urllib.parse import unquote as urlunquote
from urllib.parse import urlparse
from urllib.parse import urlunparse
from twisted.internet import defer
from twisted.internet import reactor
from twisted.python import log
from buildbot.config import ConfigErrors
from buildbot.process import buildstep
from buildbot.process import remotecommand
from buildbot.steps.source.base import Source
class SVN(Source):
    """Source step that performs Subversion checkout/update operations.

    Every ``svn`` invocation is issued as a remote command in ``self.workdir``
    and logged to the step's "stdio" log.
    """

    name = 'svn'

    renderables = ['repourl', 'password']
    # None means "no explicit method"; see _getMethod() for the fallback.
    possible_methods = ('clean', 'fresh', 'clobber', 'copy', 'export', None)

    def __init__(self, repourl=None, mode='incremental',
                 method=None, username=None,
                 password=None, extra_args=None, keep_on_purge=None,
                 depth=None, preferLastChangedRev=False, **kwargs):
        """Store the step configuration and validate it.

        Raises:
            ConfigErrors: if ``mode`` or ``method`` is not recognised, or if
                ``repourl`` is missing.  All problems are reported together.
        """
        self.repourl = repourl
        self.username = username
        self.password = password
        self.extra_args = extra_args
        self.keep_on_purge = keep_on_purge or []
        self.depth = depth
        self.method = method
        self.mode = mode
        self.preferLastChangedRev = preferLastChangedRev
        super().__init__(**kwargs)
        errors = []
        if not self._hasAttrGroupMember('mode', self.mode):
            errors.append("mode %s is not one of %s" %
                          (self.mode, self._listAttrGroupMembers('mode')))
        if self.method not in self.possible_methods:
            errors.append("method %s is not one of %s" %
                          (self.method, self.possible_methods))

        if repourl is None:
            errors.append("you must provide repourl")

        if errors:
            raise ConfigErrors(errors)

    def startVC(self, branch, revision, patch):
        """Build and return the Deferred chain that runs the whole step.

        The chain checks that svn is available, purges a previously patched
        source directory, runs the handler for ``self.mode``, optionally
        applies ``patch``, records the revision and finishes the step.
        """
        self.revision = revision
        self.method = self._getMethod()
        self.stdio_log = self.addLogForRemoteCommands("stdio")

        # if the version is new enough, and the password is set, then obfuscate
        # it
        if self.password is not None:
            if not self.workerVersionIsOlderThan('shell', '2.16'):
                self.password = ('obfuscated', self.password, 'XXXXXX')
            else:
                log.msg("Worker does not understand obfuscation; "
                        "svn password will be logged")

        d = self.checkSvn()

        @d.addCallback
        def checkInstall(svnInstalled):
            if not svnInstalled:
                raise buildstep.BuildStepFailed(
                    "SVN is not installed on worker")
            return 0

        d.addCallback(lambda _: self.sourcedirIsPatched())

        @d.addCallback
        def checkPatched(patched):
            if patched:
                return self.purge(False)
            return 0

        d.addCallback(self._getAttrGroupMember('mode', self.mode))

        if patch:
            d.addCallback(self.patch, patch)
        d.addCallback(self.parseGotRevision)
        d.addCallback(self.finish)
        d.addErrback(self.failed)
        return d

    @defer.inlineCallbacks
    def mode_full(self, _):
        """Handle mode='full' according to ``self.method``."""
        if self.method == 'clobber':
            yield self.clobber()
            return
        elif self.method in ['copy', 'export']:
            yield self.copy()
            return

        updatable = yield self._sourcedirIsUpdatable()
        if not updatable:
            # blow away the old (un-updatable) directory and checkout
            yield self.clobber()
        elif self.method == 'clean':
            yield self.clean()
        elif self.method == 'fresh':
            yield self.fresh()

    @defer.inlineCallbacks
    def mode_incremental(self, _):
        """Update the working copy in place, or clobber it if it is unusable."""
        updatable = yield self._sourcedirIsUpdatable()

        if not updatable:
            # blow away the old (un-updatable) directory and checkout
            yield self.clobber()
        else:
            # otherwise, do an update
            command = ['update']
            if self.revision:
                command.extend(['--revision', str(self.revision)])
            yield self._dovccmd(command)

    def clobber(self):
        """Remove the work directory, then do a fresh checkout."""
        d = self.runRmdir(self.workdir, timeout=self.timeout)
        d.addCallback(lambda _: self._checkout())
        return d

    def fresh(self):
        """Purge unversioned files, including ignored ones, then update."""
        d = self.purge(True)
        cmd = ['update']
        if self.revision:
            cmd.extend(['--revision', str(self.revision)])
        d.addCallback(lambda _: self._dovccmd(cmd))
        return d

    def clean(self):
        """Purge unversioned files (ignored ones are kept), then update."""
        d = self.purge(False)
        cmd = ['update']
        if self.revision:
            cmd.extend(['--revision', str(self.revision)])
        d.addCallback(lambda _: self._dovccmd(cmd))
        return d

    @defer.inlineCallbacks
    def copy(self):
        """Update a separate 'source' checkout, then copy or export it.

        Raises:
            buildstep.BuildStepFailed: if the final copy/export command fails.
        """
        yield self.runRmdir(self.workdir, timeout=self.timeout)

        checkout_dir = 'source'
        if self.codebase:
            checkout_dir = self.build.path_module.join(
                checkout_dir, self.codebase)
        # temporarily set workdir = checkout_dir and do an incremental checkout.
        # The old value is captured before the try so that the finally clause
        # can always restore it.
        old_workdir = self.workdir
        try:
            self.workdir = checkout_dir
            yield self.mode_incremental(None)
        finally:
            self.workdir = old_workdir

        # if we're copying, copy; otherwise, export from source to build
        if self.method == 'copy':
            cmd = remotecommand.RemoteCommand('cpdir',
                                              {'fromdir': checkout_dir, 'todir': self.workdir,
                                               'logEnviron': self.logEnviron})
        else:
            export_cmd = ['svn', 'export']
            if self.revision:
                export_cmd.extend(["--revision", str(self.revision)])
            if self.username:
                export_cmd.extend(['--username', self.username])
            if self.password is not None:
                export_cmd.extend(['--password', self.password])
            if self.extra_args:
                export_cmd.extend(self.extra_args)
            export_cmd.extend([checkout_dir, self.workdir])

            cmd = remotecommand.RemoteShellCommand('', export_cmd,
                                                   env=self.env, logEnviron=self.logEnviron,
                                                   timeout=self.timeout)
        cmd.useLog(self.stdio_log, False)

        yield self.runCommand(cmd)

        if cmd.didFail():
            raise buildstep.BuildStepFailed()

    def finish(self, res):
        """Record the step status for ``res`` and hand it to ``self.finished``."""
        d = defer.succeed(res)

        @d.addCallback
        def _gotResults(results):
            self.setStatus(self.cmd, results)
            return results
        d.addCallback(self.finished)
        return d

    def _dovccmd(self, command, collectStdout=False, collectStderr=False, abandonOnFailure=True):
        """Run ``svn <command>`` in the work directory.

        Args:
            command: list of svn arguments (sub-command first).  The list is
                copied, so the caller's list is left untouched.
            collectStdout: collect and return the command's stdout.
            collectStderr: collect and return the command's stderr.
            abandonOnFailure: raise BuildStepFailed if the command fails.

        Returns:
            A Deferred firing with ``(stdout, stderr)``, ``stdout``, ``stderr``
            or the return code, depending on the collect flags.
        """
        assert command, "No command specified"
        # work on a copy: the common options must not leak into the caller's list
        command = list(command)
        command.extend(['--non-interactive', '--no-auth-cache'])
        if self.username:
            command.extend(['--username', self.username])
        if self.password is not None:
            command.extend(['--password', self.password])
        if self.depth:
            command.extend(['--depth', self.depth])
        if self.extra_args:
            command.extend(self.extra_args)

        cmd = remotecommand.RemoteShellCommand(self.workdir, ['svn'] + command,
                                               env=self.env,
                                               logEnviron=self.logEnviron,
                                               timeout=self.timeout,
                                               collectStdout=collectStdout,
                                               collectStderr=collectStderr)
        cmd.useLog(self.stdio_log, False)
        d = self.runCommand(cmd)

        @d.addCallback
        def evaluateCommand(_):
            if cmd.didFail() and abandonOnFailure:
                log.msg("Source step failed while running command %s" % cmd)
                raise buildstep.BuildStepFailed()
            if collectStdout and collectStderr:
                return (cmd.stdout, cmd.stderr)
            elif collectStdout:
                return cmd.stdout
            elif collectStderr:
                return cmd.stderr
            return cmd.rc
        return d

    def _getMethod(self):
        """Return the effective method for the configured mode.

        Incremental mode has no method (None); full mode defaults to 'fresh'.
        Any other combination falls through and yields None.
        """
        if self.method is not None and self.mode != 'incremental':
            return self.method
        elif self.mode == 'incremental':
            return None
        elif self.method is None and self.mode == 'full':
            return 'fresh'

    @defer.inlineCallbacks
    def _sourcedirIsUpdatable(self):
        """Tell whether the work directory is a checkout of ``self.repourl``.

        Raises:
            buildstep.BuildStepFailed: if ``svn info --xml`` output is not
                well-formed XML.
        """
        # first, perform a stat to ensure that this is really an svn directory
        res = yield self.pathExists(self.build.path_module.join(self.workdir, '.svn'))
        if not res:
            return False

        # then run 'svn info --xml' to check that the URL matches our repourl
        stdout, stderr = yield self._dovccmd(['info', '--xml'], collectStdout=True,
                                             collectStderr=True, abandonOnFailure=False)

        # svn: E155037: Previous operation has not finished; run 'cleanup' if
        # it was interrupted
        if 'E155037:' in stderr:
            return False

        try:
            stdout_xml = xml.dom.minidom.parseString(stdout)
            extractedurl = stdout_xml.getElementsByTagName(
                'url')[0].firstChild.nodeValue
        except xml.parsers.expat.ExpatError:
            msg = "Corrupted xml, aborting step"
            self.stdio_log.addHeader(msg)
            raise buildstep.BuildStepFailed()
        return extractedurl == self.svnUriCanonicalize(self.repourl)

    @defer.inlineCallbacks
    def parseGotRevision(self, _):
        """Read the checked-out revision and store it as 'got_revision'.

        With ``preferLastChangedRev`` the <commit> revision is tried first and
        the <entry> revision is the fallback.

        Raises:
            buildstep.BuildStepFailed: if the XML is corrupt or carries no
                revision.
        """
        # if this was a full/export, then we need to check svnversion in the
        # *source* directory, not the build directory
        svnversion_dir = self.workdir
        if self.mode == 'full' and self.method == 'export':
            svnversion_dir = 'source'
        cmd = remotecommand.RemoteShellCommand(svnversion_dir, ['svn', 'info', '--xml'],
                                               env=self.env,
                                               logEnviron=self.logEnviron,
                                               timeout=self.timeout,
                                               collectStdout=True)
        cmd.useLog(self.stdio_log, False)
        yield self.runCommand(cmd)

        stdout = cmd.stdout
        try:
            stdout_xml = xml.dom.minidom.parseString(stdout)
        except xml.parsers.expat.ExpatError:
            msg = "Corrupted xml, aborting step"
            self.stdio_log.addHeader(msg)
            raise buildstep.BuildStepFailed()

        revision = None
        if self.preferLastChangedRev:
            try:
                revision = stdout_xml.getElementsByTagName(
                    'commit')[0].attributes['revision'].value
            except (KeyError, IndexError):
                msg = ("SVN.parseGotRevision unable to detect Last Changed Rev in"
                       " output of svn info")
                log.msg(msg)
                # fall through and try to get 'Revision' instead

        if revision is None:
            try:
                revision = stdout_xml.getElementsByTagName(
                    'entry')[0].attributes['revision'].value
            except (KeyError, IndexError):
                msg = ("SVN.parseGotRevision unable to detect revision in"
                       " output of svn info")
                log.msg(msg)
                raise buildstep.BuildStepFailed()

        msg = "Got SVN revision %s" % (revision, )
        self.stdio_log.addHeader(msg)
        self.updateSourceProperty('got_revision', revision)

        return cmd.rc

    def purge(self, ignore_ignores):
        """Delete everything that shows up in ``svn status``.

        Args:
            ignore_ignores: when true, pass ``--no-ignore`` so that ignored
                files are listed (and therefore removed) as well.

        Raises:
            buildstep.BuildStepFailed: (through the Deferred) if removal
                returns a non-zero code.
        """
        command = ['status', '--xml']
        if ignore_ignores:
            command.append('--no-ignore')
        d = self._dovccmd(command, collectStdout=True)

        @d.addCallback
        def parseAndRemove(stdout):
            files = []
            for filename in self.getUnversionedFiles(stdout, self.keep_on_purge):
                filename = self.build.path_module.join(self.workdir, filename)
                files.append(filename)
            if not files:
                removal = defer.succeed(0)
            else:
                if self.workerVersionIsOlderThan('rmdir', '2.14'):
                    # older workers get one rmdir command per path
                    removal = self.removeFiles(files)
                else:
                    removal = self.runRmdir(files, abandonOnFailure=False, timeout=self.timeout)
            return removal

        @d.addCallback
        def evaluateCommand(rc):
            if rc != 0:
                log.msg("Failed removing files")
                raise buildstep.BuildStepFailed()
            return rc
        return d

    @staticmethod
    def getUnversionedFiles(xmlStr, keep_on_purge):
        """Yield the paths from ``svn status --xml`` output that should be removed.

        Entries whose wc-status item is 'external' or 'missing', entries with
        an empty path and entries listed in ``keep_on_purge`` are skipped.

        Raises:
            buildstep.BuildStepFailed: if ``xmlStr`` is not well-formed XML.
        """
        try:
            result_xml = xml.dom.minidom.parseString(xmlStr)
        except xml.parsers.expat.ExpatError:
            log.err("Corrupted xml, aborting step")
            raise buildstep.BuildStepFailed()

        for entry in result_xml.getElementsByTagName('entry'):
            (wc_status,) = entry.getElementsByTagName('wc-status')
            if wc_status.getAttribute('item') == 'external':
                continue
            if wc_status.getAttribute('item') == 'missing':
                continue
            filename = entry.getAttribute('path')
            if filename in keep_on_purge or filename == '':
                continue
            yield filename

    @defer.inlineCallbacks
    def removeFiles(self, files):
        """Remove ``files`` one by one; stop at the first non-zero result.

        Returns (through the Deferred) that non-zero result, or 0 on success.
        """
        for filename in files:
            res = yield self.runRmdir(filename, abandonOnFailure=False, timeout=self.timeout)
            if res:
                return res
        return 0

    def checkSvn(self):
        """Return a Deferred firing True if ``svn --version`` exits with 0."""
        cmd = remotecommand.RemoteShellCommand(self.workdir, ['svn', '--version'],
                                               env=self.env,
                                               logEnviron=self.logEnviron,
                                               timeout=self.timeout)
        cmd.useLog(self.stdio_log, False)
        d = self.runCommand(cmd)

        @d.addCallback
        def evaluate(_):
            return cmd.rc == 0
        return d

    def computeSourceRevision(self, changes):
        """Return the highest revision among ``changes`` as an int.

        Returns None if there are no changes or any change has no revision.
        """
        if not changes or None in [c.revision for c in changes]:
            return None
        lastChange = max([int(c.revision) for c in changes])
        return lastChange

    @staticmethod
    def svnUriCanonicalize(uri):
        """Return ``uri`` in a canonical form suitable for comparison.

        The scheme and host are lower-cased, a trailing dot on the host and a
        default port are dropped, ``.``/``..``/``//`` path segments are
        collapsed for http, https and svn URLs, the path is re-quoted and a
        single trailing slash is removed.

        The path is unquoted to raw bytes before being quoted again, so
        existing %-escapes survive byte-for-byte whatever their encoding,
        while unescaped non-ASCII characters are escaped as UTF-8.
        """
        # function-scope import: this name is not in the module's import block
        from urllib.parse import unquote_to_bytes

        collapse = re.compile(r'([^/]+/\.\./?|/\./|//|/\.$|/\.\.$|^/\.\.)')
        server_authority = re.compile(r'^(?:([^@]+)@)?([^:]+)(?::(.+))?$')
        default_port = {'http': '80',
                        'https': '443',
                        'svn': '3690'}

        relative_schemes = ['http', 'https', 'svn']

        def quote(raw_path):
            return urlquote(raw_path, "!$&'()*+,-./:=@_~")

        if not uri or uri == '/':
            return uri

        (scheme, authority, path, parameters, query, fragment) = urlparse(uri)
        scheme = scheme.lower()
        if authority:
            mo = server_authority.match(authority)
            if not mo:
                return uri  # give up
            userinfo, host, port = mo.groups()
            if host[-1] == '.':
                host = host[:-1]
            authority = host.lower()
            if userinfo:
                authority = "%s@%s" % (userinfo, authority)
            if port and port != default_port.get(scheme, None):
                authority = "%s:%s" % (authority, port)

        if scheme in relative_schemes:
            # collapse one segment at a time until the path stops changing
            last_path = path
            while True:
                path = collapse.sub('/', path, 1)
                if last_path == path:
                    break
                last_path = path

        path = quote(unquote_to_bytes(path))
        canonical_uri = urlunparse(
            (scheme, authority, path, parameters, query, fragment))
        # an input such as "#" reassembles to an empty string; there is
        # nothing to strip, and indexing it would raise IndexError
        if not canonical_uri or canonical_uri == '/':
            return canonical_uri
        elif canonical_uri.endswith('/') and not canonical_uri.endswith('//'):
            return canonical_uri[:-1]
        return canonical_uri

    def _checkout(self):
        """Check out the canonicalized ``self.repourl`` into the work directory.

        When ``self.retry`` (a ``(delay, repeats)`` pair) is set, a failed
        checkout removes the work directory and is retried after ``delay``
        seconds until ``repeats`` is used up.
        """
        checkout_cmd = ['checkout', self.svnUriCanonicalize(self.repourl), '.']
        if self.revision:
            checkout_cmd.extend(["--revision", str(self.revision)])
        if self.retry:
            # only abandon on failure once no retries are left
            abandonOnFailure = (self.retry[1] <= 0)
        else:
            abandonOnFailure = True
        d = self._dovccmd(checkout_cmd, abandonOnFailure=abandonOnFailure)

        def _retry(res):
            if self.stopped or res == 0:
                return res
            delay, repeats = self.retry
            if repeats > 0:
                log.msg("Checkout failed, trying %d more times after %d seconds"
                        % (repeats, delay))
                self.retry = (delay, repeats - 1)
                df = defer.Deferred()
                df.addCallback(lambda _: self.runRmdir(self.workdir, timeout=self.timeout))
                df.addCallback(lambda _: self._checkout())
                reactor.callLater(delay, df.callback, None)
                return df
            return res

        if self.retry:
            d.addCallback(_retry)
        return d
# This file is part of Buildbot. Buildbot is free software: you can
# redistribute it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Copyright Buildbot Team Members
# Based on the work of Dave Peticolas for the P4poll
# Changed to svn (using xml.dom.minidom) by Niklaus Giger
# Hacked beyond recognition by Brian Warner
import os
import xml.dom.minidom
from urllib.parse import quote_plus as urlquote_plus
from urllib.parse import quote as urlquote
from urllib.parse import unquote as urlunquote
from twisted.internet import defer
from twisted.internet import utils
from twisted.python import log
from buildbot import util
from buildbot.changes import base
from buildbot.util import bytes2unicode
# these split_file_* functions are available for use as values to the
# split_file= argument.
def split_file_alwaystrunk(path):
    """Treat every path as belonging to the trunk: return it unchanged in a dict."""
    return {"path": path}
def split_file_branches(path):
    """Split a repository path into ``(branch, path_within_branch)``.

    "trunk/subdir/file.c"          -> (None, "subdir/file.c")
    "trunk/"                       -> (None, "")
    "branches/1.5.x/subdir/file.c" -> ("branches/1.5.x", "subdir/file.c")
    "branches/1.5.x/"              -> ("branches/1.5.x", "")

    Anything else (including a bare "trunk" or "branches/1.5.x") gives None.
    """
    segments = path.split('/')
    top = segments[0]
    if top == 'trunk' and len(segments) > 1:
        return (None, '/'.join(segments[1:]))
    if top == 'branches' and len(segments) > 2:
        branch = '/'.join(segments[:2])
        remainder = '/'.join(segments[2:])
        return (branch, remainder)
    return None
def split_file_projects_branches(path):
    """Split "project/<branch spec>/file" into a dict.

    "projectname/trunk/subdir/file.c" becomes
    dict(project="projectname", path="subdir/file.c"); a 'branch' key is
    added only when the file lives on a branch.  Returns None when the path
    has no "/" or when split_file_branches() rejects the remainder.
    """
    project, slash, remainder = path.partition("/")
    if not slash:
        return None
    branch_and_path = split_file_branches(remainder)
    if not branch_and_path:
        return branch_and_path
    branch, file_path = branch_and_path
    info = dict(project=project, path=file_path)
    if branch:
        info['branch'] = branch
    return info
class SVNPoller(base.PollingChangeSource, util.ComparableMixin):

    """
    Poll a Subversion repository for changes and submit them to the change
    master.
    """

    compare_attrs = ("repourl", "split_file",
                     "svnuser", "svnpasswd", "project",
                     "pollInterval", "histmax",
                     "svnbin", "category", "cachepath", "pollAtLaunch")
    secrets = ("svnuser", "svnpasswd")
    parent = None  # filled in when we're added
    last_change = None
    loop = None

    def __init__(self, repourl, split_file=None,
                 svnuser=None, svnpasswd=None,
                 pollInterval=10 * 60, histmax=100,
                 svnbin='svn', revlinktmpl='', category=None,
                 project='', cachepath=None, pollinterval=-2,
                 extra_args=None, name=None, pollAtLaunch=False):
        """Store the poller configuration and load the cached last revision.

        If ``cachepath`` exists, the last seen revision is read from it and
        written back as a writability check; on any failure the cache is
        disabled for this poller.
        """
        # for backward compatibility; the parameter used to be spelled with 'i'
        if pollinterval != -2:
            pollInterval = pollinterval

        if name is None:
            name = repourl

        super().__init__(name=name,
                         pollInterval=pollInterval,
                         pollAtLaunch=pollAtLaunch,
                         svnuser=svnuser, svnpasswd=svnpasswd)

        if repourl.endswith("/"):
            repourl = repourl[:-1]  # strip the trailing slash
        self.repourl = repourl
        self.extra_args = extra_args
        self.split_file = split_file or split_file_alwaystrunk
        self.svnuser = svnuser
        self.svnpasswd = svnpasswd

        self.revlinktmpl = revlinktmpl

        # include environment variables required for ssh-agent auth
        self.environ = os.environ.copy()

        self.svnbin = svnbin
        self.histmax = histmax
        self._prefix = None
        self.category = category if callable(
            category) else util.bytes2unicode(category)
        self.project = util.bytes2unicode(project)

        self.cachepath = cachepath
        if self.cachepath and os.path.exists(self.cachepath):
            try:
                with open(self.cachepath, "r") as f:
                    self.last_change = int(f.read().strip())
                    log.msg("SVNPoller: SVNPoller(%s) setting last_change to %s" % (
                        self.repourl, self.last_change))
                # try writing it, too
                with open(self.cachepath, "w") as f:
                    f.write(str(self.last_change))
            except Exception:
                self.cachepath = None
                log.msg(("SVNPoller: SVNPoller(%s) cache file corrupt or unwriteable; " +
                         "skipping and not using") % self.repourl)
                log.err()

    def describe(self):
        """Return a one-line description of what is being watched."""
        return "SVNPoller: watching %s" % self.repourl

    def poll(self):
        """Run one polling cycle; errors are logged and swallowed."""
        # Our return value is only used for unit testing.

        # we need to figure out the repository root, so we can figure out
        # repository-relative pathnames later. Each REPOURL is in the form
        # (ROOT)/(PROJECT)/(BRANCH)/(FILEPATH), where (ROOT) is something
        # like svn://svn.twistedmatrix.com/svn/Twisted (i.e. there is a
        # physical repository at /svn/Twisted on that host), (PROJECT) is
        # something like Projects/Twisted (i.e. within the repository's
        # internal namespace, everything under Projects/Twisted/ has
        # something to do with Twisted, but these directory names do not
        # actually appear on the repository host), (BRANCH) is something like
        # "trunk" or "branches/2.0.x", and (FILEPATH) is a tree-relative
        # filename like "twisted/internet/defer.py".

        # our self.repourl attribute contains (ROOT)/(PROJECT) combined
        # together in a way that we can't separate without svn's help. If the
        # user is not using the split_file= argument, then self.repourl might
        # be (ROOT)/(PROJECT)/(BRANCH) . In any case, the filenames we will
        # get back from 'svn log' will be of the form
        # (PROJECT)/(BRANCH)/(FILEPATH), but we want to be able to remove
        # that (PROJECT) prefix from them. To do this without requiring the
        # user to tell us how repourl is split into ROOT and PROJECT, we do an
        # 'svn info --xml' command at startup. This command will include a
        # <root> element that tells us ROOT. We then strip this prefix from
        # self.repourl to determine PROJECT, and then later we strip the
        # PROJECT prefix from the filenames reported by 'svn log --xml' to
        # get a (BRANCH)/(FILEPATH) that can be passed to split_file() to
        # turn into separate BRANCH and FILEPATH values.

        # whew.

        if self.project:
            log.msg("SVNPoller: polling " + self.project)
        else:
            log.msg("SVNPoller: polling")

        d = defer.succeed(None)
        if not self._prefix:
            d.addCallback(lambda _: self.get_prefix())

            @d.addCallback
            def set_prefix(prefix):
                self._prefix = prefix

        d.addCallback(self.get_logs)
        d.addCallback(self.parse_logs)
        d.addCallback(self.get_new_logentries)
        d.addCallback(self.create_changes)
        d.addCallback(self.submit_changes)
        d.addCallback(self.finished_ok)
        # eat errors
        d.addErrback(log.err, 'SVNPoller: Error in while polling')
        return d

    def getProcessOutput(self, args):
        """Run ``self.svnbin`` with ``args`` and return a Deferred for its output."""
        # this exists so we can override it during the unit tests
        d = utils.getProcessOutput(self.svnbin, args, self.environ)
        return d

    def _quoted_repourl(self):
        """Return ``self.repourl`` %-encoded for use on the svn command line.

        The URL is unquoted first so that a repourl which is already
        %-encoded is not encoded a second time ('%E8' must not become
        '%25E8').
        """
        return urlquote(urlunquote(self.repourl), "!$&'()*+,-./:=@_~")

    def get_prefix(self):
        """Run ``svn info --xml`` and derive the PROJECT prefix of repourl.

        Returns a Deferred firing with the part of the (unquoted) repourl
        that follows the repository root, without a leading slash.

        Raises (through the Deferred):
            xml.parsers.expat.ExpatError: if the svn output is not valid XML.
            RuntimeError: if the reported root is not a prefix of repourl.
        """
        args = ["info", "--xml", "--non-interactive", self._quoted_repourl()]
        if self.svnuser:
            args.append("--username=%s" % self.svnuser)
        if self.svnpasswd is not None:
            args.append("--password=%s" % self.svnpasswd)
        if self.extra_args:
            args.extend(self.extra_args)
        d = self.getProcessOutput(args)

        @d.addCallback
        def determine_prefix(output):
            try:
                doc = xml.dom.minidom.parseString(output)
            except xml.parsers.expat.ExpatError:
                log.msg("SVNPoller: SVNPoller.get_prefix: ExpatError in '%s'"
                        % output)
                raise
            rootnodes = doc.getElementsByTagName("root")
            if not rootnodes:
                # this happens if the URL we gave was already the root. In this
                # case, our prefix is empty.
                self._prefix = ""
                return self._prefix
            rootnode = rootnodes[0]
            root = urlunquote("".join([c.data for c in rootnode.childNodes]))
            # root will be a unicode string
            # compare like with like: root is unquoted, so unquote the
            # configured URL as well (it may have been given %-encoded)
            repourl = urlunquote(self.repourl)
            if not repourl.startswith(root):
                log.msg(format="Got root %(root)r from `svn info`, but it is "
                               "not a prefix of the configured repourl",
                        repourl=self.repourl, root=root)
                raise RuntimeError("Configured repourl doesn't match svn root")
            prefix = repourl[len(root):]
            if prefix.startswith("/"):
                prefix = prefix[1:]
            log.msg("SVNPoller: repourl=%s, root=%s, so prefix=%s" %
                    (self.repourl, root, prefix))
            return prefix
        return d

    def get_logs(self, _):
        """Run ``svn log --xml --verbose`` limited to ``self.histmax`` entries."""
        args = []
        args.extend(["log", "--xml", "--verbose", "--non-interactive"])
        if self.svnuser:
            args.extend(["--username=%s" % self.svnuser])
        if self.svnpasswd is not None:
            args.extend(["--password=%s" % self.svnpasswd])
        if self.extra_args:
            args.extend(self.extra_args)
        args.extend(["--limit=%d" % (self.histmax), self._quoted_repourl()])
        d = self.getProcessOutput(args)
        return d

    def parse_logs(self, output):
        """Parse ``svn log --xml`` output and return its <logentry> nodes.

        Raises:
            xml.parsers.expat.ExpatError: if ``output`` is not valid XML.
        """
        # parse the XML output, return a list of <logentry> nodes
        try:
            doc = xml.dom.minidom.parseString(output)
        except xml.parsers.expat.ExpatError:
            log.msg(
                "SVNPoller: SVNPoller.parse_logs: ExpatError in '%s'" % output)
            raise
        logentries = doc.getElementsByTagName("logentry")
        return logentries

    def get_new_logentries(self, logentries):
        """Return the entries newer than ``self.last_change``, oldest first.

        ``self.last_change`` is set to the revision of the first entry, or to
        None when ``logentries`` is empty.
        """
        last_change = old_last_change = self.last_change

        # given a list of logentries, calculate new_last_change, and
        # new_logentries, where new_logentries contains only the ones after
        # last_change

        new_last_change = None
        new_logentries = []
        if logentries:
            new_last_change = int(logentries[0].getAttribute("revision"))

            if last_change is None:
                # if this is the first time we've been run, ignore any changes
                # that occurred before now. This prevents a build at every
                # startup.
                log.msg('SVNPoller: starting at change %s' % new_last_change)
            elif last_change == new_last_change:
                # an unmodified repository will hit this case
                log.msg('SVNPoller: no changes')
            else:
                for el in logentries:
                    if last_change == int(el.getAttribute("revision")):
                        break
                    new_logentries.append(el)
                new_logentries.reverse()  # return oldest first

        self.last_change = new_last_change
        log.msg('SVNPoller: _process_changes %s .. %s' %
                (old_last_change, new_last_change))
        return new_logentries

    def _get_text(self, element, tag_name):
        """Return the text of the first ``tag_name`` child, or "unknown"."""
        try:
            child_nodes = element.getElementsByTagName(tag_name)[0].childNodes
            text = "".join([t.data for t in child_nodes])
        except IndexError:
            text = "unknown"
        return text

    def _transform_path(self, path):
        """Strip the project prefix from ``path`` and run ``split_file`` on it.

        Returns a dict describing where the file lives, or None if the path
        lies outside the prefix or ``split_file`` rejects it.
        """
        if not path.startswith(self._prefix):
            log.msg(format="SVNPoller: ignoring path '%(path)s' which doesn't "
                    "start with prefix '%(prefix)s'",
                    path=path, prefix=self._prefix)
            return
        relative_path = path[len(self._prefix):]
        if relative_path.startswith("/"):
            relative_path = relative_path[1:]
        where = self.split_file(relative_path)
        # 'where' is either None, (branch, final_path) or a dict
        if not where:
            return
        if isinstance(where, tuple):
            where = dict(branch=where[0], path=where[1])
        return where

    def create_changes(self, new_logentries):
        """Turn <logentry> nodes into a list of change dicts, one per branch.

        A log entry whose only effect on a branch is deleting the branch
        directory itself produces no change for that branch.
        """
        changes = []

        for el in new_logentries:
            revision = str(el.getAttribute("revision"))

            revlink = ''

            if self.revlinktmpl and revision:
                revlink = self.revlinktmpl % urlquote_plus(revision)
                revlink = str(revlink)

            log.msg("Adding change revision %s" % (revision,))
            author = self._get_text(el, "author")
            comments = self._get_text(el, "msg")
            # there is a "date" field, but it provides localtime in the
            # repository's timezone, whereas we care about buildmaster's
            # localtime (since this will get used to position the boxes on
            # the Waterfall display, etc). So ignore the date field, and
            # addChange will fill in with the current time
            branches = {}
            try:
                pathlist = el.getElementsByTagName("paths")[0]
            except IndexError:  # weird, we got an empty revision
                log.msg("ignoring commit with no paths")
                continue

            for p in pathlist.getElementsByTagName("path"):
                kind = p.getAttribute("kind")
                action = p.getAttribute("action")
                path = "".join([t.data for t in p.childNodes])
                if path.startswith("/"):
                    path = path[1:]
                if kind == "dir" and not path.endswith("/"):
                    path += "/"
                where = self._transform_path(path)

                # if 'where' is None, the file was outside any project that
                # we care about and we should ignore it
                if where:
                    branch = where.get("branch", None)
                    filename = where["path"]
                    if branch not in branches:
                        branches[branch] = {
                            'files': [], 'number_of_directories': 0}
                    if filename == "":
                        # root directory of branch
                        branches[branch]['files'].append(filename)
                        branches[branch]['number_of_directories'] += 1
                    elif filename.endswith("/"):
                        # subdirectory of branch
                        branches[branch]['files'].append(filename[:-1])
                        branches[branch]['number_of_directories'] += 1
                    else:
                        branches[branch]['files'].append(filename)

                    # the first action seen for a branch is the one recorded
                    if "action" not in branches[branch]:
                        branches[branch]['action'] = action

                    for key in ("repository", "project", "codebase"):
                        if key in where:
                            branches[branch][key] = where[key]

            for branch in branches:
                action = branches[branch]['action']
                files = branches[branch]['files']

                number_of_directories_changed = branches[
                    branch]['number_of_directories']
                number_of_files_changed = len(files)

                if (action == 'D' and number_of_directories_changed == 1 and
                        number_of_files_changed == 1 and files[0] == ''):
                    log.msg("Ignoring deletion of branch '%s'" % branch)
                else:
                    chdict = dict(
                        author=author,
                        # weakly assume filenames are utf-8
                        files=[bytes2unicode(f, 'utf-8', 'replace')
                               for f in files],
                        comments=comments,
                        revision=revision,
                        branch=util.bytes2unicode(branch),
                        revlink=revlink,
                        category=self.category,
                        repository=util.bytes2unicode(
                            branches[branch].get('repository', self.repourl)),
                        project=util.bytes2unicode(
                            branches[branch].get('project', self.project)),
                        codebase=util.bytes2unicode(
                            branches[branch].get('codebase', None)))
                    changes.append(chdict)

        return changes

    @defer.inlineCallbacks
    def submit_changes(self, changes):
        """Submit every change dict to the master, one after another."""
        for chdict in changes:
            yield self.master.data.updates.addChange(src='svn', **chdict)

    def finished_ok(self, res):
        """Persist the last seen revision to the cache file and log completion."""
        # Do not write the literal "None": __init__ parses the cache with
        # int() and would treat such a file as corrupt.
        if self.cachepath and self.last_change is not None:
            with open(self.cachepath, "w") as f:
                f.write(str(self.last_change))

        log.msg("SVNPoller: finished polling %s" % res)
        return res
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment