Skip to content

Instantly share code, notes, and snippets.

@kszucs
Last active October 21, 2019 15:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kszucs/b2743546044ccd3215e5bb34fa0d76a0 to your computer and use it in GitHub Desktop.
Save kszucs/b2743546044ccd3215e5bb34fa0d76a0 to your computer and use it in GitHub Desktop.
Snippet to query cherry-pickable commits for a release

Commits to cherry-pick on top of the apache-arrow-0.15.0 tag

git cherry-pick 321896f6bf6c57e317c2d2c8ce83aca7d1eb78a0  # ARROW-6740: [C++] Unmap MemoryMappedFile as soon as possible
git cherry-pick c379f22f653e1fc0ea477ab5e26ca7eb1f3a4e24  # ARROW-6777: [GLib][CI] Unpin gobject-introspection gem
git cherry-pick b9c154a8bad81217621bb638a72f6f454ad18806  # ARROW-6762: [C++] Support reading JSON files with no newline at end
git cherry-pick 4fa044c0377c7a6e92de106782ba3509129f8ea0  # ARROW-6806: [C++] [Python] Fix crash validating an IPC-originating empty array
git cherry-pick ad335f953ba25d1a51005ecce7f4a0499b10d7d2  # ARROW-6631: [C++] Do not build any compression libraries by default in C++ build
git cherry-pick 6ea98460ed034efcf0f7d06940769c848f2528f4  # ARROW-6834: [C++][TRIAGE] Pin gtest version 1.8.1 to unblock Appveyor builds
git cherry-pick d1f872a05aa4aa28c980fd2fbacfc457ce289e38  # ARROW-6831: [R] Update R macOS/Windows builds for change in cmake compression defaults
git cherry-pick ad85b1186c53a74e166848c5d150b6c65808c1cd  # ARROW-6661: [Java] Implement APIs like slice to enhance VectorSchemaRoot (#5470)
git cherry-pick 8621a5c489cc5af066150e900b21c709161d6cbf  # ARROW-6464: [Java] Refactor FixedSizeListVector#splitAndTransfer with slice API (#5293)
git cherry-pick 102acc47287c37a01ac11a5cb6bd1da3f1f0712d  # ARROW-6860: [Python][C++] Do not link shared libraries monolithically to pyarrow.lib, add libarrow_python_flight.so
git cherry-pick 07128fa41ef4733730a844a4376b3d42e03e069e  # ARROW-6864: [C++] Add compression-related compile definitions before adding any unit tests
git cherry-pick 6f21dc63fe6743bffff9930b12ce77956ac9299c  # ARROW-6852: [C++] Fix build issue on memory-benchmark
git cherry-pick d7ad509aee543d96ae3d291b7f9bbb272057d31b  # ARROW-6873: [Python] Remove stale CColumn references
git cherry-pick 40c971110247fb6657ff4ed337a309284c62a357  # ARROW-6857: [C++] Fix DictionaryEncode for zero-chunk ChunkedArray
git cherry-pick 0cb737f381d225707a9fc8d02e8c93a174fee14b  # ARROW-6882: [C++] Ensure the DictionaryArray indices has no dictionary data
git cherry-pick 018e1ff4a9a0d430bdb332b86a4a3dac677c3945  # ARROW-6877: [C++] Add additional Boost versions to support 1.71 and the presumed next 2 future versions
git cherry-pick 2f183a597204f8c535937ccf94bfe24371a0bc59  # ARROW-6844: [C++][Parquet] Fix regression in reading List types with item name that is not "item"
git cherry-pick 2ce62df589d1517348e500c96a493b24e8f866e3  # ARROW-6876: [C++][Parquet] Use shared_ptr to avoid copying ReaderContext struct, fix performance regression with reading many columns
git cherry-pick 56086774322c8d7d38c72da6e46cec606c7076a3  # ARROW-6903: [Python] Attempt to fix Python wheels with introduction of libarrow_python_flight, disabling of pyarrow.orc
git cherry-pick 3572af2a7b25dde50647d7aa4b081da9a6c4f665  # ARROW-6874: [Python] Fix memory leak when converting to Pandas object data
git cherry-pick 1766eb9b40c53e675bd333e824d19e127ac4a2e7  # ARROW-6898: [Java] Fix potential memory leak in ArrowWriter and several test classes
git cherry-pick 442d25e2b4fa78c986d704aa4be2e9fb38af638a  # ARROW-6886: [C++] Fix arrow::io nvcc compiler warnings
git cherry-pick 64c74815e989b706b106d2cb0fe4e1598bd97207  # ARROW-6905: [Gandiva][Crossbow] Use xcode9.4 for osx builds, do not build dataset, filesystem
git cherry-pick 83ed357d726a73b7877d60e1f6a17f43fbd5d20e  # ARROW-6861: [C++] Fix length/null_count/capacity accounting through Reset and AppendIndices in DictionaryBuilder
git cherry-pick 1714fb8379e22911612a50719b945227822103f6  # ARROW-6878: [Python] Fix creating array from list of dicts with bytes keys
git cherry-pick a81db80e2612467d707853dd9205d30e6c7eb205  # ARROW-6813: [Ruby] Arrow::Table.load with headers=true leads to exception in Arrow 0.15
git cherry-pick 3675073a9bb0e967115e7d0cf5165364c7227418  # ARROW-6795: [C#] Fix for reading large (2GB+) files
git cherry-pick 0475455bc7ba4297214e618b77ed4a12e1394414  # ARROW-6728: [C#] Support reading and writing Date32 and Date64 arrays
git cherry-pick 99aa62b761503419206597df406617906d36df87  # ARROW-6869: [C++] Do not return invalid arrays from DictionaryBuilder::Finish when reusing builder. Add "FinishDelta" method and "ResetFull" method
git cherry-pick 5465c108026c6d1e13ed4913b9cb88450c743346  # ARROW-6937: [Packaging][Python] Fix conda linux and OSX wheel nightly builds
git cherry-pick 3adea33b82c467bc0928fc4e7fae02b9e61b41a9  # ARROW-6922: [Python] Compat with pandas for MultiIndex.levels.names
git cherry-pick a70cf783364b140cab172e1851b563295c46e333  # ARROW-6927: [C++] Add gRPC version check
git cherry-pick 32a1e5c92875ef9b197ea9ea516b9daaf9159707  # ARROW-6938: [Packaging][Python] Disable bz2 in Windows wheels and build ZSTD in bundled mode to triage linking issues
from jira import JIRA
import warnings
import pygit2
class Release:
    """Release object for querying issues and commits.

    Ties together a JIRA client and a git repository so that the commits
    belonging to a release can be listed and cherry-picked on top of the
    previous release tag.

    Usage:

        jira = JIRA(
            {'server': 'https://issues.apache.org/jira'},
            basic_auth=(user, password)
        )
        repo = pygit2.Repository('path/to/arrow/repo')
        release = Release(jira, repo, '0.15.1', '0.15.0')

        # show the commits in application order
        for commit in release.commits():
            print(commit.oid)

        # cherry-pick the patches to a branch
        release.apply_patches_to('a-branch')
    """

    def __init__(self, jira, repo, version, previous_version):
        """Store the collaborators and the version pair.

        Parameters:
            jira: client object exposing ``search_issues``.
            repo: repository object (a ``pygit2.Repository`` in the usage
                example above).
            version: version string of the release being prepared.
            previous_version: version string whose tag the release is
                based on.
        """
        self.jira = jira
        self.repo = repo
        self.version = version
        self.previous_version = previous_version
        # Lazily filled cache for the result of issues().
        self._issues = None

    def _tag(self, version):
        """Resolve the ``apache-arrow-<version>`` tag in the repository."""
        return self.repo.revparse_single(f'refs/tags/apache-arrow-{version}')

    def issues(self):
        """Return the JIRA issues whose fixVersion is this release.

        The query is executed once; later calls return the cached result.
        """
        if self._issues is None:
            # maxResults=False asks the jira client to fetch every match in
            # batches; its default would stop after the first 50 issues and
            # silently drop the rest of a larger release.
            self._issues = self.jira.search_issues(
                f'project=ARROW AND fixVersion={self.version}',
                maxResults=False
            )
        return self._issues

    def commits(self):
        """Yield the commits on master that belong to this release.

        Walks master in reversed topological order (oldest first, i.e. the
        order in which the patches have to be applied), hiding everything
        reachable from the previous release tag. A commit is selected when
        the text before the first ':' of its message is the key of one of
        the release's issues; only the first commit per issue key is
        yielded.

        Once the walk is exhausted, a warning is emitted listing the issue
        keys for which no commit was found.
        """
        issue_keys = {issue.key for issue in self.issues()}
        previous_tag = self._tag(self.previous_version)
        master = self.repo.branches['master']
        ordering = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE

        walker = self.repo.walk(master.target, ordering)
        walker.hide(previous_tag.oid)

        for commit in walker:
            # Commit messages look like "ARROW-1234: [Component] Title".
            issue_key = commit.message.split(':', 1)[0]
            if issue_key in issue_keys:
                yield commit
                issue_keys.remove(issue_key)

        if issue_keys:
            warnings.warn(f'Remaining issues without patch: {issue_keys}')

    def apply_patches_to(self, branch_name):
        """Cherry-pick the release's commits onto a new branch.

        Creates ``branch_name`` at the previous release tag, then replays
        every commit from ``commits()`` on top of it, preserving author,
        committer and message.

        Parameters:
            branch_name: name of the branch to create.

        Raises:
            pygit2.GitError: re-raised after deleting the newly created
                branch when any of the git operations fails.
        """
        previous_tag = self._tag(self.previous_version)
        branch = self.repo.create_branch(
            branch_name, previous_tag.get_object()
        )
        try:
            head = branch.target
            for commit in self.commits():
                # Cherry-pick as a three-way merge: the commit's first
                # parent is the ancestor, the branch head is "ours" and the
                # commit itself is "theirs".
                parent_tree = commit.parents[0].tree
                index = self.repo.merge_trees(parent_tree, head, commit.oid)
                tree_id = index.write_tree(self.repo)
                # The new commit becomes the parent of the next pick.
                head = self.repo.create_commit(
                    branch.name,
                    commit.author,
                    commit.committer,
                    commit.message,
                    tree_id,
                    [head]
                )
        except pygit2.GitError:
            # Do not leave a half-populated branch behind.
            self.repo.branches[branch_name].delete()
            raise
import pygit2
from jira import JIRA
# Connect to the Apache JIRA instance; the credentials below are
# placeholders to be replaced with a real user name and password.
jira_options = {'server': 'https://issues.apache.org/jira'}
jira = JIRA(jira_options, basic_auth=('user', 'secret'))

# Open the local Arrow checkout and describe the release to assemble:
# version 0.15.1, based on the 0.15.0 tag.
repo = pygit2.Repository('path/to/arrow')
release = Release(jira, repo, version='0.15.1', previous_version='0.15.0')

# Print the id of every commit belonging to the release, one per line.
for picked in release.commits():
    print(picked.oid)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment