Kazuaki Ishizaki kiszk

## scriptmode.pt.BE
0000000 504b 0304 0000 0808 0000 0000 0000 0000
0000020 0000 0000 0000 0000 0000 1500 0d00 7363
0000040 7269 7074 6d6f 6475 6c65 2f64 6174 612e
0000060 706b 6c46 4209 005a 5a5a 5a5a 5a5a 5a5a
0000100 8002 635f 5f74 6f72 6368 5f5f 0a4d 794d
0000120 6f64 756c 650a 7100 2981 7d28 5800 0000
0000140 0874 7261 696e 696e 6771 0188 5800 0000
0000160 165f 6973 5f66 756c 6c5f 6261 636b 7761
0000200 7264 5f68 6f6f 6b71 024e 7562 7103 2e50
0000220 4b07 08f3 4c76 c34f 0000 004f 0000 0050

## CONTRIBUTING.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                kiszk
                / CONTRIBUTING.md
            
            
              Created
              January 16, 2022 10:10
            
          
    How to contribute to Apache Arrow Julia

Did you find a bug or have an improvement?

We recommend that you first search the existing Arrow issues. The community may already be addressing the same idea. If you find a matching issue, you may want to contribute to it.
How do you write a patch that fixes a bug or brings an improvement?

If you cannot find an existing issue for the same idea, you first need to open a GitHub issue (e.g. in the Arrow-julia issue tracker) describing the bug fix or the planned feature. Writing an issue gives the community visibility and an opportunity to collaborate before a pull request (PR) shows up. This follows the Apache way. We can use GitHub labels to identify bugs.

It should not be necessary to file an issue for some non-code changes, such as CI changes or minor documentation updates such as fixing typos.

  
## gist:daedf1e033a07d3f484655a69aac78ca
Micah, thank you for expanding the scope of Big Endian support in Arrow. I was happy to see this when I came back from a one-week vacation. I agree with this, since we have just seen the kickoff of BE support in Go. Let me make some comments.

> 1.  As long as there is CI in place to catch regressions (right now I think the CI is fairly unreliable?)
I agree. While TravisCI on s390x is unreliable, each platform can set up its own CI script.

> 2.  No degradation in performance for little-endian architectures (verified by additional micro benchmarks)
Yes, we can do it. @kou suggested that I use an existing mechanism like [1]. It currently supports C++. We could expand this to other languages.

> 3.  Not a large amount of invasive code to distinguish between platforms.
Yes, I will prepare a draft of the principle for endian-independent code in another mail.

## read_write_test.cc
Status MakePrimitiveBatch(std::shared_ptr<RecordBatch>* out) {
  int seed = 0;
  int length = 17;

  // Make the schema
  auto f0 = field("bool_nullable", boolean());
  auto f1 = field("bool_nonnullable", boolean());
  auto f2 = field("int8_nullable", int8());
  auto f3 = field("int8_nonnullable", int8());
  auto f4 = field("int16_nullable", int16());

## MemoryBlockBenchmark
package org.apache.spark.sql.execution.benchmark

import org.apache.spark.SparkFunSuite
import org.apache.spark.unsafe.Platform
import org.apache.spark.unsafe.memory._
import org.apache.spark.util.Benchmark

class MemoryBlockBenchmark extends SparkFunSuite {
  test("benchmark") {
    val N = 256 * 1024 * 1024

## MemoryBlockCopyBenchmark.scala
package org.apache.spark.sql

import scala.concurrent.duration._

import org.apache.spark.{SparkConf, SparkContext, SparkEnv, SparkFunSuite}
import org.apache.spark.sql._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.Platform

## myInstance.py
#!/usr/bin/env python

N = 20000  # the number of total instance variables
INNER = 5000 # the number of instance variables in an inner class
M = 30   # the number of accesses in a method
NN = N / M
TYPE = "int"

def printArray():
    print("  %s[] instance = new %s[N];" %  (TYPE, TYPE))

## Test.java
public class Test {
  static public void main(String[] argv) {
    MyInstanceA ma = new MyInstanceA();
    MyInstanceI mi = new MyInstanceI();
    MyInstanceV mv = new MyInstanceV();

    // warmup
    for (long i = 0; i < 1500000; i++) {
      ma.accessArray();
      mi.accessIVars();

## MemoryBlockBenchmark.scala
package org.apache.spark.sql

import org.apache.spark.SparkFunSuite
import org.apache.spark.unsafe.Platform
import org.apache.spark.unsafe.memory._
import org.apache.spark.util.Benchmark

class MemoryBlockBenchmark extends SparkFunSuite {
  test("benchmark") {
    val N = 128 * 1024 * 1024

## gist:29ce3558564ccb0b8797dec37b29eb12
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
	0000000 504b 0304 0000 0808 0000 0000 0000 0000
	0000020 0000 0000 0000 0000 0000 1500 0d00 7363
	0000040 7269 7074 6d6f 6475 6c65 2f64 6174 612e
	0000060 706b 6c46 4209 005a 5a5a 5a5a 5a5a 5a5a
	0000100 8002 635f 5f74 6f72 6368 5f5f 0a4d 794d
	0000120 6f64 756c 650a 7100 2981 7d28 5800 0000
	0000140 0874 7261 696e 696e 6771 0188 5800 0000
	0000160 165f 6973 5f66 756c 6c5f 6261 636b 7761
	0000200 7264 5f68 6f6f 6b71 024e 7562 7103 2e50
	0000220 4b07 08f3 4c76 c34f 0000 004f 0000 0050
	Micah, thank you for expanding the scope of Big Endian support in Arrow. I was happy to see this when I came back from a one-week vacation. I agree with this, since we have just seen the kickoff of BE support in Go. Let me make some comments.

	> 1. As long as there is CI in place to catch regressions (right now I think the CI is fairly unreliable?)
	I agree. While TravisCI on s390x is unreliable, each platform can set up its own CI script.

	> 2. No degradation in performance for little-endian architectures (verified by additional micro benchmarks)
	Yes, we can do it. @kou suggested that I use an existing mechanism like [1]. It currently supports C++. We could expand this to other languages.

	> 3. Not a large amount of invasive code to distinguish between platforms.
	Yes, I will prepare a draft of the principle for endian-independent code in another mail.
	Status MakePrimitiveBatch(std::shared_ptr<RecordBatch>* out) {
	int seed = 0;
	int length = 17;

	// Make the schema
	auto f0 = field("bool_nullable", boolean());
	auto f1 = field("bool_nonnullable", boolean());
	auto f2 = field("int8_nullable", int8());
	auto f3 = field("int8_nonnullable", int8());
	auto f4 = field("int16_nullable", int16());
	package org.apache.spark.sql.execution.benchmark

	import org.apache.spark.SparkFunSuite
	import org.apache.spark.unsafe.Platform
	import org.apache.spark.unsafe.memory._
	import org.apache.spark.util.Benchmark

	class MemoryBlockBenchmark extends SparkFunSuite {
	test("benchmark") {
	val N = 256 * 1024 * 1024
	package org.apache.spark.sql

	import scala.concurrent.duration._

	import org.apache.spark.{SparkConf, SparkContext, SparkEnv, SparkFunSuite}
	import org.apache.spark.sql._
	import org.apache.spark.sql.internal.SQLConf
	import org.apache.spark.sql.test.SharedSQLContext
	import org.apache.spark.sql.types._
	import org.apache.spark.unsafe.Platform
	#!/usr/bin/env python

	N = 20000 # the number of total instance variables
	INNER = 5000 # the number of instance variables in an inner class
	M = 30 # the number of accesses in a method
	NN = N / M
	TYPE = "int"

	def printArray():
	print(" %s[] instance = new %s[N];" % (TYPE, TYPE))
	public class Test {
	static public void main(String[] argv) {
	MyInstanceA ma = new MyInstanceA();
	MyInstanceI mi = new MyInstanceI();
	MyInstanceV mv = new MyInstanceV();

	// warmup
	for (long i = 0; i < 1500000; i++) {
	ma.accessArray();
	mi.accessIVars();
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*