Hyukjin Kwon (HyukjinKwon)
@HyukjinKwon
HyukjinKwon / log.txt
Created March 5, 2021 02:09
Conda (Python 3.7, Spark 3.1.1, pandas 1.1.5, PyArrow 2.0.0, distributed-sequence)
databricks/koalas/spark/functions.py:21: error: Module 'pyspark.sql.column' has no attribute '_to_java_column'
databricks/koalas/spark/functions.py:21: error: Module 'pyspark.sql.column' has no attribute '_to_seq'
databricks/koalas/spark/functions.py:21: error: Module 'pyspark.sql.column' has no attribute '_create_column_from_literal'
databricks/koalas/strings.py: note: In member "findall" of class "StringMethods":
databricks/koalas/strings.py:1146: error: "Iterable[DataFrameLike]" has no attribute "str"
databricks/koalas/strings.py: note: In member "split" of class "StringMethods":
databricks/koalas/strings.py:1991: error: "Iterable[DataFrameLike]" has no attribute "str"
databricks/koalas/strings.py:2003: error: Argument 1 to "with_new_columns" of "InternalFrame" has incompatible type "List[Column]"; expected "List[Union[Column, Series[Any]]]"
databricks/koalas/strings.py:2003: note: "List" is invariant -- see http://mypy.readthedocs.io/en/latest/common_issues.html#variance
databricks/koalas/strings.py:2003:
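The invariance note at strings.py:2003 is general mypy behavior rather than anything Koalas-specific; a minimal self-contained sketch of it, with stand-in class and function names rather than the real Koalas signatures:

from typing import List, Sequence, Union

class Column: ...
class Series: ...

def takes_union_list(cols: List[Union[Column, Series]]) -> None: ...
def takes_union_seq(cols: Sequence[Union[Column, Series]]) -> None: ...

cols: List[Column] = [Column()]
takes_union_list(cols)  # mypy error: List is invariant, so List[Column] is not a List[Union[Column, Series]]
takes_union_seq(cols)   # OK: Sequence is covariant, so a List[Column] is accepted

Annotating the parameter as Sequence (or annotating the list as List[Union[Column, Series]] at construction) is the usual fix suggested by the mypy page linked in the log.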
@HyukjinKwon
HyukjinKwon / before.txt
Last active June 8, 2020 02:03
Python execution error message
>>> from pyspark.sql.functions import udf
>>> @udf
... def divide_by_zero(v):
...     raise v / 0
...
>>> spark.range(1).select(divide_by_zero("id")).show()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/.../python/pyspark/sql/dataframe.py", line 427, in show
    print(self._jdf.showString(n, 20, vertical))
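The failure here does not come from the raise statement itself: evaluating the operand v / 0 raises ZeroDivisionError first, which the Python worker then reports. The same behavior reproduces without Spark:

def divide_by_zero(v):
    raise v / 0  # v / 0 is evaluated before raise runs, so this raises ZeroDivisionError

divide_by_zero(1)  # ZeroDivisionError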
[run]
branch = true
parallel = true
data_file = ${COVERAGE_DIR}/coverage_data/coverage
[html]
ignore_errors = true
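The block above is a coverage.py configuration; its gist header is not shown in this preview. A minimal sketch of driving it through the coverage API, assuming it is saved as .coveragerc and that COVERAGE_DIR is set in the environment (coverage.py expands ${COVERAGE_DIR} in config values):

import os
import coverage

os.environ.setdefault("COVERAGE_DIR", "/tmp/cov")  # illustrative location

cov = coverage.Coverage(config_file=".coveragerc")
cov.start()
# ... exercise the code under test here ...
cov.stop()
cov.save()
cov.combine()      # merge the per-process data files produced by parallel = true
cov.html_report()  # ignore_errors = true from the [html] section applies here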
@HyukjinKwon
HyukjinKwon / each-linter.txt
Created September 23, 2017 15:15
[R] SPARK-22063 each linter results
Test linter [object_name_linter] only
.lintr file:
linters: with_defaults(object_usage_linter = NULL, absolute_path_linter = NULL, nonportable_path_linter = NULL,
  pipe_continuation_linter = NULL, assignment_linter = NULL, closed_curly_linter = NULL, commas_linter = NULL,
  extraction_operator_linter = NULL, implicit_integer_linter = NULL, infix_spaces_linter = NULL,
  line_length_linter = NULL, no_tab_linter = NULL, object_length_linter = NULL, open_curly_linter = NULL,
  semicolon_terminator_linter = NULL, single_quotes_linter = NULL, spaces_inside_linter = NULL,
  spaces_left_parentheses_linter = NULL, todo_comment_linter = NULL, trailing_blank_lines_linter = NULL,
  trailing_whitespace_linter = NULL, T_and_F_symbol_linter = NULL, undesirable_function_linter = NULL,
  undesirable_operator_linter = NULL, unneeded_concatenation_linter = NULL)
exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")
Elapsed: [170.740297079] sec
@HyukjinKwon
HyukjinKwon / results.txt
Created September 20, 2017 07:57
[R] SPARK-22063 lintr results
inst/worker/worker.R:71:10: style: Remove spaces before the left parenthesis in a function call.
return (output)
^
R/column.R:241:1: style: Lines should not be more than 100 characters.
#' \href{https://spark.apache.org/docs/latest/sparkr.html#data-type-mapping-between-r-and-spark}{
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
R/context.R:332:1: style: Variable and function names should not be longer than 30 characters.
spark.getSparkFilesRootDirectory <- function() {
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
R/DataFrame.R:1912:1: style: Lines should not be more than 100 characters.
@HyukjinKwon
HyukjinKwon / after.txt
Created July 4, 2017 01:04
[PYSPARK] SPARK-21296 test results (after)
<lambda> function took 3812.337 ms
DataFrame[value: int]
<lambda> function took 2307.548 ms
DataFrame[value: int]
<lambda> function took 2379.824 ms
DataFrame[value: int]
<lambda> function took 2427.346 ms
DataFrame[value: int]
<lambda> function took 2488.754 ms
DataFrame[value: int]
@HyukjinKwon
HyukjinKwon / printerrors.py
Created July 4, 2017 00:55
[PYSPARK] SPARK-19507 code
from pyspark.sql.types import *
from pyspark.sql import SparkSession

spark = SparkSession \
    .builder \
    .appName("Python Spark Test") \
    .getOrCreate()

def print_message(f):
    # Run f and print the message of any exception it raises.
    try:
        f()
    except Exception as e:
        print e
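Building on the snippet above, a sketch of how print_message could produce the messages captured in the before/after files below; the schema and data here are illustrative guesses, not the gist's actual test cases:

schema = StructType([StructField("a", IntegerType())])
print_message(lambda: spark.createDataFrame([["a"]], schema))
# prints: field a: IntegerType can not accept object 'a' in type <type 'str'>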
@HyukjinKwon
HyukjinKwon / after.txt
Last active July 4, 2017 00:58
[PYSPARK] SPARK-19507 error message (after)
struct<a:int>
field a: IntegerType can not accept object 'a' in type <type 'str'>
int
field value: IntegerType can not accept object 'a' in type <type 'str'>
struct<a:array<int>>
StructType can not accept object 'a' in type <type 'str'>
struct<a:array<int>>
@HyukjinKwon
HyukjinKwon / before.txt
Created July 4, 2017 00:53
[PYSPARK] SPARK-19507 error message (before)
struct<a:int>
IntegerType can not accept object 'a' in type <type 'str'>
int
IntegerType can not accept object 'a' in type <type 'str'>
struct<a:array<int>>
StructType can not accept object 'a' in type <type 'str'>
struct<a:array<int>>
@HyukjinKwon
HyukjinKwon / benchmark.py
Created July 4, 2017 00:49
[PYSPARK] SPARK-21296 benchmark code
import time

def timing(f):
    # Decorator that reports how long f takes, in milliseconds, and passes its result through.
    def wrap(*args):
        time1 = time.time()
        ret = f(*args)
        time2 = time.time()
        print '%s function took %0.3f ms' % (f.func_name, (time2 - time1) * 1000.0)
        return ret
    return wrap
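The after.txt results above print a <lambda> name followed by DataFrame[value: int], which is consistent with timing a createDataFrame call over plain integers; a plausible usage sketch (the actual benchmarked call is not shown in the preview, and this keeps the gist's Python 2 style):

from pyspark.sql import SparkSession
from pyspark.sql.types import IntegerType

spark = SparkSession.builder.getOrCreate()

# Illustrative workload: build a single-column integer DataFrame and time it.
run = timing(lambda: spark.createDataFrame(xrange(1000000), IntegerType()))
print run()  # prints "<lambda> function took ... ms", then DataFrame[value: int]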