Venkatesh-Prasad Ranganath rvprasad

## batchRead.r
#' Read a file in chunks
#'
#' @param theFile connection providing the data, e.g., file('data/transactions.csv', 'r').
#' @param headers of the data being read.
#' @param leftOver rows that were read but not returned by the previous invocation of this function.
#' @param col on which the data is grouped.
#' @return a list of two elements: data provided by the current invocation and leftOver to be used during the next invocation.
getDataFrameForNextId <- function(theFile, headers, leftOver, col) {
  while (NROW(leftOver) == 0 || NROW(unique(leftOver[,col])) < 2) {
    tmp1 <- read.csv(theFile, nrows=100000)

## recoverSchema.py
import re

def getVocabulary(wordFileName):
    """Load the distinct entries listed one per line in a text file.

    Every line of the file is stripped of leading and trailing whitespace
    before being collected, so repeated entries collapse into one and a
    blank line contributes the empty string.

    :param wordFileName: path of the file to read.
    :return: a set holding the stripped lines of the file.
    """
    with open(wordFileName) as source:
        return {line.strip() for line in source}

import string

## conftest.py
import pytest  # added
from _pytest import runner, _code # added

def pytest_runtest_makereport(item, call):
    when = call.when
    duration = call.stop-call.start
    keywords = dict([(x,1) for x in item.keywords])
    excinfo = call.excinfo
    sections = []
    if not call.excinfo:

## calculateBufferedReadWriteTimes.groovy

fileSize = 2 ** 16 * 1000

def getStatsWith(closure) {
    buffSizes = (8..15)
    iterations = (0..10)
    buffSize2runtimes = buffSizes.collectEntries { [(2 ** it):[]] }
    iterations.each {
        buffSize2runtimes.each { buffSize, runtimes ->
            runtimes << closure(buffSize) * 1000 / 1024 / 1024

## XDump.java
/*
 * Run this with ASM 5.1 (http://forge.ow2.org/project/showfiles.php?group_id=23) to generate X.class.
 * Loading the generated X.class will cause the following error with JDK 9-ea, JDK 1.8.0_112, and Zulu 1.8.0_112.
 *
Error: A JNI error has occurred, please check your installation and try again
Exception in thread "main" java.lang.VerifyError: Stack map does not match the one at exception handler 13
Exception Details:
  Location:
    X.<init>(LX;)V @13: athrow
  Reason:

## collectTagsOccurringWithGivenTag.groovy
/*
 * Copyright (c) 2017, Venkatesh-Prasad Ranganath
 *
 * BSD 3-clause License
 *
 * Author: Venkatesh-Prasad Ranganath
 */

import groovy.util.CliBuilder
import groovyx.gpars.actor.DynamicDispatchActor

## test_str_to_int.py
# Python -- v3.6
# https://docs.pytest.org/en/latest/ -- v3.2.1
# http://hypothesis.readthedocs.io/en/latest/ -- v3.7

from hypothesis import assume, given
from hypothesis.strategies import text
import pytest

rep2int = {
        '1':1,

## test_process.py
# Python -- v3.6

import begin
import multiprocessing
import time


def worker(varying_data, fixed_data):
    t = 0
    for j in range(1, 10000):

## test_process_graph.gp
# Plot the timings stored in test_process.csv into a PNG image.
set terminal png
set output "test_process.png"

# Chart annotations.
set title "Performance of options vs Size of fixed data"
set xlabel "Size of fixed data [Number of ints]"
set ylabel "Performance [seconds per iteration]"

# Logarithmic scaling (no axis list given, so it is not limited to one axis).
set logscale

# Column 1 supplies x; columns 2 and 3 supply the two plotted series.
plot "test_process.csv" using 1:2 title "builtin pool" with linespoints, \
     "test_process.csv" using 1:3 title "custom pool" with linespoints

## test_pool_map_graph.gp
set terminal png
set output "test_pool_map.png"
set logscale
set xlabel "Size of aux data [Number of ints]"
set ylabel "Performance [seconds per iteration]"
set title "Performance of options vs Size of aux data"
plot "test_pool_map.csv" using 1:2 title "without initializer / default chunksize" with linespoints, \
    "test_pool_map.csv" using 1:3 title "with initializer / default chunksize" with linespoints, \
    "test_pool_map.csv" using 1:4 title "without initializer / 250 chunksize" with linespoints, \
    "test_pool_map.csv" using 1:5 title "with initializer / 250 chunksize" with linespoints, \
	#' Read a file in chunks
	#'
	#' @param theFile connection providing the data, e.g., file('data/transactions.csv', 'r').
	#' @param headers of the data being read.
	#' @param leftOver rows that were read but not returned by the previous invocation of this function.
	#' @param col on which the data is grouped.
	#' @return a list of two elements: data provided by the current invocation and leftOver to be used during the next invocation.
	getDataFrameForNextId <- function(theFile, headers, leftOver, col) {
	while (NROW(leftOver) == 0 \|\| NROW(unique(leftOver[,col])) < 2) {
	tmp1 <- read.csv(theFile, nrows=100000)
	import re

	def getVocabulary(wordFileName):
	ret = set()
	with open(wordFileName) as wordFile:
	for w in wordFile:
	ret.add(w.strip())
	return ret

	import string
	import pytest # added
	from _pytest import runner, _code # added

	def pytest_runtest_makereport(item, call):
	when = call.when
	duration = call.stop-call.start
	keywords = dict([(x,1) for x in item.keywords])
	excinfo = call.excinfo
	sections = []
	if not call.excinfo:

	fileSize = 2 ** 16 * 1000

	def getStatsWith(closure) {
	buffSizes = (8..15)
	iterations = (0..10)
	buffSize2runtimes = buffSizes.collectEntries { [(2 ** it):[]] }
	iterations.each {
	buffSize2runtimes.each { buffSize, runtimes ->
	runtimes << closure(buffSize) * 1000 / 1024 / 1024
	/*
	* Run this with ASM 5.1 (http://forge.ow2.org/project/showfiles.php?group_id=23) to generate X.class.
	* Loading the generated X.class will cause the following error with JDK 9-ea, JDK 1.8.0_112, and Zulu 1.8.0_112.
	*
	Error: A JNI error has occurred, please check your installation and try again
	Exception in thread "main" java.lang.VerifyError: Stack map does not match the one at exception handler 13
	Exception Details:
	Location:
	X.<init>(LX;)V @13: athrow
	Reason:
	/*
	* Copyright (c) 2017, Venkatesh-Prasad Ranganath
	*
	* BSD 3-clause License
	*
	* Author: Venkatesh-Prasad Ranganath
	*/

	import groovy.util.CliBuilder
	import groovyx.gpars.actor.DynamicDispatchActor
	# Python -- v3.6
	# https://docs.pytest.org/en/latest/ -- v3.2.1
	# http://hypothesis.readthedocs.io/en/latest/ -- v3.7

	from hypothesis import assume, given
	from hypothesis.strategies import text
	import pytest

	rep2int = {
	'1':1,
	# Python -- v3.6

	import begin
	import multiprocessing
	import time


	def worker(varying_data, fixed_data):
	t = 0
	for j in range(1, 10000):
	set terminal png
	set output "test_process.png"
	set logscale
	set xlabel "Size of fixed data [Number of ints]"
	set ylabel "Performance [seconds per iteration]"
	set title "Performance of options vs Size of fixed data"
	plot "test_process.csv" using 1:2 title "builtin pool" with linespoints, \
	"test_process.csv" using 1:3 title "custom pool" with linespoints
	set terminal png
	set output "test_pool_map.png"
	set logscale
	set xlabel "Size of aux data [Number of ints]"
	set ylabel "Performance [seconds per iteration]"
	set title "Performance of options vs Size of aux data"
	plot "test_pool_map.csv" using 1:2 title "without initializer / default chunksize" with linespoints, \
	"test_pool_map.csv" using 1:3 title "with initializer / default chunksize" with linespoints, \
	"test_pool_map.csv" using 1:4 title "without initializer / 250 chunksize" with linespoints, \
	"test_pool_map.csv" using 1:5 title "with initializer / 250 chunksize" with linespoints, \