Bryan Cutler BryanCutler

## start_jupyter_pyspark.sh
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#

## PySpark_Vectorized_UDFs.ipynb

      
              1 file
            
          
              1 fork
            
          
              2 comments
            
          
              2 stars
            
          
                BryanCutler
                / PySpark_Vectorized_UDFs.ipynb
            
            
              Last active
              February 17, 2022 13:57
            
              
                PySpark vectorized UDFs with Arrow
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## tf_arrow_model_training.py
from functools import partial
import multiprocessing
import os
import socket
import sys

from sklearn.preprocessing import StandardScaler

import numpy as np
import pandas as pd

## tep_extending_pandas_blog1_5.py
class SpanOpMixin:
  def __add__(self, other) -> Union["Span", "SpanArray"]:

    if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
      # Rely on pandas to unbox and dispatch to us.
      return NotImplemented

    if isinstance(self, Span) and isinstance(other, Span):
      # Span + *Span = Span
      return Span(self.target_text,

## tep_extending_pandas_blog1_5_0.py
# Addition of `Span` with Series of `SpanDtype` produces another Series
df["span"].iloc[1]  + df["span"].iloc[3:5]

# 3    [0, 16): 'Monty Python and'
# 4    [0, 20): 'Monty Python and the'
# Name: span, dtype: SpanDtype

## tep_extending_pandas_blog1_6.py
# Reduce the `SpanArray` to a single `Span` covering tokens 2 through 4 (the slice end, 5, is exclusive).
df["span"].iloc[2:5].sum()

# [6, 20): 'Python and the'

## tep_extending_pandas_blog1_7.py
@pytest.fixture
def dtype():
    """Provide the extension dtype under test: a newly built ``TensorDtype``."""
    tensor_dtype = TensorDtype()
    return tensor_dtype

@pytest.fixture
def data(dtype):
    """Provide the extension array that the tests operate on.

    The source values are a 100x1 NumPy array whose rows are ``[0]`` through
    ``[99]``; they are converted with ``pd.array`` using the ``dtype`` fixture.
    """
    rows = [[value] for value in range(100)]
    values = np.array(rows)
    return pd.array(values, dtype=dtype)


## tep_extending_pandas_blog1_4.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                BryanCutler
                / tep_extending_pandas_blog1_4.ipynb
            
            
              Created
              May 3, 2021 20:23
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## tep_extending_pandas_blog1_3.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                BryanCutler
                / tep_extending_pandas_blog1_3.ipynb
            
            
              Created
              May 3, 2021 20:13
            
              
                Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## tep_extending_pandas_blog1_2.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                BryanCutler
                / tep_extending_pandas_blog1_2.ipynb
            
            
              Last active
              May 3, 2021 17:11
            
              
                Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	#!/usr/bin/env bash

	#
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	from functools import partial
	import multiprocessing
	import os
	import socket
	import sys

	from sklearn.preprocessing import StandardScaler

	import numpy as np
	import pandas as pd
	class SpanOpMixin:
	def __add__(self, other) -> Union["Span", "SpanArray"]:

	if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
	# Rely on pandas to unbox and dispatch to us.
	return NotImplemented

	if isinstance(self, Span) and isinstance(other, Span):
	# Span + *Span = Span
	return Span(self.target_text,
	# Addition of `Span` with Series of `SpanDtype` produces another Series
	df["span"].iloc[1] + df["span"].iloc[3:5]

	# 3 [0, 16): 'Monty Python and'
	# 4 [0, 20): 'Monty Python and the'
	# Name: span, dtype: SpanDtype
	# Reduce the `SpanArray` to a single `Span` covering tokens 2 through 4 (the slice end, 5, is exclusive).
	df["span"].iloc[2:5].sum()

	# [6, 20): 'Python and the'
	@pytest.fixture
	def dtype():
	    """Return the dtype of the extension array under test.

	    Builds and returns a new ``TensorDtype`` instance each time the fixture
	    is requested.
	    """
	    return TensorDtype()

	@pytest.fixture
	def data(dtype):
	    """Return an extension array to use as data for the tests.

	    The values are taken from a 100x1 NumPy array whose rows are ``[0]``
	    through ``[99]`` and converted with ``pd.array`` using the ``dtype``
	    fixture.
	    """
	    return pd.array(np.array([[i] for i in range(100)]), dtype=dtype)