Skip to content

Instantly share code, notes, and snippets.

@ei-grad
Created March 12, 2020 19:15
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ei-grad/d311d0f34b60ebef96841a3a39103622 to your computer and use it in GitHub Desktop.
diff --git a/python/pyspark/cloudpickle.py b/python/pyspark/cloudpickle.py
index 88519d7311..f7ab821b97 100644
--- a/python/pyspark/cloudpickle.py
+++ b/python/pyspark/cloudpickle.py
@@ -72,77 +72,16 @@ else:
PY3 = True
-def _make_cell_set_template_code():
- """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF
+def f():
+ cell = None
- Notes
- -----
- In Python 3, we could use an easier function:
+ def _stub(value):
+ nonlocal cell
+ cell = value
- .. code-block:: python
+ return _stub
- def f():
- cell = None
-
- def _stub(value):
- nonlocal cell
- cell = value
-
- return _stub
-
- _cell_set_template_code = f()
-
- This function is _only_ a LOAD_FAST(arg); STORE_DEREF, but that is
- invalid syntax on Python 2. If we use this function we also don't need
- to do the weird freevars/cellvars swap below
- """
- def inner(value):
- lambda: cell # make ``cell`` a closure so that we get a STORE_DEREF
- cell = value
-
- co = inner.__code__
-
- # NOTE: we are marking the cell variable as a free variable intentionally
- # so that we simulate an inner function instead of the outer function. This
- # is what gives us the ``nonlocal`` behavior in a Python 2 compatible way.
- if not PY3:
- return types.CodeType(
- co.co_argcount,
- co.co_nlocals,
- co.co_stacksize,
- co.co_flags,
- co.co_code,
- co.co_consts,
- co.co_names,
- co.co_varnames,
- co.co_filename,
- co.co_name,
- co.co_firstlineno,
- co.co_lnotab,
- co.co_cellvars, # this is the trickery
- (),
- )
- else:
- return types.CodeType(
- co.co_argcount,
- co.co_kwonlyargcount,
- co.co_nlocals,
- co.co_stacksize,
- co.co_flags,
- co.co_code,
- co.co_consts,
- co.co_names,
- co.co_varnames,
- co.co_filename,
- co.co_name,
- co.co_firstlineno,
- co.co_lnotab,
- co.co_cellvars, # this is the trickery
- (),
- )
-
-
-_cell_set_template_code = _make_cell_set_template_code()
+_cell_set_template_code = f()
def cell_set(cell, value):
@derya26
Copy link

derya26 commented Apr 24, 2020

Hi, this solves the error in cloudpickle.py, thanks! But when map-reduce functions are used, the error shown below occurs in serializers.py:

in loads
return pickle.loads(obj, encoding=encoding)
TypeError: an integer is required (got type bytes)

How can I solve this problem? (Python 3.8.2 with spark-hadoop 2.4.5)

like in this case https://stackoverflow.com/a/60660577/12100307

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment