@yamasakih
Last active October 6, 2017 22:27
Workaround for the error raised when records fetched via sqlalchemy are passed as joblib arguments ref: http://qiita.com/yamasakih/items/ed806c1c46a56c1f884a
type(records)
type(other_records)
>>> other_records = list(other_records)
>>> type(other_records)
list
[(1, 100),
(2, 200),
(3, 300),
(4, 400),
(5, 500)]
[(1, 'Alice'),
(2, 'Bob'),
(3, 'Cayce')]
>>> type(records)
sql.run.ResultSet
>>> type(other_records)
sql.run.ResultSet
records = %sql SELECT * FROM TABLE_A
from joblib import Parallel, delayed
r = Parallel(n_jobs=-1, verbose=5)(delayed(parallel_func)(record, other_records) for record in records)
from joblib import Parallel, delayed
r = Parallel(n_jobs=-1, verbose=5)(delayed(parallel_func)(record, list(other_records)) for record in records)
def parallel_func(record, other_records):
    ...  # omitted
other_records = %sql SELECT * FROM TABLE_B
---------------------------------------------------------------------------
PicklingError Traceback (most recent call last)
<ipython-input-46-a4803b957f2d> in <module>()
----> 1 r = Parallel(n_jobs=6, verbose=3) ( delayed(update_fragment_ids) (record, parents) for record in records )
/home/yamasakih/.pyenv/versions/anaconda-4.0.0/envs/rdkit/lib/python3.6/site-packages/joblib/parallel.py in __call__(self, iterable)
787 # consumption.
788 self._iterating = False
--> 789 self.retrieve()
790 # Make sure that we get a last message telling us we are done
791 elapsed_time = time.time() - self._start_time
/home/yamasakih/.pyenv/versions/anaconda-4.0.0/envs/rdkit/lib/python3.6/site-packages/joblib/parallel.py in retrieve(self)
697 try:
698 if getattr(self._backend, 'supports_timeout', False):
--> 699 self._output.extend(job.get(timeout=self.timeout))
700 else:
701 self._output.extend(job.get())
/home/yamasakih/.pyenv/versions/anaconda-4.0.0/envs/rdkit/lib/python3.6/multiprocessing/pool.py in get(self, timeout)
606 return self._value
607 else:
--> 608 raise self._value
609
610 def _set(self, i, obj):
/home/yamasakih/.pyenv/versions/anaconda-4.0.0/envs/rdkit/lib/python3.6/multiprocessing/pool.py in _handle_tasks(taskqueue, put, outqueue, pool, cache)
383 break
384 try:
--> 385 put(task)
386 except Exception as e:
387 job, ind = task[:2]
/home/yamasakih/.pyenv/versions/anaconda-4.0.0/envs/rdkit/lib/python3.6/site-packages/joblib/pool.py in send(obj)
369 def send(obj):
370 buffer = BytesIO()
--> 371 CustomizablePickler(buffer, self._reducers).dump(obj)
372 self._writer.send_bytes(buffer.getvalue())
373 self._send = send
PicklingError: Can't pickle <built-in function input>: it's not the same object as builtins.input
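
Putting the fragments above together, the following is a minimal, self-contained sketch of the workaround. parallel_func and the inline row data are hypothetical stand-ins for the omitted code; in the actual notebook, records and other_records come from the %sql queries shown above and are sql.run.ResultSet objects rather than plain lists.

from joblib import Parallel, delayed

def parallel_func(record, other_records):
    # hypothetical stand-in for the omitted body:
    # pair each record with every row of other_records
    record_id, value = record
    return [(record_id, value, name) for _, name in other_records]

# in the notebook these come from
#   records = %sql SELECT * FROM TABLE_A
#   other_records = %sql SELECT * FROM TABLE_B
records = [(1, 100), (2, 200), (3, 300), (4, 400), (5, 500)]
other_records = [(1, 'Alice'), (2, 'Bob'), (3, 'Cayce')]

# converting other_records to a plain list lets joblib pickle the
# argument for its worker processes, avoiding the PicklingError above
r = Parallel(n_jobs=-1, verbose=5)(
    delayed(parallel_func)(record, list(other_records)) for record in records
)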