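A solution to the Stack Overflow question "How can I get intersection of two pandas Series text column?" (https://stackoverflow.com/questions/55362925/how-can-i-get-intersection-of-two-pandas-series-text-column/55363837#55363837), worked out step by step in the interactive Python session below.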
Last login: Tue Mar 26 22:42:03 on console
Rishikeshs-MacBook-Air:~ hygull$ python3
Python 3.6.7 (v3.6.7:6ec5cf24b7, Oct 20 2018, 03:02:14)
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import pandas as pd
>>>
>>> df = pd.DataFrame({
... "set": [{"this", "is", "good"}, {"this", "is", "not", "good"}]
... })
>>>
>>> df
set
0 {this, is, good}
1 {not, this, is, good}
>>>
>>> df1 = pd.DataFrame({
... "set": [{"this", "is"}, {"good"}]
... })
>>>
>>> df1
set
0 {this, is}
1 {good}
>>>
>>> df.apply(lambda x: print(x))
0 {this, is, good}
1 {not, this, is, good}
Name: set, dtype: object
set None
dtype: object
>>>
>>> df.apply(lambda x: print("ok", x))
ok 0 {this, is, good}
1 {not, this, is, good}
Name: set, dtype: object
set None
dtype: object
>>>
>>> df.apply(lambda x: print("ok", x), axis=1)
ok set {this, is, good}
Name: 0, dtype: object
ok set {not, this, is, good}
Name: 1, dtype: object
0 None
1 None
dtype: object
>>>
>>> df.apply(lambda c, x: print(c, x), axis=1)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py", line 6487, in apply
return op.get_result()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 151, in get_result
return self.apply_standard()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 257, in apply_standard
self.apply_series_generator()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 286, in apply_series_generator
results[i] = self.f(v)
TypeError: ("<lambda>() missing 1 required positional argument: 'x'", 'occurred at index 0')
>>>
>>> df.apply(lambda x: print(x.name), axis=1)
0
1
0 None
1 None
dtype: object
>>>
>>> df.loc[0]
set {this, is, good}
Name: 0, dtype: object
>>>
>>> df.apply(lambda row: row.intersection(df1.loc[row.name]), axis=1)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py", line 6487, in apply
return op.get_result()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 151, in get_result
return self.apply_standard()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 257, in apply_standard
self.apply_series_generator()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 286, in apply_series_generator
results[i] = self.f(v)
File "<stdin>", line 1, in <lambda>
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/generic.py", line 5067, in __getattr__
return object.__getattribute__(self, name)
AttributeError: ("'Series' object has no attribute 'intersection'", 'occurred at index 0')
>>>
>>> df.apply(lambda row: row.intersection(df1.loc[row.name]), axis=1)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py", line 6487, in apply
return op.get_result()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 151, in get_result
return self.apply_standard()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 257, in apply_standard
self.apply_series_generator()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 286, in apply_series_generator
results[i] = self.f(v)
File "<stdin>", line 1, in <lambda>
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/generic.py", line 5067, in __getattr__
return object.__getattribute__(self, name)
AttributeError: ("'Series' object has no attribute 'intersection'", 'occurred at index 0')
>>>
>>> df.apply(lambda row: print(type(row)), axis=1)
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
0 None
1 None
dtype: object
>>> df.apply(lambda row: print(type(row), row), axis=1)
<class 'pandas.core.series.Series'> set {this, is, good}
Name: 0, dtype: object
<class 'pandas.core.series.Series'> set {not, this, is, good}
Name: 1, dtype: object
0 None
1 None
dtype: object
>>> df.apply(lambda row: row.loc[0].intersection(df1.loc[row.name]), axis=1)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py", line 6487, in apply
return op.get_result()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 151, in get_result
return self.apply_standard()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 257, in apply_standard
self.apply_series_generator()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 286, in apply_series_generator
results[i] = self.f(v)
File "<stdin>", line 1, in <lambda>
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexing.py", line 1500, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexing.py", line 1912, in _getitem_axis
self._validate_key(key, axis)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexing.py", line 1799, in _validate_key
self._convert_scalar_indexer(key, axis)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexing.py", line 262, in _convert_scalar_indexer
return ax._convert_scalar_indexer(key, kind=self.name)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 2881, in _convert_scalar_indexer
return self._invalid_indexer('label', key)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3067, in _invalid_indexer
kind=type(key)))
TypeError: ("cannot do label indexing on <class 'pandas.core.indexes.base.Index'> with these indexers [0] of <class 'int'>", 'occurred at index 0')
>>>
>>> df.apply(lambda row: row[0].intersection(df1.loc[row.name]), axis=1)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py", line 6487, in apply
return op.get_result()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 151, in get_result
return self.apply_standard()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 257, in apply_standard
self.apply_series_generator()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 286, in apply_series_generator
results[i] = self.f(v)
File "<stdin>", line 1, in <lambda>
TypeError: ("unhashable type: 'set'", 'occurred at index 0')
>>>
>>> df.apply(lambda row: print(row[0]), axis=1)
{'this', 'is', 'good'}
{'not', 'this', 'is', 'good'}
0 None
1 None
dtype: object
>>>
>>> df.apply(lambda row: print(type(row[0])), axis=1)
<class 'set'>
<class 'set'>
0 None
1 None
dtype: object
>>> df.apply(lambda row: print(type(row[0]), df1.loc[row.name]), axis=1)
<class 'set'> set {this, is}
Name: 0, dtype: object
<class 'set'> set {good}
Name: 1, dtype: object
0 None
1 None
dtype: object
>>> df.apply(lambda row: print(type(row[0]), type(df1.loc[row.name])), axis=1)
<class 'set'> <class 'pandas.core.series.Series'>
<class 'set'> <class 'pandas.core.series.Series'>
0 None
1 None
dtype: object
>>> df.apply(lambda row: print(type(row[0]), type(df1.loc[row.name][0])), axis=1)
<class 'set'> <class 'set'>
<class 'set'> <class 'set'>
0 None
1 None
dtype: object
>>>
>>> df.apply(lambda row: row[0].intersection(df1.loc[row.name][0])), axis=1)
File "<stdin>", line 1
df.apply(lambda row: row[0].intersection(df1.loc[row.name][0])), axis=1)
^
SyntaxError: invalid syntax
>>>
>>> df.apply(lambda row: row[0].intersection(df1.loc[row.name][0]), axis=1)
0 {this, is}
1 {good}
dtype: object
>>>
>>>
>>> df1 = pd.DataFrame({
... "set": [{"this", "is"}, {"good", "bad"}]
... })
>>>
>>> df1
set
0 {this, is}
1 {bad, good}
>>>
>>> df.apply(lambda row: row[0].intersection(df1.loc[row.name][0]), axis=1)
0 {this, is}
1 {good}
dtype: object
>>>
>>> df
set
0 {this, is, good}
1 {not, this, is, good}
>>>
>>> df1
set
0 {this, is}
1 {bad, good}
>>>
>>> df.apply(lambda row: row[0].intersection(df1.loc[row.name][0]), axis=1)
0 {this, is}
1 {good}
dtype: object
>>>
>>>