Skip to content

Instantly share code, notes, and snippets.

@paulochf
Last active July 25, 2018 17:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save paulochf/19c14c93156fea2cea24fae06beee841 to your computer and use it in GitHub Desktop.
Save paulochf/19c14c93156fea2cea24fae06beee841 to your computer and use it in GitHub Desktop.
Set intersection between pandas columns
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"ExecuteTime": {
"start_time": "2018-07-25T17:29:02.460157Z",
"end_time": "2018-07-25T17:29:06.353105Z"
},
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2018-07-25T17:38:47.577188Z",
"end_time": "2018-07-25T17:38:47.583787Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df = pd.DataFrame(columns=[\"A\", \"B\"])",
"execution_count": 18,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2018-07-25T17:38:48.067820Z",
"end_time": "2018-07-25T17:38:48.083702Z"
},
"trusted": true
},
"cell_type": "code",
"source": "for i in range(10):\n    df.loc[i, :] = [list(range(i, i+10)), list(range(i+3, i+13))]",
"execution_count": 19,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2018-07-25T17:38:48.491221Z",
"end_time": "2018-07-25T17:38:48.516282Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df",
"execution_count": 20,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 20,
"data": {
"text/plain": " A \\\n0 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] \n1 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] \n2 [2, 3, 4, 5, 6, 7, 8, 9, 10, 11] \n3 [3, 4, 5, 6, 7, 8, 9, 10, 11, 12] \n4 [4, 5, 6, 7, 8, 9, 10, 11, 12, 13] \n5 [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] \n6 [6, 7, 8, 9, 10, 11, 12, 13, 14, 15] \n7 [7, 8, 9, 10, 11, 12, 13, 14, 15, 16] \n8 [8, 9, 10, 11, 12, 13, 14, 15, 16, 17] \n9 [9, 10, 11, 12, 13, 14, 15, 16, 17, 18] \n\n B \n0 [3, 4, 5, 6, 7, 8, 9, 10, 11, 12] \n1 [4, 5, 6, 7, 8, 9, 10, 11, 12, 13] \n2 [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] \n3 [6, 7, 8, 9, 10, 11, 12, 13, 14, 15] \n4 [7, 8, 9, 10, 11, 12, 13, 14, 15, 16] \n5 [8, 9, 10, 11, 12, 13, 14, 15, 16, 17] \n6 [9, 10, 11, 12, 13, 14, 15, 16, 17, 18] \n7 [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] \n8 [11, 12, 13, 14, 15, 16, 17, 18, 19, 20] \n9 [12, 13, 14, 15, 16, 17, 18, 19, 20, 21] ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>A</th>\n <th>B</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]</td>\n <td>[3, 4, 5, 6, 7, 8, 9, 10, 11, 12]</td>\n </tr>\n <tr>\n <th>1</th>\n <td>[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]</td>\n <td>[4, 5, 6, 7, 8, 9, 10, 11, 12, 13]</td>\n </tr>\n <tr>\n <th>2</th>\n <td>[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]</td>\n <td>[5, 6, 7, 8, 9, 10, 11, 12, 13, 14]</td>\n </tr>\n <tr>\n <th>3</th>\n <td>[3, 4, 5, 6, 7, 8, 9, 10, 11, 12]</td>\n <td>[6, 7, 8, 9, 10, 11, 12, 13, 14, 15]</td>\n </tr>\n <tr>\n <th>4</th>\n <td>[4, 5, 6, 7, 8, 9, 10, 11, 12, 13]</td>\n <td>[7, 8, 9, 10, 11, 12, 13, 14, 15, 16]</td>\n </tr>\n <tr>\n <th>5</th>\n <td>[5, 6, 7, 8, 9, 10, 11, 12, 13, 14]</td>\n <td>[8, 9, 10, 11, 12, 13, 14, 15, 16, 17]</td>\n </tr>\n <tr>\n <th>6</th>\n <td>[6, 7, 8, 9, 10, 11, 12, 13, 14, 15]</td>\n <td>[9, 10, 11, 12, 13, 14, 15, 16, 17, 18]</td>\n </tr>\n <tr>\n <th>7</th>\n <td>[7, 8, 9, 10, 11, 12, 13, 14, 15, 16]</td>\n <td>[10, 11, 12, 13, 14, 15, 16, 17, 18, 19]</td>\n </tr>\n <tr>\n <th>8</th>\n <td>[8, 9, 10, 11, 12, 13, 14, 15, 16, 17]</td>\n <td>[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]</td>\n </tr>\n <tr>\n <th>9</th>\n <td>[9, 10, 11, 12, 13, 14, 15, 16, 17, 18]</td>\n <td>[12, 13, 14, 15, 16, 17, 18, 19, 20, 21]</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2018-07-25T17:38:50.576659Z",
"end_time": "2018-07-25T17:38:50.590508Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df.applymap(frozenset).apply(lambda x: x[\"A\"] & x[\"B\"], axis=1)",
"execution_count": 21,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 21,
"data": {
"text/plain": "0 (3, 4, 5, 6, 7, 8, 9)\n1 (4, 5, 6, 7, 8, 9, 10)\n2 (5, 6, 7, 8, 9, 10, 11)\n3 (6, 7, 8, 9, 10, 11, 12)\n4 (7, 8, 9, 10, 11, 12, 13)\n5 (8, 9, 10, 11, 12, 13, 14)\n6 (9, 10, 11, 12, 13, 14, 15)\n7 (10, 11, 12, 13, 14, 15, 16)\n8 (11, 12, 13, 14, 15, 16, 17)\n9 (12, 13, 14, 15, 16, 17, 18)\ndtype: object"
},
"metadata": {}
}
]
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.7.0",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "Set intersection between pandas columns",
"public": false
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment