Skip to content

Instantly share code, notes, and snippets.

@drorata
Created May 26, 2017 09:29
Show Gist options
  • Save drorata/bfc5d956c4fb928dcc77510a33009691 to your computer and use it in GitHub Desktop.
Save drorata/bfc5d956c4fb928dcc77510a33009691 to your computer and use it in GitHub Desktop.
Comparing numpy arrays and pandas data frames
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import hashlib"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[['42' 'foo' '42']\n",
" ['42' 'foo' 'foo']\n",
" ['42' 'bar' '42']]\n",
" 0 1 2\n",
"0 42 foo 42\n",
"1 42 foo foo\n",
"2 42 bar 42\n",
"52db9328682317c44370b8186a5c6bae75f2a94c9d0d5b24d61f602857acd3de\n",
"52db9328682317c44370b8186a5c6bae75f2a94c9d0d5b24d61f602857acd3de\n",
"aa93ea3aeae7d27581dbf0ea8ea43250b16e7fe340cd72655a694fc857a2a4be\n",
"942282b41afac938d2d5be69cd1868918c55fa18b96106677f01dfaf816c9cba\n"
]
}
],
"source": [
"# String case: draw a seeded 3x3 grid of labels and wrap it in a DataFrame.\n",
"np.random.seed(42)\n",
"# '42' is spelled as a string so the array's text dtype does not rely on NumPy\n",
"# implicitly coercing an int inside a mixed list.\n",
"arr = np.random.choice(['foo', 'bar', '42'], size=(3, 3))\n",
"df = pd.DataFrame(arr)\n",
"print(arr)\n",
"print(df)\n",
"\n",
"# df.values comes back as an object-dtype array for text data, and tobytes() on an\n",
"# object array serialises object pointers rather than the characters. Hashing those\n",
"# bytes is address-dependent, so cast back to arr's dtype to hash the real contents.\n",
"arr_digest = hashlib.sha256(arr.tobytes()).hexdigest()\n",
"values_digest = hashlib.sha256(df.values.astype(arr.dtype).tobytes()).hexdigest()\n",
"print(arr_digest)\n",
"print(values_digest)\n",
"\n",
"# These two digests cover the serialised text, which also carries the index and\n",
"# column labels, so they are not expected to match the raw-buffer digests above.\n",
"print(hashlib.sha256(df.to_json().encode()).hexdigest())\n",
"print(hashlib.sha256(df.to_csv().encode()).hexdigest())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([['42', 'foo', '42'],\n",
" ['42', 'foo', 'foo'],\n",
" ['42', 'bar', '42']], \n",
" dtype='<U3')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Echo the NumPy array so its repr, dtype included, is shown as the cell output.\n",
"arr"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([['42', 'foo', '42'],\n",
" ['42', 'foo', 'foo'],\n",
" ['42', 'bar', '42']], dtype=object)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Echo the DataFrame's underlying values; compare the dtype in this repr with the one shown for arr.\n",
"df.values"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check that the array and the DataFrame's values have the same shape and equal elements.\n",
"np.array_equal(arr , df.values )"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[42 41 42]\n",
" [42 41 41]\n",
" [42 43 42]]\n",
" 0 1 2\n",
"0 42 41 42\n",
"1 42 41 41\n",
"2 42 43 42\n",
"ddfee4572d380bef86d3ebe3cb7bfa7c68b7744f55f67f4e1ca5f6872c2c9ba1\n",
"ddfee4572d380bef86d3ebe3cb7bfa7c68b7744f55f67f4e1ca5f6872c2c9ba1\n"
]
}
],
"source": [
"# Integer case: seed the global RNG so the sampled 3x3 grid is repeatable,\n",
"# then wrap the same data in a DataFrame.\n",
"np.random.seed(42)\n",
"arr = np.random.choice([41, 43, 42], size=(3, 3))\n",
"df = pd.DataFrame(arr)\n",
"\n",
"# Print both containers first ...\n",
"for shown in (arr, df):\n",
"    print(shown)\n",
"\n",
"# ... then the SHA-256 digest of each one's raw buffer bytes.\n",
"for buffer_owner in (arr, df.values):\n",
"    print(hashlib.sha256(buffer_owner.tobytes()).hexdigest())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[42, 41, 42],\n",
" [42, 41, 41],\n",
" [42, 43, 42]])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Echo the integer array generated in the previous cell.\n",
"arr"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[42, 41, 42],\n",
" [42, 41, 41],\n",
" [42, 43, 42]])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Echo the DataFrame's values for the integer case, to compare against the repr of arr.\n",
"df.values"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check that the integer array and the DataFrame's values have the same shape and equal elements.\n",
"np.array_equal(arr, df.values)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment