Last active
April 24, 2017 20:01
-
-
Save captainsafia/49d475947ffbeb118e4c1dd756a2201a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In [1]: # What's the fastest way to convert a column of minutes to hours? | |
In [2]: import pandas as pd | |
In [3]: import numpy as np | |
In [4]: minutes = pd.Series(np.random.randint(0, 1440, size=(20000))) | |
In [5]: minutes.shape | |
Out[5]: (20000,) | |
In [6]: minutes.head(5) | |
Out[6]: | |
0 901 | |
1 263 | |
2 726 | |
3 971 | |
4 313 | |
dtype: int64 | |
In [7]: 901 / 60 | |
Out[7]: 15 | |
In [8]: 901 / 60.0 | |
Out[8]: 15.016666666666667 | |
In [9]: minutes.apply(lambda x: x / 60.0) | |
Out[9]: | |
0 15.016667 | |
1 4.383333 | |
2 12.100000 | |
3 16.183333 | |
4 5.216667 | |
5 16.533333 | |
6 0.433333 | |
7 19.500000 | |
8 23.750000 | |
9 13.150000 | |
10 22.216667 | |
11 20.883333 | |
12 11.683333 | |
13 18.816667 | |
14 8.916667 | |
15 19.416667 | |
16 9.500000 | |
17 4.366667 | |
18 4.750000 | |
19 19.366667 | |
20 18.783333 | |
21 18.183333 | |
22 23.783333 | |
23 6.533333 | |
24 8.633333 | |
25 17.866667 | |
26 12.833333 | |
27 19.950000 | |
28 8.533333 | |
29 3.433333 | |
... | |
19970 12.266667 | |
19971 19.183333 | |
19972 1.683333 | |
19973 4.466667 | |
19974 20.666667 | |
19975 19.083333 | |
19976 5.200000 | |
19977 4.733333 | |
19978 9.383333 | |
19979 9.483333 | |
19980 8.300000 | |
19981 2.083333 | |
19982 4.583333 | |
19983 6.916667 | |
19984 3.616667 | |
19985 21.083333 | |
19986 4.416667 | |
19987 1.933333 | |
19988 19.800000 | |
19989 18.350000 | |
19990 15.233333 | |
19991 12.366667 | |
19992 7.250000 | |
19993 21.583333 | |
19994 1.983333 | |
19995 4.566667 | |
19996 23.433333 | |
19997 13.750000 | |
19998 4.433333 | |
19999 10.216667 | |
dtype: float64 | |
In [10]: %timeit mi | |
min minutes | |
In [10]: %timeit minutes.apply(lambda x: x / 60.0) | |
100 loops, best of 3: 5.49 ms per loop | |
In [11]: minutes.map(lambda x: x / 60.0) | |
Out[11]: | |
0 15.016667 | |
1 4.383333 | |
2 12.100000 | |
3 16.183333 | |
4 5.216667 | |
5 16.533333 | |
6 0.433333 | |
7 19.500000 | |
8 23.750000 | |
9 13.150000 | |
10 22.216667 | |
11 20.883333 | |
12 11.683333 | |
13 18.816667 | |
14 8.916667 | |
15 19.416667 | |
16 9.500000 | |
17 4.366667 | |
18 4.750000 | |
19 19.366667 | |
20 18.783333 | |
21 18.183333 | |
22 23.783333 | |
23 6.533333 | |
24 8.633333 | |
25 17.866667 | |
26 12.833333 | |
27 19.950000 | |
28 8.533333 | |
29 3.433333 | |
... | |
19970 12.266667 | |
19971 19.183333 | |
19972 1.683333 | |
19973 4.466667 | |
19974 20.666667 | |
19975 19.083333 | |
19976 5.200000 | |
19977 4.733333 | |
19978 9.383333 | |
19979 9.483333 | |
19980 8.300000 | |
19981 2.083333 | |
19982 4.583333 | |
19983 6.916667 | |
19984 3.616667 | |
19985 21.083333 | |
19986 4.416667 | |
19987 1.933333 | |
19988 19.800000 | |
19989 18.350000 | |
19990 15.233333 | |
19991 12.366667 | |
19992 7.250000 | |
19993 21.583333 | |
19994 1.983333 | |
19995 4.566667 | |
19996 23.433333 | |
19997 13.750000 | |
19998 4.433333 | |
19999 10.216667 | |
dtype: float64 | |
In [12]: %timeit minutes.map(lambda x: x / 60.0) | |
100 loops, best of 3: 5.59 ms per loop | |
In [13]: minutes / 60.0 | |
Out[13]: | |
0 15.016667 | |
1 4.383333 | |
2 12.100000 | |
3 16.183333 | |
4 5.216667 | |
5 16.533333 | |
6 0.433333 | |
7 19.500000 | |
8 23.750000 | |
9 13.150000 | |
10 22.216667 | |
11 20.883333 | |
12 11.683333 | |
13 18.816667 | |
14 8.916667 | |
15 19.416667 | |
16 9.500000 | |
17 4.366667 | |
18 4.750000 | |
19 19.366667 | |
20 18.783333 | |
21 18.183333 | |
22 23.783333 | |
23 6.533333 | |
24 8.633333 | |
25 17.866667 | |
26 12.833333 | |
27 19.950000 | |
28 8.533333 | |
29 3.433333 | |
... | |
19970 12.266667 | |
19971 19.183333 | |
19972 1.683333 | |
19973 4.466667 | |
19974 20.666667 | |
19975 19.083333 | |
19976 5.200000 | |
19977 4.733333 | |
19978 9.383333 | |
19979 9.483333 | |
19980 8.300000 | |
19981 2.083333 | |
19982 4.583333 | |
19983 6.916667 | |
19984 3.616667 | |
19985 21.083333 | |
19986 4.416667 | |
19987 1.933333 | |
19988 19.800000 | |
19989 18.350000 | |
19990 15.233333 | |
19991 12.366667 | |
19992 7.250000 | |
19993 21.583333 | |
19994 1.983333 | |
19995 4.566667 | |
19996 23.433333 | |
19997 13.750000 | |
19998 4.433333 | |
19999 10.216667 | |
dtype: float64 | |
In [14]: %timeit minutes / 60.0 | |
10000 loops, best of 3: 109 µs per loop | |
In [15]: # Series.map = element-wise; DataFrame.apply = row/column-wise (Series.apply is element-wise too, hence the similar timings above) | |
In [16]: hours = minutes / 60.0 | |
In [17]: hours.head(5) | |
Out[17]: | |
0 15.016667 | |
1 4.383333 | |
2 12.100000 | |
3 16.183333 | |
4 5.216667 | |
dtype: float64 | |
In [18]: minutes = pd.Series(np.random.randint(0, 1440, size=(1000000))) | |
In [19]: %timeit minutes / 60.0 | |
100 loops, best of 3: 4.58 ms per loop | |
In [20]: %timeit minutes.apply(lambda x: x / 60.0) | |
1 loop, best of 3: 280 ms per loop | |
In [21]: %timeit minutes.map(lambda x: x / 60.0) | |
1 loop, best of 3: 279 ms per loop | |
In [22]: %timeit minutes = minutes / 60.0 | |
--------------------------------------------------------------------------- | |
UnboundLocalError Traceback (most recent call last) | |
<ipython-input-22-f16539e491d2> in <module>() | |
----> 1 get_ipython().magic(u'timeit minutes = minutes / 60.0') | |
/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s) | |
2161 magic_name, _, magic_arg_s = arg_s.partition(' ') | |
2162 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC) | |
-> 2163 return self.run_line_magic(magic_name, magic_arg_s) | |
2164 | |
2165 #------------------------------------------------------------------------- | |
/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line) | |
2082 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals | |
2083 with self.builtin_trap: | |
-> 2084 result = fn(*args,**kwargs) | |
2085 return result | |
2086 | |
<decorator-gen-59> in timeit(self, line, cell) | |
/usr/local/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k) | |
191 # but it's overkill for just that one bit of state. | |
192 def magic_deco(arg): | |
--> 193 call = lambda f, *a, **k: f(*a, **k) | |
194 | |
195 if callable(arg): | |
/usr/local/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in timeit(self, line, cell) | |
1039 number = 1 | |
1040 for _ in range(1, 10): | |
-> 1041 time_number = timer.timeit(number) | |
1042 worst_tuning = max(worst_tuning, time_number / number) | |
1043 if time_number >= 0.2: | |
/usr/local/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in timeit(self, number) | |
135 gc.disable() | |
136 try: | |
--> 137 timing = self.inner(it, self.timer) | |
138 finally: | |
139 if gcold: | |
<magic-timeit> in inner(_it, _timer) | |
UnboundLocalError: local variable 'minutes' referenced before assignment | |
In [23]: %timeit hours = minutes / 60.0 | |
100 loops, best of 3: 4.86 ms per loop | |
In [24]: %timeit minutes / 60.0 | |
100 loops, best of 3: 4.65 ms per loop | |
In [25]: minutes | |
Out[25]: | |
0 570 | |
1 592 | |
2 570 | |
3 688 | |
4 36 | |
5 243 | |
6 252 | |
7 1194 | |
8 1240 | |
9 1048 | |
10 945 | |
11 204 | |
12 143 | |
13 503 | |
14 734 | |
15 1356 | |
16 462 | |
17 996 | |
18 304 | |
19 354 | |
20 1235 | |
21 1106 | |
22 780 | |
23 14 | |
24 178 | |
25 424 | |
26 1402 | |
27 211 | |
28 141 | |
29 291 | |
... | |
999970 1243 | |
999971 462 | |
999972 1081 | |
999973 1272 | |
999974 89 | |
999975 551 | |
999976 1278 | |
999977 1188 | |
999978 1403 | |
999979 1158 | |
999980 1429 | |
999981 480 | |
999982 1333 | |
999983 586 | |
999984 847 | |
In [28]: minutes | |
Out[28]: | |
0 570 | |
1 592 | |
2 570 | |
3 688 | |
4 36 | |
5 243 | |
6 252 | |
7 1194 | |
8 1240 | |
9 1048 | |
10 945 | |
11 204 | |
12 143 | |
13 503 | |
14 734 | |
15 1356 | |
16 462 | |
17 996 | |
18 304 | |
19 354 | |
20 1235 | |
21 1106 | |
22 780 | |
23 14 | |
24 178 | |
25 424 | |
26 1402 | |
27 211 | |
28 141 | |
29 291 | |
... | |
999970 1243 | |
999971 462 | |
999972 1081 | |
999973 1272 | |
999974 89 | |
999975 551 | |
999976 1278 | |
999977 1188 | |
999978 1403 | |
999979 1158 | |
999980 1429 | |
999981 480 | |
999982 1333 | |
999983 586 | |
999984 847 | |
999985 209 | |
999986 147 | |
999987 354 | |
999988 232 | |
999989 621 | |
999990 659 | |
999991 1047 | |
999992 923 | |
999993 42 | |
999994 916 | |
999995 834 | |
999996 336 | |
999997 1388 | |
999998 1410 | |
999999 1083 | |
dtype: int64 | |
In [29]: minutes = pd.Series(np.random.randint(0, 1440, size=(500000))) | |
In [30]: minutes <= 800 | |
Out[30]: | |
0 True | |
1 False | |
2 True | |
3 False | |
4 True | |
5 True | |
6 True | |
7 True | |
8 False | |
9 False | |
10 True | |
11 False | |
12 False | |
13 False | |
14 True | |
15 True | |
16 False | |
17 True | |
18 False | |
19 True | |
20 False | |
21 False | |
22 False | |
23 False | |
24 True | |
25 True | |
26 False | |
27 True | |
28 False | |
29 False | |
... | |
499970 True | |
499971 True | |
499972 False | |
499973 True | |
499974 False | |
499975 True | |
499976 False | |
499977 False | |
499978 True | |
499979 True | |
499980 True | |
499981 False | |
499982 True | |
499983 False | |
499984 False | |
499985 False | |
499986 True | |
499987 True | |
499988 False | |
499989 True | |
499990 True | |
499991 False | |
499992 True | |
499993 False | |
499994 False | |
499995 False | |
499996 True | |
499997 False | |
499998 True | |
499999 True | |
dtype: bool | |
In [31]: minutes[minutes <= 800] | |
Out[31]: | |
0 777 | |
2 462 | |
4 699 | |
5 291 | |
6 420 | |
7 418 | |
10 169 | |
14 216 | |
15 438 | |
17 176 | |
19 3 | |
24 55 | |
25 762 | |
27 343 | |
31 485 | |
32 258 | |
34 779 | |
37 334 | |
38 766 | |
39 666 | |
42 267 | |
43 131 | |
45 646 | |
46 92 | |
48 144 | |
49 563 | |
50 254 | |
53 237 | |
54 650 | |
61 269 | |
... | |
499952 532 | |
499954 757 | |
499955 326 | |
499956 396 | |
499957 64 | |
499960 747 | |
499961 761 | |
499962 587 | |
499964 671 | |
499965 662 | |
499966 335 | |
499967 113 | |
499968 138 | |
499969 605 | |
499970 358 | |
499971 157 | |
499973 600 | |
499975 208 | |
499978 519 | |
499979 705 | |
499980 140 | |
499982 733 | |
499986 195 | |
499987 35 | |
499989 304 | |
499990 738 | |
499992 666 | |
499996 567 | |
499998 93 | |
499999 38 | |
dtype: int64 | |
In [32]: minutes.drop(minutes <= 800) | |
Out[32]: | |
2 462 | |
3 1297 | |
4 699 | |
5 291 | |
6 420 | |
7 418 | |
8 1075 | |
9 895 | |
10 169 | |
11 1338 | |
12 855 | |
13 1306 | |
14 216 | |
15 438 | |
16 1152 | |
17 176 | |
18 1352 | |
19 3 | |
20 1014 | |
21 1322 | |
22 1171 | |
23 912 | |
24 55 | |
25 762 | |
26 1257 | |
27 343 | |
28 950 | |
29 1151 | |
30 1262 | |
31 485 | |
... | |
499970 358 | |
499971 157 | |
499972 855 | |
499973 600 | |
499974 833 | |
499975 208 | |
499976 1289 | |
499977 987 | |
499978 519 | |
499979 705 | |
499980 140 | |
499981 1140 | |
499982 733 | |
499983 1009 | |
499984 1019 | |
499985 1388 | |
499986 195 | |
499987 35 | |
499988 1100 | |
499989 304 | |
499990 738 | |
499991 1253 | |
499992 666 | |
499993 1274 | |
499994 970 | |
499995 1183 | |
499996 567 | |
499997 1049 | |
499998 93 | |
499999 38 | |
dtype: int64 | |
In [36]: minutes = pd.DataFrame(np.random.randint(0, 1440, size=(500000, 3)), columns='ABC') | |
--------------------------------------------------------------------------- | |
TypeError Traceback (most recent call last) | |
<ipython-input-36-97597bebafc5> in <module>() | |
----> 1 minutes = pd.DataFrame(np.random.randint(0, 1440, size=(500000, 3)), columns='ABC') | |
/usr/local/lib/python2.7/site-packages/pandas/core/frame.pyc in __init__(self, data, index, columns, dtype, copy) | |
253 else: | |
254 mgr = self._init_ndarray(data, index, columns, dtype=dtype, | |
--> 255 copy=copy) | |
256 elif isinstance(data, (list, types.GeneratorType)): | |
257 if isinstance(data, types.GeneratorType): | |
/usr/local/lib/python2.7/site-packages/pandas/core/frame.pyc in _init_ndarray(self, values, index, columns, dtype, copy) | |
421 raise_with_traceback(e) | |
422 | |
--> 423 index, columns = _get_axes(*values.shape) | |
424 values = values.T | |
425 | |
/usr/local/lib/python2.7/site-packages/pandas/core/frame.pyc in _get_axes(N, K, index, columns) | |
388 columns = _default_index(K) | |
389 else: | |
--> 390 columns = _ensure_index(columns) | |
391 return index, columns | |
392 | |
/usr/local/lib/python2.7/site-packages/pandas/indexes/base.pyc in _ensure_index(index_like, copy) | |
3407 index_like = copy(index_like) | |
3408 | |
-> 3409 return Index(index_like) | |
3410 | |
3411 | |
/usr/local/lib/python2.7/site-packages/pandas/indexes/base.pyc in __new__(cls, data, dtype, copy, name, fastpath, tupleize_cols, **kwargs) | |
266 **kwargs) | |
267 elif data is None or lib.isscalar(data): | |
--> 268 cls._scalar_data_error(data) | |
269 else: | |
270 if (tupleize_cols and isinstance(data, list) and data and | |
/usr/local/lib/python2.7/site-packages/pandas/indexes/base.pyc in _scalar_data_error(cls, data) | |
481 raise TypeError('{0}(...) must be called with a collection of some ' | |
482 'kind, {1} was passed'.format(cls.__name__, | |
--> 483 repr(data))) | |
484 | |
485 @classmethod | |
TypeError: Index(...) must be called with a collection of some kind, 'ABC' was passed | |
In [37]: minutes = pd.DataFrame(np.random.randint(0, 1440, size=(500000, 3)), columns=['A', 'B', 'C']) | |
In [38]: minutes | |
Out[38]: | |
A B C | |
0 563 1201 987 | |
1 858 570 926 | |
2 1235 277 60 | |
3 597 1326 971 | |
4 634 1286 1092 | |
5 681 675 1085 | |
6 489 433 1415 | |
7 226 904 1351 | |
8 593 547 414 | |
9 1086 256 1070 | |
10 137 827 188 | |
11 1127 684 779 | |
12 476 888 1041 | |
13 398 205 865 | |
14 531 1042 996 | |
15 605 909 304 | |
16 1178 1372 195 | |
17 24 817 314 | |
18 90 1078 1332 | |
19 3 525 736 | |
20 715 372 951 | |
21 1111 842 231 | |
22 397 809 1033 | |
23 188 1353 790 | |
24 308 220 244 | |
25 586 804 664 | |
26 31 1430 900 | |
27 593 691 874 | |
28 179 1320 633 | |
29 1268 1380 663 | |
... ... ... ... | |
499970 997 404 499 | |
499971 106 93 606 | |
499972 1130 1187 1155 | |
499973 163 1287 416 | |
499974 261 88 1111 | |
499975 1335 1357 969 | |
499976 711 1301 213 | |
499977 1369 705 882 | |
499978 536 697 1205 | |
499979 709 1011 868 | |
499980 120 494 1314 | |
499981 604 1323 381 | |
499982 1398 598 1292 | |
499983 16 1244 1091 | |
499984 249 1008 426 | |
499985 1 181 386 | |
499986 123 1277 1140 | |
499987 531 870 1086 | |
499988 372 906 353 | |
499989 545 824 489 | |
499990 1127 1422 436 | |
499991 994 188 552 | |
499992 835 1343 609 | |
499993 905 1430 483 | |
499994 124 1260 1159 | |
499995 139 1 48 | |
499996 829 277 542 | |
499997 870 1208 1173 | |
499998 1083 599 527 | |
499999 1057 940 87 | |
[500000 rows x 3 columns] | |
In [39]: start_date = '04-24-2016' | |
In [40]: end_data = '04-24-2017' | |
In [41]: date_index = pd.date_range(start_date, end_date) | |
--------------------------------------------------------------------------- | |
NameError Traceback (most recent call last) | |
<ipython-input-41-e81a21d8cc55> in <module>() | |
----> 1 date_index = pd.date_range(start_date, end_date) | |
NameError: name 'end_date' is not defined | |
In [42]: end_date = '04-24-2017' | |
In [43]: date_index = pd.date_range(start_date, end_date) | |
In [44]: date_index | |
Out[44]: | |
DatetimeIndex(['2016-04-24', '2016-04-25', '2016-04-26', '2016-04-27', | |
'2016-04-28', '2016-04-29', '2016-04-30', '2016-05-01', | |
'2016-05-02', '2016-05-03', | |
... | |
'2017-04-15', '2017-04-16', '2017-04-17', '2017-04-18', | |
'2017-04-19', '2017-04-20', '2017-04-21', '2017-04-22', | |
'2017-04-23', '2017-04-24'], | |
dtype='datetime64[ns]', length=366, freq='D') | |
In [45]: current_data = pd.Series({'04-28-2016': 6, '05-28-2016': 5, '12-27-2016': 7, '04-23-2017': 5}) | |
In [46]: current_data | |
Out[46]: | |
04-23-2017 5 | |
04-28-2016 6 | |
05-28-2016 5 | |
12-27-2016 7 | |
dtype: int64 | |
In [47]: current_data.index = pd.DateTimeIndex(current_data.index) | |
--------------------------------------------------------------------------- | |
AttributeError Traceback (most recent call last) | |
<ipython-input-47-0fd208f594aa> in <module>() | |
----> 1 current_data.index = pd.DateTimeIndex(current_data.index) | |
AttributeError: 'module' object has no attribute 'DateTimeIndex' | |
In [48]: current_data.index = pd.DatetimeIndex(current_data.index) | |
In [49]: current_Data | |
--------------------------------------------------------------------------- | |
NameError Traceback (most recent call last) | |
<ipython-input-49-6c816f2643d3> in <module>() | |
----> 1 current_Data | |
NameError: name 'current_Data' is not defined | |
In [50]: current_data | |
Out[50]: | |
2017-04-23 5 | |
2016-04-28 6 | |
2016-05-28 5 | |
2016-12-27 7 | |
dtype: int64 | |
In [51]: current_data = current_data.reindex(date_index, fill_value=0) | |
In [52]: current_data | |
Out[52]: | |
2016-04-24 0 | |
2016-04-25 0 | |
2016-04-26 0 | |
2016-04-27 0 | |
2016-04-28 6 | |
2016-04-29 0 | |
2016-04-30 0 | |
2016-05-01 0 | |
2016-05-02 0 | |
2016-05-03 0 | |
2016-05-04 0 | |
2016-05-05 0 | |
2016-05-06 0 | |
2016-05-07 0 | |
2016-05-08 0 | |
2016-05-09 0 | |
2016-05-10 0 | |
2016-05-11 0 | |
2016-05-12 0 | |
2016-05-13 0 | |
2016-05-14 0 | |
2016-05-15 0 | |
2016-05-16 0 | |
2016-05-17 0 | |
2016-05-18 0 | |
2016-05-19 0 | |
2016-05-20 0 | |
2016-05-21 0 | |
2016-05-22 0 | |
2016-05-23 0 | |
.. | |
2017-03-26 0 | |
2017-03-27 0 | |
2017-03-28 0 | |
2017-03-29 0 | |
2017-03-30 0 | |
2017-03-31 0 | |
2017-04-01 0 | |
2017-04-02 0 | |
2017-04-03 0 | |
2017-04-04 0 | |
2017-04-05 0 | |
2017-04-06 0 | |
In [54]: # column = value; parity = 1/0, where 1 means the data is OK; output: remove the parity column and set the value to NaN where necessary | |
In [55]: cleaning = pd.DataFame.from_dict({'values | |
File "<ipython-input-55-2eb6a38dc553>", line 1 | |
cleaning = pd.DataFame.from_dict({'values | |
^ | |
SyntaxError: EOL while scanning string literal | |
In [56]: cleaning = pd.DataFame.from_dict({'values': [30, 20, 40, 50, 60], 'parity': [1, 0, 1, 1, 0]}) | |
--------------------------------------------------------------------------- | |
AttributeError Traceback (most recent call last) | |
<ipython-input-56-60e27d20fc3d> in <module>() | |
----> 1 cleaning = pd.DataFame.from_dict({'values': [30, 20, 40, 50, 60], 'parity': [1, 0, 1, 1, 0]}) | |
AttributeError: 'module' object has no attribute 'DataFame' | |
In [57]: cleaning = pd.DataFrame.from_dict({'values': [30, 20, 40, 50, 60], 'parity': [1, 0, 1, 1, 0]}) | |
In [58]: cleaning | |
Out[58]: | |
parity values | |
0 1 30 | |
1 0 20 | |
2 1 40 | |
3 1 50 | |
4 0 60 | |
In [59]: cleaning['values'][cleaning.parity == 1] | |
Out[59]: | |
0 30 | |
2 40 | |
3 50 | |
Name: values, dtype: int64 | |
In [60]: cleaning['values'][cleaning.parity == 1] = None | |
In [61]: cleaning | |
Out[61]: | |
parity values | |
0 1 NaN | |
1 0 20.0 | |
2 1 NaN | |
3 1 NaN | |
4 0 60.0 | |
In [62]: cleaning = cleaning.drop('partiy', 1) | |
--------------------------------------------------------------------------- | |
ValueError Traceback (most recent call last) | |
<ipython-input-62-392d092b74f7> in <module>() | |
----> 1 cleaning = cleaning.drop('partiy', 1) | |
/usr/local/lib/python2.7/site-packages/pandas/core/generic.pyc in drop(self, labels, axis, level, inplace, errors) | |
1875 new_axis = axis.drop(labels, level=level, errors=errors) | |
1876 else: | |
-> 1877 new_axis = axis.drop(labels, errors=errors) | |
1878 dropped = self.reindex(**{axis_name: new_axis}) | |
1879 try: | |
/usr/local/lib/python2.7/site-packages/pandas/indexes/base.pyc in drop(self, labels, errors) | |
3049 if errors != 'ignore': | |
3050 raise ValueError('labels %s not contained in axis' % | |
-> 3051 labels[mask]) | |
3052 indexer = indexer[~mask] | |
3053 return self.delete(indexer) | |
ValueError: labels ['partiy'] not contained in axis | |
In [63]: cleaning = cleaning.drop('parity', 1) | |
In [64]: cleaning | |
Out[64]: | |
values | |
0 NaN | |
1 20.0 | |
2 NaN | |
3 NaN | |
4 60.0 | |
In [65]: cleaning = pd.DataFrame.from_dict({'values': [30, 20, 40, 50, 60], 'parity': [1, 0, 1, 1, 0]}) | |
In [66]: cleaning['values'][cleaning.parity == 1] | |
Out[66]: | |
0 30 | |
2 40 | |
3 50 | |
Name: values, dtype: int64 | |
In [67]: cleaning.loc[cleaning.parity == 1, 'values'] | |
Out[67]: | |
0 30 | |
2 40 | |
3 50 | |
Name: values, dtype: int64 | |
In [68]: %timeit cleaning['values'][cleaning.parity == 1] | |
1000 loops, best of 3: 582 µs per loop | |
In [69]: %timeit cleaning.loc[cleaning.parity == 1, 'values'] | |
1000 loops, best of 3: 314 µs per loop | |
In [70]: type(cleaning.loc[cleaning.parity == 1, 'values']) | |
Out[70]: pandas.core.series.Series | |
In [71]: type(cleaning.loc[cleaning.parity == 1]) | |
Out[71]: pandas.core.frame.DataFrame | |
In [72]: class AwesomeData: | |
....: def __init__(self, data): | |
....: self.df = pd.DataFrame(data) | |
....: | |
In [73]: a_data = AwesomeData([1, 2, 3, 4]) | |
In [74]: a_data.df.my_special_df_function() | |
--------------------------------------------------------------------------- | |
AttributeError Traceback (most recent call last) | |
<ipython-input-74-83fbe86d639f> in <module>() | |
----> 1 a_data.df.my_special_df_function() | |
/usr/local/lib/python2.7/site-packages/pandas/core/generic.pyc in __getattr__(self, name) | |
2670 if name in self._info_axis: | |
2671 return self[name] | |
-> 2672 return object.__getattribute__(self, name) | |
2673 | |
2674 def __setattr__(self, name, value): | |
AttributeError: 'DataFrame' object has no attribute 'my_special_df_function' | |
In [75]: a_data.my_special_df_function() | |
--------------------------------------------------------------------------- | |
AttributeError Traceback (most recent call last) | |
<ipython-input-75-40ac7c0b9cef> in <module>() | |
----> 1 a_data.my_special_df_function() | |
AttributeError: AwesomeData instance has no attribute 'my_special_df_function' | |
In [76]: class CoolData(): | |
....: def __init__(self,data): | |
....: self.df = pd.DataFrame(data) | |
....: def get_size(self): | |
....: return self.df.shape | |
....: | |
In [77]: a_data = CoolData([1, 2, 3, 4]) | |
In [78]: a_data.get_size() | |
Out[78]: (4, 1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment