Skip to content

Instantly share code, notes, and snippets.

@eloraburns
Created March 17, 2013 02:03
Show Gist options
  • Save eloraburns/5179224 to your computer and use it in GitHub Desktop.
Save eloraburns/5179224 to your computer and use it in GitHub Desktop.
from cStringIO import StringIO
log_file = StringIO(
'posix_timestamp,elapsed,sys,user,queries,query_time,rows,'
'accountid,userid,contactid,level,silo,method\n'
'1343103150,0.062353,0,4,6,0.01690,3,'
'12345,1,-1,3,invoice_InvoiceResource,search\n'
)
import pandas as pd
from datetime import datetime
import gc
def posix_string_to_datetime(posix_string):
return datetime.utcfromtimestamp(int(posix_string))
# This works on pandas 0.9.0, but not on 0.10.1 or github master
df = pd.io.parsers.read_csv(
log_file,
# index_col is the first column, our posix_timestamp
index_col=0,
# Interpret the index column as a date
parse_dates=0,
date_parser=posix_string_to_datetime)
# The crash looks like this (on master)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-2-c3fa4840399b> in <module>()
17 # Interpret the index column as a date
18 parse_dates=0,
---> 19 date_parser=posix_string_to_datetime)
20 gc.enable()
/Users/taavi/src/pandas/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, nrows, iterator, chunksize, verbose, encoding, squeeze)
398 buffer_lines=buffer_lines)
399
--> 400 return _read(filepath_or_buffer, kwds)
401
402 parser_f.__name__ = name
/Users/taavi/src/pandas/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
213 return parser
214
--> 215 return parser.read()
216
217 _parser_defaults = {
/Users/taavi/src/pandas/pandas/io/parsers.pyc in read(self, nrows)
630 # self._engine.set_error_bad_lines(False)
631
--> 632 ret = self._engine.read(nrows)
633
634 if self.options.get('as_recarray'):
/Users/taavi/src/pandas/pandas/io/parsers.pyc in read(self, nrows)
1006
1007 names, data = self._do_date_conversions(names, data)
-> 1008 index = self._make_index(data, alldata, names)
1009
1010 return index, names, data
/Users/taavi/src/pandas/pandas/io/parsers.pyc in _make_index(self, data, alldata, columns)
706 elif not self._has_complex_date_col:
707 index = self._get_simple_index(alldata, columns)
--> 708 index = self._agg_index(index)
709
710 elif self._has_complex_date_col:
/Users/taavi/src/pandas/pandas/io/parsers.pyc in _agg_index(self, index, try_parse_dates)
789 self.na_values)
790
--> 791 arr, _ = self._convert_types(arr, col_na_values)
792 arrays.append(arr)
793
/Users/taavi/src/pandas/pandas/io/parsers.pyc in _convert_types(self, values, na_values, try_num_bool)
815 def _convert_types(self, values, na_values, try_num_bool=True):
816 na_count = 0
--> 817 if issubclass(values.dtype.type, (np.number, np.bool_)):
818 mask = lib.ismember(values, na_values)
819 na_count = mask.sum()
AttributeError: 'datetime.datetime' object has no attribute 'dtype'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment