Skip to content

Instantly share code, notes, and snippets.

@tdhopper
Last active December 23, 2015 22:39
Show Gist options
  • Save tdhopper/6705042 to your computer and use it in GitHub Desktop.
Save tdhopper/6705042 to your computer and use it in GitHub Desktop.
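Calling mapper.fit_transform(df) raises an AttributeError. It seems to be related to the type of the data handed to CountVectorizer: each document arrives as a numpy.ndarray rather than a string.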
# Minimal reproduction: fitting a CountVectorizer on a single text column
# through DataFrameMapper raises
#   AttributeError: 'numpy.ndarray' object has no attribute 'lower'
# (see the traceback captured below).
import pandas as pd
from sklearn_pandas import DataFrameMapper
from sklearn.feature_extraction.text import CountVectorizer
# Two-row frame with one column, "content", holding plain strings.
df = pd.DataFrame({"content":["tim hopper", "this data tim"]})
# Route the "content" column to a default-configured CountVectorizer.
mapper = DataFrameMapper([("content", CountVectorizer())])
# This call fails. Per the traceback, the mapper's fit() hands the result of
# _get_col_subset(X, columns) to CountVectorizer.fit(); the vectorizer then
# iterates it and calls .lower() on each item, but each item is a
# numpy.ndarray rather than a str.
# NOTE(review): presumably the mapper passes a 2-D array (one single-element
# row per document) -- confirm against the installed sklearn_pandas version.
mapper.fit_transform(df)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-7-01896d0e98b9> in <module>()
5 df = pd.DataFrame({"content":["tim hopper", "this data tim"]})
6 mapper = DataFrameMapper([("content", CountVectorizer())])
----> 7 mapper.fit_transform(df)
C:\Anaconda\lib\site-packages\sklearn\base.pyc in fit_transform(self, X, y, **fit_params)
406 if y is None:
407 # fit method of arity 1 (unsupervised transformation)
--> 408 return self.fit(X, **fit_params).transform(X)
409 else:
410 # fit method of arity 2 (supervised transformation)
C:\Anaconda\lib\site-packages\sklearn_pandas\__init__.pyc in fit(self, X, y)
100 for columns, transformer in self.features:
101 if transformer is not None:
--> 102 transformer.fit(self._get_col_subset(X, columns))
103 return self
104
C:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.pyc in fit(self, raw_documents, y)
754 self
755 """
--> 756 self.fit_transform(raw_documents)
757 return self
758
C:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.pyc in fit_transform(self, raw_documents, y)
778 max_features = self.max_features
779
--> 780 vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary)
781 X = X.tocsc()
782
C:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.pyc in _count_vocab(self, raw_documents, fixed_vocab)
713 indptr.append(0)
714 for doc in raw_documents:
--> 715 for feature in analyze(doc):
716 try:
717 j_indices.append(vocabulary[feature])
C:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.pyc in <lambda>(doc)
227
228 return lambda doc: self._word_ngrams(
--> 229 tokenize(preprocess(self.decode(doc))), stop_words)
230
231 else:
C:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.pyc in <lambda>(x)
193
194 if self.lowercase:
--> 195 return lambda x: strip_accents(x.lower())
196 else:
197 return strip_accents
AttributeError: 'numpy.ndarray' object has no attribute 'lower'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment