abhirk/tfidf

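## tfidf

Profile of `vectorizer.transform(input_txt)`. Of the 10.875 s total, 7.473 s (cumulative) is spent in `setdiag` (`base.py:529`), which appears to be dominated by 476,268 per-element `__setitem__` calls in `lil.py` (6.281 s cumulative); tokenization and term counting (`text.py:512(transform)`, 0.171 s) are negligible by comparison.

In [34]: cProfile.run("vectorizer.transform(input_txt)")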
         8676327 function calls (8676325 primitive calls) in 10.875 CPU seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   10.875   10.875 <string>:1(<module>)
        2    0.000    0.000    0.717    0.359 base.py:178(asformat)
        1    0.000    0.000    0.719    0.719 base.py:229(__mul__)
        7    0.000    0.000    0.000    0.000 base.py:51(__init__)
        1    1.193    1.193    7.473    7.473 base.py:529(setdiag)
        9    0.000    0.000    0.000    0.000 base.py:553(isspmatrix)
        7    0.000    0.000    0.000    0.000 base.py:59(set_shape)
   476299    0.109    0.000    0.109    0.000 base.py:81(get_shape)
        5    0.000    0.000    0.001    0.000 compressed.py:101(check_format)
      5/3    0.000    0.000    0.718    0.239 compressed.py:20(__init__)
        1    0.000    0.000    0.719    0.719 compressed.py:276(_mul_sparse_matrix)
        5    0.000    0.000    0.000    0.000 compressed.py:622(prune)
       20    0.000    0.000    0.000    0.000 compressed.py:85(getnnz)
        2    0.000    0.000    0.000    0.000 compressed.py:90(_set_self)
        1    0.000    0.000    0.000    0.000 coo.py:115(__init__)
        2    0.000    0.000    0.000    0.000 coo.py:194(getnnz)
        1    0.000    0.000    0.000    0.000 coo.py:205(_check)
        1    0.000    0.000    0.000    0.000 coo.py:281(tocsr)
        1    0.000    0.000    0.000    0.000 csr.py:129(tocsr)
       17    0.000    0.000    0.000    0.000 csr.py:180(_swap)
        1    0.000    0.000    0.001    0.001 csr.py:244(csr_matmat_pass2)
        1    0.000    0.000    0.000    0.000 csr.py:74(csr_matmat_pass1)
        6    0.000    0.000    0.000    0.000 data.py:17(__init__)
        6    0.000    0.000    0.000    0.000 data.py:20(_get_dtype)
     6441    0.023    0.000    0.034    0.000 fixes.py:22(__init__)
     6441    0.009    0.000    0.012    0.000 fixes.py:29(update)
        1    0.000    0.000    0.002    0.002 fromnumeric.py:1643(cumsum)
       25    0.000    0.000    0.000    0.000 fromnumeric.py:2116(rank)
   476268    2.661    0.000    3.647    0.000 lil.py:244(_insertat2)
   476268    1.241    0.000    6.281    0.000 lil.py:307(__setitem__)
        1    0.341    0.341    0.717    0.717 lil.py:441(tocsr)
        1    2.394    2.394    2.407    2.407 lil.py:77(__init__)
        1    0.000    0.000    0.000    0.000 memmap.py:254(__array_finalize__)
        1    0.000    0.000    0.000    0.000 memmap.py:290(__del__)
  1905073    1.084    0.000    1.788    0.000 numeric.py:1574(isscalar)
       26    0.000    0.000    0.226    0.009 numeric.py:167(asarray)
        1    0.000    0.000    0.000    0.000 numerictypes.py:665(issubclass_)
        1    0.000    0.000    0.000    0.000 numerictypes.py:733(issubdtype)
        1    0.000    0.000    0.000    0.000 preprocessing.py:289(normalize)
        1    0.000    0.000    0.000    0.000 re.py:188(compile)
        1    0.000    0.000    0.000    0.000 re.py:229(_compile)
        5    0.000    0.000    0.000    0.000 sputils.py:111(issequence)
       10    0.000    0.000    0.000    0.000 sputils.py:116(_isinstance)
        1    0.000    0.000    0.000    0.000 sputils.py:124(isdense)
        1    0.000    0.000    0.000    0.000 sputils.py:18(upcast)
        6    0.000    0.000    0.000    0.000 sputils.py:50(to_native)
        4    0.000    0.000    0.000    0.000 sputils.py:54(getdtype)
        1    0.000    0.000    0.000    0.000 sputils.py:77(isscalarlike)
        5    0.000    0.000    0.000    0.000 sputils.py:81(isintlike)
        5    0.000    0.000    0.000    0.000 sputils.py:96(isshape)
     6441    0.007    0.000    0.010    0.000 text.py:248(decode)
     6441    0.022    0.000    0.026    0.000 text.py:263(_word_ngrams)
        1    0.000    0.000    0.000    0.000 text.py:318(build_preprocessor)
     6441    0.001    0.000    0.001    0.000 text.py:328(<lambda>)
     6441    0.007    0.000    0.010    0.000 text.py:344(<lambda>)
        1    0.000    0.000    0.000    0.000 text.py:348(build_tokenizer)
     6441    0.005    0.000    0.014    0.000 text.py:353(<lambda>)
        1    0.000    0.000    0.000    0.000 text.py:355(get_stop_words)
        1    0.000    0.000    0.000    0.000 text.py:359(build_analyzer)
     6441    0.016    0.000    0.077    0.000 text.py:377(<lambda>)
        1    0.006    0.006    0.042    0.042 text.py:384(_term_count_dicts_to_matrix)
        1    0.017    0.017    0.171    0.171 text.py:512(transform)
        1    0.001    0.001   10.600   10.600 text.py:652(transform)
        1    0.000    0.000    0.000    0.000 text.py:78(_check_stop_list)
        1    0.104    0.104   10.875   10.875 text.py:910(transform)
        2    0.000    0.000    0.000    0.000 validation.py:115(_num_samples)
        1    0.000    0.000    0.000    0.000 validation.py:122(check_arrays)
        1    0.000    0.000    0.000    0.000 validation.py:200(warn_if_not_float)
   476268    0.197    0.000    0.197    0.000 {_bisect.bisect_left}
        1    0.000    0.000    0.000    0.000 {_csr.csr_matmat_pass1}
        1    0.001    0.001    0.001    0.001 {_csr.csr_matmat_pass2}
     6441    0.009    0.000    0.009    0.000 {built-in method findall}
        4    0.000    0.000    0.000    0.000 {getattr}
        8    0.000    0.000    0.000    0.000 {hasattr}
  1917980    0.710    0.000    0.710    0.000 {isinstance}
        2    0.000    0.000    0.000    0.000 {issubclass}
   959035    0.114    0.000    0.114    0.000 {len}
        1    0.034    0.034    0.034    0.034 {max}
   952537    0.210    0.000    0.210    0.000 {method 'append' of 'list' objects}
        3    0.000    0.000    0.000    0.000 {method 'astype' of 'numpy.ndarray' objects}
     6441    0.001    0.000    0.001    0.000 {method 'clear' of 'dict' objects}
        1    0.002    0.002    0.002    0.002 {method 'cumsum' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
   952536    0.110    0.000    0.110    0.000 {method 'extend' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'get' of 'dict' objects}
     6441    0.001    0.000    0.001    0.000 {method 'iteritems' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 {method 'itervalues' of 'dict' objects}
     6441    0.002    0.000    0.002    0.000 {method 'lower' of 'unicode' objects}
        1    0.000    0.000    0.000    0.000 {method 'mro' of 'type' objects}
        6    0.000    0.000    0.000    0.000 {method 'newbyteorder' of 'numpy.dtype' objects}
        4    0.000    0.000    0.000    0.000 {method 'pop' of 'dict' objects}
       20    0.000    0.000    0.000    0.000 {method 'split' of 'str' objects}
     6442    0.003    0.000    0.003    0.000 {min}
       38    0.226    0.006    0.226    0.006 {numpy.core.multiarray.array}
       10    0.000    0.000    0.000    0.000 {numpy.core.multiarray.can_cast}
        1    0.000    0.000    0.000    0.000 {numpy.core.multiarray.concatenate}
        5    0.004    0.001    0.004    0.001 {numpy.core.multiarray.empty}
        3    0.000    0.000    0.000    0.000 {numpy.core.multiarray.zeros}
        1    0.009    0.009    0.009    0.009 {range}
        1    0.000    0.000    0.000    0.000 {sklearn.utils.sparsefuncs.inplace_csr_row_normalize_l2}
	In [34]: cProfile.run("vectorizer.transform(input_txt)")
	8676327 function calls (8676325 primitive calls) in 10.875 CPU seconds

	Ordered by: standard name

	ncalls tottime percall cumtime percall filename:lineno(function)
	1 0.000 0.000 10.875 10.875 <string>:1(<module>)
	2 0.000 0.000 0.717 0.359 base.py:178(asformat)
	1 0.000 0.000 0.719 0.719 base.py:229(__mul__)
	7 0.000 0.000 0.000 0.000 base.py:51(__init__)
	1 1.193 1.193 7.473 7.473 base.py:529(setdiag)
	9 0.000 0.000 0.000 0.000 base.py:553(isspmatrix)
	7 0.000 0.000 0.000 0.000 base.py:59(set_shape)
	476299 0.109 0.000 0.109 0.000 base.py:81(get_shape)
	5 0.000 0.000 0.001 0.000 compressed.py:101(check_format)
	5/3 0.000 0.000 0.718 0.239 compressed.py:20(__init__)
	1 0.000 0.000 0.719 0.719 compressed.py:276(_mul_sparse_matrix)
	5 0.000 0.000 0.000 0.000 compressed.py:622(prune)
	20 0.000 0.000 0.000 0.000 compressed.py:85(getnnz)
	2 0.000 0.000 0.000 0.000 compressed.py:90(_set_self)
	1 0.000 0.000 0.000 0.000 coo.py:115(__init__)
	2 0.000 0.000 0.000 0.000 coo.py:194(getnnz)
	1 0.000 0.000 0.000 0.000 coo.py:205(_check)
	1 0.000 0.000 0.000 0.000 coo.py:281(tocsr)
	1 0.000 0.000 0.000 0.000 csr.py:129(tocsr)
	17 0.000 0.000 0.000 0.000 csr.py:180(_swap)
	1 0.000 0.000 0.001 0.001 csr.py:244(csr_matmat_pass2)
	1 0.000 0.000 0.000 0.000 csr.py:74(csr_matmat_pass1)
	6 0.000 0.000 0.000 0.000 data.py:17(__init__)
	6 0.000 0.000 0.000 0.000 data.py:20(_get_dtype)
	6441 0.023 0.000 0.034 0.000 fixes.py:22(__init__)
	6441 0.009 0.000 0.012 0.000 fixes.py:29(update)
	1 0.000 0.000 0.002 0.002 fromnumeric.py:1643(cumsum)
	25 0.000 0.000 0.000 0.000 fromnumeric.py:2116(rank)
	476268 2.661 0.000 3.647 0.000 lil.py:244(_insertat2)
	476268 1.241 0.000 6.281 0.000 lil.py:307(__setitem__)
	1 0.341 0.341 0.717 0.717 lil.py:441(tocsr)
	1 2.394 2.394 2.407 2.407 lil.py:77(__init__)
	1 0.000 0.000 0.000 0.000 memmap.py:254(__array_finalize__)
	1 0.000 0.000 0.000 0.000 memmap.py:290(__del__)
	1905073 1.084 0.000 1.788 0.000 numeric.py:1574(isscalar)
	26 0.000 0.000 0.226 0.009 numeric.py:167(asarray)
	1 0.000 0.000 0.000 0.000 numerictypes.py:665(issubclass_)
	1 0.000 0.000 0.000 0.000 numerictypes.py:733(issubdtype)
	1 0.000 0.000 0.000 0.000 preprocessing.py:289(normalize)
	1 0.000 0.000 0.000 0.000 re.py:188(compile)
	1 0.000 0.000 0.000 0.000 re.py:229(_compile)
	5 0.000 0.000 0.000 0.000 sputils.py:111(issequence)
	10 0.000 0.000 0.000 0.000 sputils.py:116(_isinstance)
	1 0.000 0.000 0.000 0.000 sputils.py:124(isdense)
	1 0.000 0.000 0.000 0.000 sputils.py:18(upcast)
	6 0.000 0.000 0.000 0.000 sputils.py:50(to_native)
	4 0.000 0.000 0.000 0.000 sputils.py:54(getdtype)
	1 0.000 0.000 0.000 0.000 sputils.py:77(isscalarlike)
	5 0.000 0.000 0.000 0.000 sputils.py:81(isintlike)
	5 0.000 0.000 0.000 0.000 sputils.py:96(isshape)
	6441 0.007 0.000 0.010 0.000 text.py:248(decode)
	6441 0.022 0.000 0.026 0.000 text.py:263(_word_ngrams)
	1 0.000 0.000 0.000 0.000 text.py:318(build_preprocessor)
	6441 0.001 0.000 0.001 0.000 text.py:328(<lambda>)
	6441 0.007 0.000 0.010 0.000 text.py:344(<lambda>)
	1 0.000 0.000 0.000 0.000 text.py:348(build_tokenizer)
	6441 0.005 0.000 0.014 0.000 text.py:353(<lambda>)
	1 0.000 0.000 0.000 0.000 text.py:355(get_stop_words)
	1 0.000 0.000 0.000 0.000 text.py:359(build_analyzer)
	6441 0.016 0.000 0.077 0.000 text.py:377(<lambda>)
	1 0.006 0.006 0.042 0.042 text.py:384(_term_count_dicts_to_matrix)
	1 0.017 0.017 0.171 0.171 text.py:512(transform)
	1 0.001 0.001 10.600 10.600 text.py:652(transform)
	1 0.000 0.000 0.000 0.000 text.py:78(_check_stop_list)
	1 0.104 0.104 10.875 10.875 text.py:910(transform)
	2 0.000 0.000 0.000 0.000 validation.py:115(_num_samples)
	1 0.000 0.000 0.000 0.000 validation.py:122(check_arrays)
	1 0.000 0.000 0.000 0.000 validation.py:200(warn_if_not_float)
	476268 0.197 0.000 0.197 0.000 {_bisect.bisect_left}
	1 0.000 0.000 0.000 0.000 {_csr.csr_matmat_pass1}
	1 0.001 0.001 0.001 0.001 {_csr.csr_matmat_pass2}
	6441 0.009 0.000 0.009 0.000 {built-in method findall}
	4 0.000 0.000 0.000 0.000 {getattr}
	8 0.000 0.000 0.000 0.000 {hasattr}
	1917980 0.710 0.000 0.710 0.000 {isinstance}
	2 0.000 0.000 0.000 0.000 {issubclass}
	959035 0.114 0.000 0.114 0.000 {len}
	1 0.034 0.034 0.034 0.034 {max}
	952537 0.210 0.000 0.210 0.000 {method 'append' of 'list' objects}
	3 0.000 0.000 0.000 0.000 {method 'astype' of 'numpy.ndarray' objects}
	6441 0.001 0.000 0.001 0.000 {method 'clear' of 'dict' objects}
	1 0.002 0.002 0.002 0.002 {method 'cumsum' of 'numpy.ndarray' objects}
	1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
	952536 0.110 0.000 0.110 0.000 {method 'extend' of 'list' objects}
	1 0.000 0.000 0.000 0.000 {method 'get' of 'dict' objects}
	6441 0.001 0.000 0.001 0.000 {method 'iteritems' of 'dict' objects}
	1 0.000 0.000 0.000 0.000 {method 'itervalues' of 'dict' objects}
	6441 0.002 0.000 0.002 0.000 {method 'lower' of 'unicode' objects}
	1 0.000 0.000 0.000 0.000 {method 'mro' of 'type' objects}
	6 0.000 0.000 0.000 0.000 {method 'newbyteorder' of 'numpy.dtype' objects}
	4 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}
	20 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
	6442 0.003 0.000 0.003 0.000 {min}
	38 0.226 0.006 0.226 0.006 {numpy.core.multiarray.array}
	10 0.000 0.000 0.000 0.000 {numpy.core.multiarray.can_cast}
	1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.concatenate}
	5 0.004 0.001 0.004 0.001 {numpy.core.multiarray.empty}
	3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.zeros}
	1 0.009 0.009 0.009 0.009 {range}
	1 0.000 0.000 0.000 0.000 {sklearn.utils.sparsefuncs.inplace_csr_row_normalize_l2}
No results found