-
-
Save abhirk/3933727 to your computer and use it in GitHub Desktop.
| In [14]: cProfile.run("vectorizer.transform(input_txt)") | |
| 103465 function calls (103458 primitive calls) in 0.128 CPU seconds | |
| Ordered by: standard name | |
| ncalls tottime percall cumtime percall filename:lineno(function) | |
| 1 0.000 0.000 0.128 0.128 <string>:1(<module>) | |
| 1 0.000 0.000 0.000 0.000 base.py:178(asformat) | |
| 3 0.000 0.000 0.000 0.000 base.py:51(__init__) | |
| 4 0.000 0.000 0.000 0.000 base.py:553(isspmatrix) | |
| 3 0.000 0.000 0.000 0.000 base.py:59(set_shape) | |
| 16 0.000 0.000 0.000 0.000 base.py:81(get_shape) | |
| 2 0.000 0.000 0.000 0.000 compressed.py:101(check_format) | |
| 2/1 0.000 0.000 0.000 0.000 compressed.py:20(__init__) | |
| 2 0.000 0.000 0.000 0.000 compressed.py:622(prune) | |
| 8 0.000 0.000 0.000 0.000 compressed.py:85(getnnz) | |
| 1 0.000 0.000 0.000 0.000 compressed.py:90(_set_self) | |
| 1 0.000 0.000 0.000 0.000 coo.py:115(__init__) | |
| 2 0.000 0.000 0.000 0.000 coo.py:194(getnnz) | |
| 1 0.000 0.000 0.000 0.000 coo.py:205(_check) | |
| 1 0.000 0.000 0.000 0.000 coo.py:281(tocsr) | |
| 1 0.000 0.000 0.000 0.000 csr.py:129(tocsr) | |
| 7 0.000 0.000 0.000 0.000 csr.py:180(_swap) | |
| 3 0.000 0.000 0.000 0.000 data.py:17(__init__) | |
| 4 0.000 0.000 0.000 0.000 data.py:20(_get_dtype) | |
| 6441 0.017 0.000 0.027 0.000 fixes.py:22(__init__) | |
| 6441 0.007 0.000 0.010 0.000 fixes.py:29(update) | |
| 14 0.000 0.000 0.000 0.000 fromnumeric.py:2116(rank) | |
| 12 0.000 0.000 0.000 0.000 numeric.py:167(asarray) | |
| 1 0.000 0.000 0.000 0.000 numerictypes.py:665(issubclass_) | |
| 1 0.000 0.000 0.000 0.000 numerictypes.py:733(issubdtype) | |
| 1 0.000 0.000 0.000 0.000 preprocessing.py:289(normalize) | |
| 1 0.000 0.000 0.001 0.001 re.py:188(compile) | |
| 1 0.000 0.000 0.001 0.001 re.py:229(_compile) | |
| 3 0.000 0.000 0.000 0.000 sputils.py:111(issequence) | |
| 4 0.000 0.000 0.000 0.000 sputils.py:116(_isinstance) | |
| 3 0.000 0.000 0.000 0.000 sputils.py:50(to_native) | |
| 1 0.000 0.000 0.000 0.000 sputils.py:54(getdtype) | |
| 3 0.000 0.000 0.000 0.000 sputils.py:81(isintlike) | |
| 2 0.000 0.000 0.000 0.000 sputils.py:96(isshape) | |
| 3 0.000 0.000 0.000 0.000 sre_compile.py:184(_compile_charset) | |
| 3 0.000 0.000 0.000 0.000 sre_compile.py:213(_optimize_charset) | |
| 1 0.000 0.000 0.000 0.000 sre_compile.py:360(_simple) | |
| 1 0.000 0.000 0.000 0.000 sre_compile.py:367(_compile_info) | |
| 3/1 0.000 0.000 0.000 0.000 sre_compile.py:38(_compile) | |
| 2 0.000 0.000 0.000 0.000 sre_compile.py:480(isstring) | |
| 1 0.000 0.000 0.000 0.000 sre_compile.py:486(_code) | |
| 1 0.000 0.000 0.001 0.001 sre_compile.py:501(compile) | |
| 4 0.000 0.000 0.000 0.000 sre_parse.py:132(__len__) | |
| 8 0.000 0.000 0.000 0.000 sre_parse.py:136(__getitem__) | |
| 1 0.000 0.000 0.000 0.000 sre_parse.py:140(__setitem__) | |
| 6 0.000 0.000 0.000 0.000 sre_parse.py:144(append) | |
| 3/2 0.000 0.000 0.000 0.000 sre_parse.py:146(getwidth) | |
| 1 0.000 0.000 0.000 0.000 sre_parse.py:184(__init__) | |
| 13 0.000 0.000 0.000 0.000 sre_parse.py:188(__next) | |
| 9 0.000 0.000 0.000 0.000 sre_parse.py:201(match) | |
| 10 0.000 0.000 0.000 0.000 sre_parse.py:207(get) | |
| 5 0.000 0.000 0.000 0.000 sre_parse.py:263(_escape) | |
| 2/1 0.000 0.000 0.000 0.000 sre_parse.py:307(_parse_sub) | |
| 2/1 0.000 0.000 0.000 0.000 sre_parse.py:385(_parse) | |
| 1 0.000 0.000 0.000 0.000 sre_parse.py:669(parse) | |
| 1 0.000 0.000 0.000 0.000 sre_parse.py:73(__init__) | |
| 3 0.000 0.000 0.000 0.000 sre_parse.py:96(__init__) | |
| 6441 0.006 0.000 0.009 0.000 text.py:248(decode) | |
| 6441 0.019 0.000 0.023 0.000 text.py:263(_word_ngrams) | |
| 1 0.000 0.000 0.000 0.000 text.py:318(build_preprocessor) | |
| 6441 0.001 0.000 0.001 0.000 text.py:328(<lambda>) | |
| 6441 0.005 0.000 0.008 0.000 text.py:344(<lambda>) | |
| 1 0.000 0.000 0.001 0.001 text.py:348(build_tokenizer) | |
| 6441 0.005 0.000 0.013 0.000 text.py:353(<lambda>) | |
| 1 0.000 0.000 0.000 0.000 text.py:355(get_stop_words) | |
| 1 0.000 0.000 0.001 0.001 text.py:359(build_analyzer) | |
| 6441 0.014 0.000 0.066 0.000 text.py:377(<lambda>) | |
| 1 0.006 0.006 0.020 0.020 text.py:384(_term_count_dicts_to_matrix) | |
| 1 0.013 0.013 0.127 0.127 text.py:512(transform) | |
| 1 0.000 0.000 0.001 0.001 text.py:652(transform) | |
| 1 0.000 0.000 0.000 0.000 text.py:78(_check_stop_list) | |
| 1 0.001 0.001 0.128 0.128 text.py:910(transform) | |
| 2 0.000 0.000 0.000 0.000 validation.py:115(_num_samples) | |
| 1 0.000 0.000 0.000 0.000 validation.py:122(check_arrays) | |
| 1 0.000 0.000 0.000 0.000 validation.py:200(warn_if_not_float) | |
| 1 0.000 0.000 0.000 0.000 {_sre.compile} | |
| 6441 0.008 0.000 0.008 0.000 {built-in method findall} | |
| 1 0.000 0.000 0.000 0.000 {getattr} | |
| 7 0.000 0.000 0.000 0.000 {hasattr} | |
| 12907 0.006 0.000 0.006 0.000 {isinstance} | |
| 2 0.000 0.000 0.000 0.000 {issubclass} | |
| 6512/6511 0.001 0.000 0.001 0.000 {len} | |
| 1 0.013 0.013 0.013 0.013 {max} | |
| 43 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects} | |
| 3 0.000 0.000 0.000 0.000 {method 'astype' of 'numpy.ndarray' objects} | |
| 6441 0.001 0.000 0.001 0.000 {method 'clear' of 'dict' objects} | |
| 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} | |
| 6 0.000 0.000 0.000 0.000 {method 'get' of 'dict' objects} | |
| 1 0.000 0.000 0.000 0.000 {method 'items' of 'dict' objects} | |
| 6441 0.001 0.000 0.001 0.000 {method 'iteritems' of 'dict' objects} | |
| 1 0.000 0.000 0.000 0.000 {method 'itervalues' of 'dict' objects} | |
| 6441 0.002 0.000 0.002 0.000 {method 'lower' of 'unicode' objects} | |
| 1 0.000 0.000 0.000 0.000 {method 'mro' of 'type' objects} | |
| 3 0.000 0.000 0.000 0.000 {method 'newbyteorder' of 'numpy.dtype' objects} | |
| 4 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects} | |
| 8 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects} | |
| 6445 0.003 0.000 0.003 0.000 {min} | |
| 15 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array} | |
| 3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.zeros} | |
| 1 0.000 0.000 0.000 0.000 {sklearn.utils.sparsefuncs.inplace_csr_row_normalize_l2} |
cProfile.run("vectorizer.transform(input_txt)") 866315 function calls (866308 primitive calls) in 1.047 CPU seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 1.047 1.047 <string>:1(<module>)
1 0.000 0.000 0.000 0.000 base.py:178(asformat)
3 0.000 0.000 0.000 0.000 base.py:51(__init__)
4 0.000 0.000 0.000 0.000 base.py:553(isspmatrix)
3 0.000 0.000 0.000 0.000 base.py:59(set_shape)
16 0.000 0.000 0.000 0.000 base.py:81(get_shape)
2 0.000 0.000 0.000 0.000 compressed.py:101(check_format)
2/1 0.000 0.000 0.000 0.000 compressed.py:20(__init__)
2 0.000 0.000 0.000 0.000 compressed.py:622(prune)
8 0.000 0.000 0.000 0.000 compressed.py:85(getnnz)
1 0.000 0.000 0.000 0.000 compressed.py:90(_set_self)
1 0.000 0.000 0.000 0.000 coo.py:115(__init__)
2 0.000 0.000 0.000 0.000 coo.py:194(getnnz)
1 0.000 0.000 0.000 0.000 coo.py:205(_check)
1 0.000 0.000 0.000 0.000 coo.py:281(tocsr)
1 0.000 0.000 0.000 0.000 csr.py:129(tocsr)
7 0.000 0.000 0.000 0.000 csr.py:180(_swap)
3 0.000 0.000 0.000 0.000 data.py:17(__init__)
4 0.000 0.000 0.000 0.000 data.py:20(_get_dtype)
45574 0.136 0.000 0.211 0.000 fixes.py:22(__init__)
45574 0.051 0.000 0.074 0.000 fixes.py:29(update)
14 0.000 0.000 0.000 0.000 fromnumeric.py:2116(rank)
12 0.000 0.000 0.000 0.000 numeric.py:167(asarray)
1 0.000 0.000 0.000 0.000 numerictypes.py:665(issubclass_)
1 0.000 0.000 0.000 0.000 numerictypes.py:733(issubdtype)
1 0.000 0.000 0.000 0.000 preprocessing.py:289(normalize)
1 0.000 0.000 0.001 0.001 re.py:188(compile)
1 0.000 0.000 0.001 0.001 re.py:229(_compile)
3 0.000 0.000 0.000 0.000 sputils.py:111(issequence)
4 0.000 0.000 0.000 0.000 sputils.py:116(_isinstance)
3 0.000 0.000 0.000 0.000 sputils.py:50(to_native)
1 0.000 0.000 0.000 0.000 sputils.py:54(getdtype)
3 0.000 0.000 0.000 0.000 sputils.py:81(isintlike)
2 0.000 0.000 0.000 0.000 sputils.py:96(isshape)
3 0.000 0.000 0.000 0.000 sre_compile.py:184(_compile_charset)
3 0.000 0.000 0.000 0.000 sre_compile.py:213(_optimize_charset)
1 0.000 0.000 0.000 0.000 sre_compile.py:360(_simple)
1 0.000 0.000 0.000 0.000 sre_compile.py:367(_compile_info)
3/1 0.000 0.000 0.000 0.000 sre_compile.py:38(_compile)
2 0.000 0.000 0.000 0.000 sre_compile.py:480(isstring)
1 0.000 0.000 0.000 0.000 sre_compile.py:486(_code)
1 0.000 0.000 0.001 0.001 sre_compile.py:501(compile)
4 0.000 0.000 0.000 0.000 sre_parse.py:132(__len__)
8 0.000 0.000 0.000 0.000 sre_parse.py:136(__getitem__)
1 0.000 0.000 0.000 0.000 sre_parse.py:140(__setitem__)
6 0.000 0.000 0.000 0.000 sre_parse.py:144(append)
3/2 0.000 0.000 0.000 0.000 sre_parse.py:146(getwidth)
1 0.000 0.000 0.000 0.000 sre_parse.py:184(__init__)
13 0.000 0.000 0.000 0.000 sre_parse.py:188(__next)
9 0.000 0.000 0.000 0.000 sre_parse.py:201(match)
10 0.000 0.000 0.000 0.000 sre_parse.py:207(get)
5 0.000 0.000 0.000 0.000 sre_parse.py:263(_escape)
2/1 0.000 0.000 0.000 0.000 sre_parse.py:307(_parse_sub)
2/1 0.000 0.000 0.000 0.000 sre_parse.py:385(_parse)
1 0.000 0.000 0.000 0.000 sre_parse.py:669(parse)
1 0.000 0.000 0.000 0.000 sre_parse.py:73(__init__)
3 0.000 0.000 0.000 0.000 sre_parse.py:96(__init__)
45574 0.072 0.000 0.240 0.000 text.py:248(decode)
45574 0.128 0.000 0.157 0.000 text.py:263(_word_ngrams)
1 0.000 0.000 0.000 0.000 text.py:318(build_preprocessor)
45574 0.008 0.000 0.008 0.000 text.py:328(<lambda>)
45574 0.041 0.000 0.061 0.000 text.py:344(<lambda>)
1 0.000 0.000 0.001 0.001 text.py:348(build_tokenizer)
45574 0.036 0.000 0.105 0.000 text.py:353(<lambda>)
1 0.000 0.000 0.000 0.000 text.py:355(get_stop_words)
1 0.000 0.000 0.001 0.001 text.py:359(build_analyzer)
45574 0.098 0.000 0.661 0.000 text.py:377(<lambda>)
1 0.044 0.044 0.069 0.069 text.py:384(_term_count_dicts_to_matrix)
1 0.096 0.096 1.037 1.037 text.py:512(transform)
1 0.000 0.000 0.001 0.001 text.py:652(transform)
1 0.000 0.000 0.000 0.000 text.py:78(_check_stop_list)
1 0.009 0.009 1.047 1.047 text.py:910(transform)
45574 0.032 0.000 0.093 0.000 utf_8.py:15(decode)
2 0.000 0.000 0.000 0.000 validation.py:115(_num_samples)
1 0.000 0.000 0.000 0.000 validation.py:122(check_arrays)
1 0.000 0.000 0.000 0.000 validation.py:200(warn_if_not_float)
45574 0.060 0.000 0.060 0.000 {_codecs.utf_8_decode}
1 0.000 0.000 0.000 0.000 {_sre.compile}
45574 0.070 0.000 0.070 0.000 {built-in method findall}
1 0.000 0.000 0.000 0.000 {getattr}
7 0.000 0.000 0.000 0.000 {hasattr}
91173 0.029 0.000 0.029 0.000 {isinstance}
2 0.000 0.000 0.000 0.000 {issubclass}
45645/45644 0.008 0.000 0.008 0.000 {len}
1 0.013 0.013 0.013 0.013 {max}
43 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
3 0.000 0.000 0.000 0.000 {method 'astype' of 'numpy.ndarray' objects}
45574 0.004 0.000 0.004 0.000 {method 'clear' of 'dict' objects}
45574 0.069 0.000 0.162 0.000 {method 'decode' of 'str' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
6 0.000 0.000 0.000 0.000 {method 'get' of 'dict' objects}
1 0.000 0.000 0.000 0.000 {method 'items' of 'dict' objects}
45574 0.007 0.000 0.007 0.000 {method 'iteritems' of 'dict' objects}
1 0.000 0.000 0.000 0.000 {method 'itervalues' of 'dict' objects}
45574 0.013 0.000 0.013 0.000 {method 'lower' of 'unicode' objects}
1 0.000 0.000 0.000 0.000 {method 'mro' of 'type' objects}
3 0.000 0.000 0.000 0.000 {method 'newbyteorder' of 'numpy.dtype' objects}
4 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}
8 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}
45578 0.021 0.000 0.021 0.000 {min}
15 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array}
3 0.000 0.000 0.000 0.000 {numpy.core.multiarray.zeros}
1 0.000 0.000 0.000 0.000 {sklearn.utils.sparsefuncs.inplace_csr_row_normalize_l2}
In [25]: cProfile.run("clf.predict(document_vector)")
99046 function calls in 1.012 CPU seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 1.012 1.012 <string>:1(<module>)
1 0.000 0.000 0.000 0.000 base.py:384(_get_sub_estimator)
1 0.000 0.000 0.000 0.000 base.py:395(is_classifier)
7818 0.009 0.000 0.067 0.000 base.py:553(isspmatrix)
2606 0.020 0.000 0.021 0.000 base.py:593(_get_solver_type)
2606 0.017 0.000 0.966 0.000 base.py:707(decision_function)
2606 0.008 0.000 0.009 0.000 base.py:734(_check_n_features)
2606 0.017 0.000 0.163 0.000 base.py:742(_validate_for_predict)
2606 0.001 0.000 0.001 0.000 base.py:800(_get_bias)
5212 0.002 0.000 0.002 0.000 base.py:81(get_shape)
2606 0.003 0.000 0.026 0.000 compressed.py:310(sum)
2606 0.001 0.000 0.001 0.000 csr.py:129(tocsr)
5212 0.004 0.000 0.004 0.000 data.py:20(_get_dtype)
2606 0.007 0.000 0.021 0.000 fromnumeric.py:1044(ravel)
1 0.000 0.000 0.000 0.000 multiclass.py:163(_check_is_fitted)
1 0.000 0.000 1.012 1.012 multiclass.py:167(predict)
2606 0.011 0.000 1.001 0.000 multiclass.py:50(_predict_binary)
1 0.007 0.007 1.012 1.012 multiclass.py:79(predict_ovr)
2606 0.005 0.000 0.011 0.000 numeric.py:167(asarray)
1 0.000 0.000 0.000 0.000 preprocessing.py:695(_check_fitted)
1 0.000 0.000 0.002 0.002 preprocessing.py:788(inverse_transform)
7818 0.038 0.000 0.059 0.000 sputils.py:116(_isinstance)
2606 0.024 0.000 0.050 0.000 validation.py:10(assert_all_finite)
2606 0.015 0.000 0.102 0.000 validation.py:95(atleast2d_or_csr)
2611 0.003 0.000 0.003 0.000 {hasattr}
7819 0.005 0.000 0.005 0.000 {isinstance}
1 0.000 0.000 0.000 0.000 {len}
1 0.002 0.002 0.002 0.002 {method 'argmax' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
2606 0.004 0.000 0.004 0.000 {method 'ravel' of 'numpy.ndarray' objects}
15636 0.015 0.000 0.015 0.000 {method 'split' of 'str' objects}
2606 0.023 0.000 0.023 0.000 {method 'sum' of 'numpy.ndarray' objects}
5212 0.001 0.000 0.001 0.000 {method 'upper' of 'str' objects}
2607 0.008 0.000 0.008 0.000 {numpy.core.multiarray.array}
2606 0.762 0.000 0.763 0.000 {sklearn.svm.liblinear.csr_decision_function_wrap}