Limitations:
- for binary classification/OVR/OVO only
- suitable for documents that are not too long
Advantage:
- takes the class label into consideration, correcting the inappropriate scaling introduced by IDF
- better than TF-IDF in most benchmarks
Practical advice:
# Set up an ffmpeg-backed movie writer and a figure for it to capture.
import numpy as np
import matplotlib

# Select the Agg backend before pyplot is imported.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.animation as manimation

# Look up the writer class registered under the name 'ffmpeg'.
FFMpegWriter = manimation.writers['ffmpeg']
writer = FFMpegWriter(fps=5)  # 5 frames per second
fig = plt.figure(figsize=(12, 8))
import numpy as np | |
import matplotlib.pyplot as plt | |
# A precomputed histogram, as returned by `plt.hist` or `np.histogram`:
# `bins` holds the bin edges and `counts` the number of samples per bin.
bins = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]).astype(float)
counts = np.array([5, 3, 4, 5, 6, 1, 3, 7]).astype(float)
# N bins are delimited by N + 1 edges.
assert len(bins) == len(counts) + 1
# Recover one representative value per bin: the midpoint of each pair of
# adjacent edges. These centres are what gets passed to `plt.hist` below.
centroids = (bins[1:] + bins[:-1]) / 2
counts_, bins_, _ = plt.hist(centroids, bins=len(counts), |