Skip to content

Instantly share code, notes, and snippets.

@denysthegitmenace
Created December 28, 2014 19:12
Show Gist options
  • Save denysthegitmenace/1ac1165874416bb7cf40 to your computer and use it in GitHub Desktop.
Save denysthegitmenace/1ac1165874416bb7cf40 to your computer and use it in GitHub Desktop.
<?xml version="1.0" encoding="UTF-8"?>
<!-- Module descriptor: declares a module of type PYTHON_MODULE and its root configuration. -->
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<!-- The content root is the module's own directory ($MODULE_DIR$). -->
<content url="file://$MODULE_DIR$" />
<!-- Order entries: an inherited JDK/SDK entry, then the module's source folder (forTests is false). -->
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level settings for the "Encoding" component. -->
<project version="4">
<!-- useUTFGuessing is enabled; native2AsciiForPropertiesFiles is disabled. -->
<component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level settings for the "ProjectRootManager" component. -->
<project version="4">
<!-- Names the project SDK: "Python 2.7.3 (/usr/bin/python)" of type "Python SDK". -->
<component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.3 (/usr/bin/python)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level list of modules. -->
<project version="4">
<component name="ProjectModuleManager">
<modules>
<!-- Single module entry: .idea/classifying.iml, given both as a file URL and as a path under $PROJECT_DIR$. -->
<module fileurl="file://$PROJECT_DIR$/.idea/classifying.iml" filepath="$PROJECT_DIR$/.idea/classifying.iml" />
</modules>
</component>
</project>
<!-- "DependencyValidationManager" component state; this fragment has no XML declaration or project wrapper. -->
<component name="DependencyValidationManager">
<state>
<!-- The only option set here: SKIP_IMPORT_STATEMENTS is false. -->
<option name="SKIP_IMPORT_STATEMENTS" value="false" />
</state>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level settings for the "VcsDirectoryMappings" component. -->
<project version="4">
<component name="VcsDirectoryMappings">
<!-- One mapping whose directory and vcs attributes are both empty; presumably no VCS is assigned (TODO confirm). -->
<mapping directory="" vcs="" />
</component>
</project>
['6.1', '3.0', '4.9', '1.8', 'Iris-virginica']
['5.8', '2.8', '5.1', '2.4', 'Iris-virginica']
['6.3', '2.9', '5.6', '1.8', 'Iris-virginica']
['6.7', '2.5', '5.8', '1.8', 'Iris-virginica']
['6.3', '2.5', '5.0', '1.9', 'Iris-virginica']
['4.4', '2.9', '1.4', '0.2', 'Iris-setosa']
['4.9', '3.1', '1.5', '0.1', 'Iris-setosa']
['4.8', '3.0', '1.4', '0.3', 'Iris-setosa']
['4.5', '2.3', '1.3', '0.3', 'Iris-setosa']
['5.1', '3.8', '1.6', '0.2', 'Iris-setosa']
['5.8', '2.7', '4.1', '1.0', 'Iris-versicolor']
['5.5', '2.4', '3.7', '1.0', 'Iris-versicolor']
['5.5', '2.6', '4.4', '1.2', 'Iris-versicolor']
['6.5', '2.8', '4.6', '1.5', 'Iris-versicolor']
['6.0', '2.9', '4.5', '1.5', 'Iris-versicolor']
['6.3', '3.4', '5.6', '2.4', 'Iris-virginica']
['6.3', '3.3', '6.0', '2.5', 'Iris-virginica']
['6.2', '2.8', '4.8', '1.8', 'Iris-virginica']
['6.9', '3.2', '5.7', '2.3', 'Iris-virginica']
['7.9', '3.8', '6.4', '2.0', 'Iris-virginica']
['5.1', '3.7', '1.5', '0.4', 'Iris-setosa']
['4.8', '3.1', '1.6', '0.2', 'Iris-setosa']
['5.4', '3.4', '1.7', '0.2', 'Iris-setosa']
['5.8', '4.0', '1.2', '0.2', 'Iris-setosa']
['5.5', '4.2', '1.4', '0.2', 'Iris-setosa']
['5.5', '2.3', '4.0', '1.3', 'Iris-versicolor']
['6.6', '3.0', '4.4', '1.4', 'Iris-versicolor']
['5.6', '3.0', '4.1', '1.3', 'Iris-versicolor']
['7.0', '3.2', '4.7', '1.4', 'Iris-versicolor']
['5.7', '3.0', '4.2', '1.2', 'Iris-versicolor']
['6.3', '2.7', '4.9', '1.8', 'Iris-virginica']
['7.6', '3.0', '6.6', '2.1', 'Iris-virginica']
['6.0', '3.0', '4.8', '1.8', 'Iris-virginica']
['7.2', '3.2', '6.0', '1.8', 'Iris-virginica']
['6.9', '3.1', '5.4', '2.1', 'Iris-virginica']
['4.9', '3.0', '1.4', '0.2', 'Iris-setosa']
['5.0', '3.5', '1.6', '0.6', 'Iris-setosa']
['5.3', '3.7', '1.5', '0.2', 'Iris-setosa']
['5.2', '3.4', '1.4', '0.2', 'Iris-setosa']
['4.7', '3.2', '1.6', '0.2', 'Iris-setosa']
['6.9', '3.1', '4.9', '1.5', 'Iris-versicolor']
['6.2', '2.9', '4.3', '1.3', 'Iris-versicolor']
['5.9', '3.2', '4.8', '1.8', 'Iris-versicolor']
['6.6', '2.9', '4.6', '1.3', 'Iris-versicolor']
['6.2', '2.2', '4.5', '1.5', 'Iris-versicolor']
['5.8', '2.7', '5.1', '1.9', 'Iris-virginica']
['7.1', '3.0', '5.9', '2.1', 'Iris-virginica']
['6.7', '3.1', '5.6', '2.4', 'Iris-virginica']
['7.7', '2.6', '6.9', '2.3', 'Iris-virginica']
['6.3', '2.8', '5.1', '1.5', 'Iris-virginica']
['5.0', '3.5', '1.3', '0.3', 'Iris-setosa']
['5.1', '3.4', '1.5', '0.2', 'Iris-setosa']
['5.0', '3.0', '1.6', '0.2', 'Iris-setosa']
['4.6', '3.4', '1.4', '0.3', 'Iris-setosa']
['4.8', '3.4', '1.9', '0.2', 'Iris-setosa']
['5.7', '2.8', '4.1', '1.3', 'Iris-versicolor']
['6.1', '2.8', '4.0', '1.3', 'Iris-versicolor']
['6.1', '2.8', '4.7', '1.2', 'Iris-versicolor']
['5.6', '3.0', '4.5', '1.5', 'Iris-versicolor']
['6.8', '2.8', '4.8', '1.4', 'Iris-versicolor']
['5.6', '2.8', '4.9', '2.0', 'Iris-virginica']
['7.7', '2.8', '6.7', '2.0', 'Iris-virginica']
['6.5', '3.0', '5.8', '2.2', 'Iris-virginica']
['7.7', '3.0', '6.1', '2.3', 'Iris-virginica']
['5.7', '2.5', '5.0', '2.0', 'Iris-virginica']
['5.0', '3.6', '1.4', '0.2', 'Iris-setosa']
['4.6', '3.2', '1.4', '0.2', 'Iris-setosa']
['5.0', '3.3', '1.4', '0.2', 'Iris-setosa']
['4.9', '3.1', '1.5', '0.1', 'Iris-setosa']
['4.7', '3.2', '1.3', '0.2', 'Iris-setosa']
['6.4', '2.9', '4.3', '1.3', 'Iris-versicolor']
['6.3', '3.3', '4.7', '1.6', 'Iris-versicolor']
['5.8', '2.6', '4.0', '1.2', 'Iris-versicolor']
['5.5', '2.5', '4.0', '1.3', 'Iris-versicolor']
['5.9', '3.0', '4.2', '1.5', 'Iris-versicolor']
['7.7', '3.8', '6.7', '2.2', 'Iris-virginica']
['6.4', '2.8', '5.6', '2.1', 'Iris-virginica']
['6.2', '3.4', '5.4', '2.3', 'Iris-virginica']
['6.7', '3.3', '5.7', '2.1', 'Iris-virginica']
['6.7', '3.0', '5.2', '2.3', 'Iris-virginica']
['5.2', '4.1', '1.5', '0.1', 'Iris-setosa']
['5.4', '3.9', '1.3', '0.4', 'Iris-setosa']
['4.6', '3.1', '1.5', '0.2', 'Iris-setosa']
['5.1', '3.8', '1.9', '0.4', 'Iris-setosa']
['5.4', '3.9', '1.7', '0.4', 'Iris-setosa']
['4.9', '2.4', '3.3', '1.0', 'Iris-versicolor']
['5.0', '2.3', '3.3', '1.0', 'Iris-versicolor']
['5.5', '2.4', '3.8', '1.1', 'Iris-versicolor']
['5.4', '3.0', '4.5', '1.5', 'Iris-versicolor']
['6.0', '3.4', '4.5', '1.6', 'Iris-versicolor']
['6.4', '2.8', '5.6', '2.2', 'Iris-virginica']
['6.5', '3.0', '5.2', '2.0', 'Iris-virginica']
['4.9', '2.5', '4.5', '1.7', 'Iris-virginica']
['7.2', '3.0', '5.8', '1.6', 'Iris-virginica']
['6.4', '2.7', '5.3', '1.9', 'Iris-virginica']
['5.0', '3.2', '1.2', '0.2', 'Iris-setosa']
['5.0', '3.4', '1.5', '0.2', 'Iris-setosa']
['5.2', '3.5', '1.5', '0.2', 'Iris-setosa']
['4.6', '3.6', '1.0', '0.2', 'Iris-setosa']
['5.1', '3.5', '1.4', '0.2', 'Iris-setosa']
['5.7', '2.6', '3.5', '1.0', 'Iris-versicolor']
['6.1', '2.9', '4.7', '1.4', 'Iris-versicolor']
['6.7', '3.0', '5.0', '1.7', 'Iris-versicolor']
['5.8', '2.7', '3.9', '1.2', 'Iris-versicolor']
['6.3', '2.5', '4.9', '1.5', 'Iris-versicolor']
['6.7', '3.3', '5.7', '2.5', 'Iris-virginica']
['7.4', '2.8', '6.1', '1.9', 'Iris-virginica']
['7.2', '3.6', '6.1', '2.5', 'Iris-virginica']
['6.8', '3.0', '5.5', '2.1', 'Iris-virginica']
['6.8', '3.2', '5.9', '2.3', 'Iris-virginica']
['4.4', '3.2', '1.3', '0.2', 'Iris-setosa']
['5.7', '4.4', '1.5', '0.4', 'Iris-setosa']
['5.1', '3.8', '1.5', '0.3', 'Iris-setosa']
['4.4', '3.0', '1.3', '0.2', 'Iris-setosa']
['5.0', '3.4', '1.6', '0.4', 'Iris-setosa']
['5.1', '2.5', '3.0', '1.1', 'Iris-versicolor']
['5.6', '2.5', '3.9', '1.1', 'Iris-versicolor']
['6.7', '3.1', '4.7', '1.5', 'Iris-versicolor']
['5.7', '2.9', '4.2', '1.3', 'Iris-versicolor']
['6.1', '3.0', '4.6', '1.4', 'Iris-versicolor']
['6.5', '3.0', '5.5', '1.8', 'Iris-virginica']
['6.4', '3.1', '5.5', '1.8', 'Iris-virginica']
['6.1', '2.6', '5.6', '1.4', 'Iris-virginica']
['6.9', '3.1', '5.1', '2.3', 'Iris-virginica']
['6.5', '3.2', '5.1', '2.0', 'Iris-virginica']
['4.9', '3.1', '1.5', '0.1', 'Iris-setosa']
['4.8', '3.0', '1.4', '0.1', 'Iris-setosa']
['5.7', '3.8', '1.7', '0.3', 'Iris-setosa']
['4.8', '3.4', '1.6', '0.2', 'Iris-setosa']
['5.4', '3.7', '1.5', '0.2', 'Iris-setosa']
['5.6', '2.9', '3.6', '1.3', 'Iris-versicolor']
['6.3', '2.3', '4.4', '1.3', 'Iris-versicolor']
['6.4', '3.2', '4.5', '1.5', 'Iris-versicolor']
['6.0', '2.7', '5.1', '1.6', 'Iris-versicolor']
['5.0', '2.0', '3.5', '1.0', 'Iris-versicolor']
['7.3', '2.9', '6.3', '1.8', 'Iris-virginica']
['6.4', '3.2', '5.3', '2.3', 'Iris-virginica']
['6.0', '2.2', '5.0', '1.5', 'Iris-virginica']
['5.9', '3.0', '5.1', '1.8', 'Iris-virginica']
['5.8', '2.7', '5.1', '1.9', 'Iris-virginica']
['4.3', '3.0', '1.1', '0.1', 'Iris-setosa']
['5.5', '3.5', '1.3', '0.2', 'Iris-setosa']
['5.1', '3.3', '1.7', '0.5', 'Iris-setosa']
['5.4', '3.4', '1.5', '0.4', 'Iris-setosa']
['5.1', '3.5', '1.4', '0.3', 'Iris-setosa']
['6.0', '2.2', '4.0', '1.0', 'Iris-versicolor']
['5.7', '2.8', '4.5', '1.3', 'Iris-versicolor']
['5.2', '2.7', '3.9', '1.4', 'Iris-versicolor']
['6.7', '3.1', '4.4', '1.4', 'Iris-versicolor']
['5.6', '2.7', '4.2', '1.3', 'Iris-versicolor']
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
import random
class classifier:
    """Splits a comma-separated data file into class-stratified bucket files."""

    def buckets(self, fileName, classColumn, bucketsNo):
        """Divide the rows of fileName into bucketsNo files, stratified by class.

        Rows are grouped by the value found in column classColumn, each group
        is shuffled, and its rows are dealt round-robin into the buckets so
        every bucket receives a similar share of every class.

        Parameters:
            fileName    -- path of the comma-separated input file.
            classColumn -- index of the column holding the class label; only
                           the columns before it are kept as attribute values.
            bucketsNo   -- number of bucket files to write.

        Side effects:
            Writes "bucket - 00.data", "bucket - 01.data", ... into the current
            working directory.  Each output line is the str() of a Python list
            holding the attribute values followed by the class label.

        Raises:
            IndexError -- if a non-blank row has no column classColumn, or if
                          bucketsNo is 0 while the input contains rows.
        """
        # Group the attribute columns of every row under its class label.
        data = {}
        with open(fileName) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines (for instance a trailing empty line at the
                    # end of the file) carry no instance; indexing them by
                    # classColumn would raise IndexError.
                    continue
                cur_line = line.split(',')
                cur_class = cur_line[classColumn]
                data.setdefault(cur_class, []).append(cur_line[0:classColumn])

        buckets = [[] for _ in range(bucketsNo)]

        # Deal each class separately, restarting at bucket 0, so the classes
        # are spread evenly over the buckets.
        for key in data:
            buNum = 0
            random.shuffle(data[key])
            for values in data[key]:
                values.append(key)
                buckets[buNum].append(values)
                buNum = (buNum + 1) % bucketsNo

        for cc in range(bucketsNo):
            with open("%s - %02i.data" % ('bucket', cc), 'w') as f:
                for values in buckets[cc]:
                    f.write(str(values) + '\n')
class Classifier:
    """Nearest-neighbor classifier over median/ASD-normalized numeric columns.

    The training file is tab-separated.  Its first line names the role of each
    column: 'num' (numeric attribute), 'comment' (kept but ignored) or 'class'
    (the label).  Columns with any other role are skipped.

    Attributes:
        format             -- list of column roles read from the first line.
        data               -- list of (classification, vector, ignore) tuples
                              whose vectors have been normalized in place.
        rawData            -- same tuples with the vectors as read from file.
        vlen               -- number of numeric attributes per instance.
        medianAndDeviation -- one (median, asd) pair per numeric column.
    """

    def __init__(self, filename):
        """Read the training data from filename and normalize every column."""
        self.medianAndDeviation = []
        # reading the data in from the file
        with open(filename) as f:
            lines = f.readlines()
        self.format = lines[0].strip().split('\t')
        self.data = []
        for line in lines[1:]:
            stripped = line.strip()
            if not stripped:
                # a blank line is not an instance
                continue
            fields = stripped.split('\t')
            ignore = []
            vector = []
            # reset per row so a file without a 'class' column yields None
            # instead of an unbound (or stale) name
            classification = None
            for i in range(len(fields)):
                if self.format[i] == 'num':
                    vector.append(float(fields[i]))
                elif self.format[i] == 'comment':
                    ignore.append(fields[i])
                elif self.format[i] == 'class':
                    classification = fields[i]
            self.data.append((classification, vector, ignore))
        # Copy the inner lists as well: normalizeColumn rewrites the vectors in
        # self.data in place, and a shallow list() copy would share them.
        self.rawData = [(cls, list(vec), list(ign))
                        for (cls, vec, ign) in self.data]
        # get length of instance vector (0 when the file holds no instances)
        self.vlen = len(self.data[0][1]) if self.data else 0
        # now normalize the data
        for i in range(self.vlen):
            self.normalizeColumn(i)

    ##################################################
    ###
    ### CODE TO COMPUTE THE MODIFIED STANDARD SCORE

    def getMedian(self, alist):
        """Return the median of alist; an empty list yields []."""
        if alist == []:
            return []
        blist = sorted(alist)
        length = len(alist)
        if length % 2 == 1:
            # length of list is odd so return middle element
            return blist[length // 2]
        # length of list is even so compute midpoint of the two middle values
        v1 = blist[length // 2]
        v2 = blist[length // 2 - 1]
        return (v1 + v2) / 2.0

    def getAbsoluteStandardDeviation(self, alist, median):
        """Given alist and median return the absolute standard deviation,
        i.e. the mean of |item - median|.

        Raises ZeroDivisionError when alist is empty."""
        total = 0
        for item in alist:
            total += abs(item - median)
        return total / len(alist)

    def _standardScore(self, value, median, asd):
        """Return (value - median) / asd, or 0.0 when asd is 0.

        asd is 0 when every training value in the column equals the median;
        such a column cannot separate instances, so it contributes nothing."""
        if asd == 0:
            return 0.0
        return (value - median) / asd

    def normalizeColumn(self, columnNumber):
        """Given a column number, normalize that column in self.data and
        record its (median, asd) pair in self.medianAndDeviation."""
        # first extract values to list
        col = [v[1][columnNumber] for v in self.data]
        median = self.getMedian(col)
        asd = self.getAbsoluteStandardDeviation(col, median)
        self.medianAndDeviation.append((median, asd))
        for v in self.data:
            v[1][columnNumber] = self._standardScore(v[1][columnNumber],
                                                     median, asd)

    def normalizeVector(self, v):
        """We have stored the median and asd for each column.
        We now use them to return a normalized copy of vector v."""
        vector = list(v)
        for i in range(len(vector)):
            (median, asd) = self.medianAndDeviation[i]
            vector[i] = self._standardScore(vector[i], median, asd)
        return vector

    ###
    ### END NORMALIZATION
    ##################################################

    def manhattan(self, vector1, vector2):
        """Computes the Manhattan distance."""
        return sum(map(lambda v1, v2: abs(v1 - v2), vector1, vector2))

    def nearestNeighbor(self, itemVector):
        """Return (distance, item) for the training item nearest to
        itemVector, which must already be normalized."""
        return min([(self.manhattan(itemVector, item[1]), item)
                    for item in self.data])

    def classify(self, itemVector):
        """Return the class we think itemVector (un-normalized) is in."""
        return self.nearestNeighbor(self.normalizeVector(itemVector))[1][0]
def test(training_filename, test_filename):
    """Test the classifier on a test set of data.

    Builds a Classifier from training_filename, classifies every non-blank
    line of the tab-separated file test_filename (laid out in the same column
    order as the training file's format line, but without a header line) and
    prints one line per instance: '+' when the predicted class matches the
    class column, '-' otherwise.  Finally prints the percentage classified
    correctly; an empty test file reports 0.00%.
    """
    model = Classifier(training_filename)
    with open(test_filename) as f:
        # Blank lines hold no instance; they must neither be classified nor
        # counted in the accuracy denominator.
        lines = [line for line in f if line.strip()]

    # The column roles depend only on the format, so work them out once.
    # classInColumn stays -1 (i.e. the last column) when no 'class' role exists.
    numColumns = []
    classInColumn = -1
    for i in range(len(model.format)):
        if model.format[i] == 'num':
            numColumns.append(i)
        elif model.format[i] == 'class':
            classInColumn = i

    numCorrect = 0.0
    for line in lines:
        data = line.strip().split('\t')
        vector = [float(data[i]) for i in numColumns]
        theClass = model.classify(vector)
        prefix = '-'
        if theClass == data[classInColumn]:
            # it is correct
            numCorrect += 1
            prefix = '+'
        print("%s %12s %s" % (prefix, theClass, line))

    # guard the division so an empty test file does not raise
    percentCorrect = numCorrect * 100 / len(lines) if lines else 0.0
    print("%4.2f%% correct" % percentCorrect)
##
## Here are examples of how the classifier is used on different data sets
## in the book.
# test('athletesTrainingSet.txt', 'athletesTestSet.txt')
# test("irisTrainingSet.data", "irisTestSet.data")
# test("mpgTrainingSet.txt", "mpgTestSet.txt")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment