aditya1702/Data used for training SVM model

## Data used for training SVM model
+------------------+-----+
|          features|label|
+------------------+-----+
|  [1.9643, 4.5957]|    1|
|  [2.2753, 3.8589]|    1|
|  [2.9781, 4.5651]|    1|
|   [2.932, 3.5519]|    1|
|   [3.5772, 2.856]|    1|
|   [4.015, 3.1937]|    1|
|  [3.3814, 3.4291]|    1|
|  [3.9113, 4.1761]|    1|
|  [2.7822, 4.0431]|    1|
|  [2.5518, 4.6162]|    1|
|  [3.3698, 3.9101]|    1|
|  [3.1048, 3.0709]|    1|
|  [1.9182, 4.0534]|    1|
|  [2.2638, 4.3706]|    1|
|  [2.6555, 3.5008]|    1|
|  [3.1855, 4.2888]|    1|
|  [3.6579, 3.8692]|    1|
|  [3.9113, 3.4291]|    1|
|  [3.6002, 3.1221]|    1|
|  [3.0357, 3.3165]|    1|
|  [1.5841, 3.3575]|    0|
|  [2.0103, 3.2039]|    0|
|  [1.9527, 2.7843]|    0|
|  [2.2753, 2.7127]|    0|
|  [2.3099, 2.9584]|    0|
|  [2.8283, 2.6309]|    0|
|  [3.0473, 2.2931]|    0|
|  [2.4827, 2.0373]|    0|
|  [2.5057, 2.3853]|    0|
|  [1.8721, 2.0577]|    0|
|  [2.0103, 2.3546]|    0|
|  [1.2269, 2.3239]|    0|
|  [1.8951, 2.9174]|    0|
|   [1.561, 3.0709]|    0|
|  [1.5495, 2.6923]|    0|
|  [1.6878, 2.4057]|    0|
|  [1.4919, 2.0271]|    0|
|    [0.962, 2.682]|    0|
|  [1.1693, 2.9276]|    0|
|  [0.8122, 2.9992]|    0|
|  [0.9735, 3.3881]|    0|
|    [1.25, 3.1937]|    0|
|  [1.3191, 3.5109]|    0|
|   [2.2292, 2.201]|    0|
|  [2.4482, 2.6411]|    0|
|  [2.7938, 1.9656]|    0|
|   [2.091, 1.6177]|    0|
|  [2.5403, 2.8867]|    0|
|  [0.9044, 3.0198]|    0|
| [0.76615, 2.5899]|    0|
|[0.086405, 4.1045]|    1|
+------------------+-----+


count=51
-----------------------------------------------------------------------
from pyspark.mllib.classification import SVMWithSGD

# LabeledPoint lives in pyspark.mllib.regression; the snippet used it without
# importing it, which raises NameError when this fragment is run on its own.
from pyspark.mllib.regression import LabeledPoint

# Rows of x_df are (features, label); LabeledPoint expects (label, features),
# hence the swapped indices.
data_rdd = x_df.map(lambda row: LabeledPoint(row[1], row[0]))

# Fit a linear SVM with SGD, including an intercept term.
# NOTE(review): regParam=1.0 is a strong penalty for a 51-row data set and may
# be related to every prediction under "Output:" being 1 -- confirm by
# retraining with a smaller value.
model = SVMWithSGD.train(
    data_rdd,
    iterations=1000,
    regParam=1.0,
    intercept=True,
    step=0.1
)

#model.setThreshold(0.15)
#model.clearThreshold()

# Collect the feature vectors and the labels into local Python lists.
X = x_df.map(lambda row: row[0]).collect()
Y = x_df.map(lambda row: row[1]).collect()
-------------------------------------------------------------------------

# Score every collected feature vector with the trained model, one call per
# point, keeping the results in the same order as X.
pred = [model.predict(features) for features in X]
# Parenthesised print emits the same text on Python 2 and is valid Python 3.
print(pred)

Output:
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
	+------------------+-----+
	|          features|label|
	+------------------+-----+
	|  [1.9643, 4.5957]|    1|
	|  [2.2753, 3.8589]|    1|
	|  [2.9781, 4.5651]|    1|
	|   [2.932, 3.5519]|    1|
	|   [3.5772, 2.856]|    1|
	|   [4.015, 3.1937]|    1|
	|  [3.3814, 3.4291]|    1|
	|  [3.9113, 4.1761]|    1|
	|  [2.7822, 4.0431]|    1|
	|  [2.5518, 4.6162]|    1|
	|  [3.3698, 3.9101]|    1|
	|  [3.1048, 3.0709]|    1|
	|  [1.9182, 4.0534]|    1|
	|  [2.2638, 4.3706]|    1|
	|  [2.6555, 3.5008]|    1|
	|  [3.1855, 4.2888]|    1|
	|  [3.6579, 3.8692]|    1|
	|  [3.9113, 3.4291]|    1|
	|  [3.6002, 3.1221]|    1|
	|  [3.0357, 3.3165]|    1|
	|  [1.5841, 3.3575]|    0|
	|  [2.0103, 3.2039]|    0|
	|  [1.9527, 2.7843]|    0|
	|  [2.2753, 2.7127]|    0|
	|  [2.3099, 2.9584]|    0|
	|  [2.8283, 2.6309]|    0|
	|  [3.0473, 2.2931]|    0|
	|  [2.4827, 2.0373]|    0|
	|  [2.5057, 2.3853]|    0|
	|  [1.8721, 2.0577]|    0|
	|  [2.0103, 2.3546]|    0|
	|  [1.2269, 2.3239]|    0|
	|  [1.8951, 2.9174]|    0|
	|   [1.561, 3.0709]|    0|
	|  [1.5495, 2.6923]|    0|
	|  [1.6878, 2.4057]|    0|
	|  [1.4919, 2.0271]|    0|
	|    [0.962, 2.682]|    0|
	|  [1.1693, 2.9276]|    0|
	|  [0.8122, 2.9992]|    0|
	|  [0.9735, 3.3881]|    0|
	|    [1.25, 3.1937]|    0|
	|  [1.3191, 3.5109]|    0|
	|   [2.2292, 2.201]|    0|
	|  [2.4482, 2.6411]|    0|
	|  [2.7938, 1.9656]|    0|
	|   [2.091, 1.6177]|    0|
	|  [2.5403, 2.8867]|    0|
	|  [0.9044, 3.0198]|    0|
	| [0.76615, 2.5899]|    0|
	|[0.086405, 4.1045]|    1|
	+------------------+-----+


	count=51
	-----------------------------------------------------------------------
	from pyspark.mllib.classification import SVMWithSGD

	# LabeledPoint lives in pyspark.mllib.regression; the snippet used it without
	# importing it, which raises NameError when this fragment is run on its own.
	from pyspark.mllib.regression import LabeledPoint

	# Rows of x_df are (features, label); LabeledPoint expects (label, features),
	# hence the swapped indices.
	data_rdd = x_df.map(lambda row: LabeledPoint(row[1], row[0]))

	# Fit a linear SVM with SGD, including an intercept term.
	# NOTE(review): regParam=1.0 is a strong penalty for a 51-row data set and may
	# be related to every prediction under "Output:" being 1 -- confirm by
	# retraining with a smaller value.
	model = SVMWithSGD.train(
	    data_rdd,
	    iterations=1000,
	    regParam=1.0,
	    intercept=True,
	    step=0.1
	)

	#model.setThreshold(0.15)
	#model.clearThreshold()

	# Collect the feature vectors and the labels into local Python lists.
	X = x_df.map(lambda row: row[0]).collect()
	Y = x_df.map(lambda row: row[1]).collect()
	-------------------------------------------------------------------------

	# Score every collected feature vector with the trained model, one call per
	# point, keeping the results in the same order as X. (The original loop body
	# had lost its indentation, which is a syntax error.)
	pred = [model.predict(features) for features in X]
	# Parenthesised print emits the same text on Python 2 and is valid Python 3.
	print(pred)

	Output:
	[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]