Skip to content

Instantly share code, notes, and snippets.

View albrzykowski's full-sized avatar

Leszek Albrzykowski albrzykowski

View GitHub Profile
'''
Classification of Autistic Spectrum Disorder based on:
https://archive.ics.uci.edu/ml/datasets/Autistic+Spectrum+Disorder+Screening+Data+for+Children++ dataset.
Only 10 answers are taken into account as features.
'''
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.externals import joblib
from sklearn.grid_search import GridSearchCV
from nltk.stem import SnowballStemmer
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.yarn.webapp.hamlet.HamletSpec.P;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.ml.PipelineStage;