Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save smihael/60b101c0f04ba869da2fb345c6ae3aa3 to your computer and use it in GitHub Desktop.
Save smihael/60b101c0f04ba869da2fb345c6ae3aa3 to your computer and use it in GitHub Desktop.
Combining several imperfect classifiers to reduce overall classification error.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import numpy as np

from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ParameterGrid
from sklearn.svm import SVC
# Search space: every data-split seed crossed with every SVC setting.
subset_seed_range = range(0, 100)  # candidate seeds for the subset split
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [0.001, 0.01, 0.1, 1],
    'kernel': ['rbf'],
    'random_state': range(0, 5)
}
# Materialize the full cartesian product of (seed, parameter-setting) pairs.
grid = ParameterGrid(param_grid)
all_combinations = [(s, p) for s in subset_seed_range for p in grid]
# Running state of the search.
best_ensemble_accuracy = 0  # highest ensemble accuracy observed so far
best_combination = None     # (seed, params) pair that achieved it
n_passed = 0                # combinations where the ensemble beat every base model
# Perform the search: for each (seed, params) pair, split the data into three
# disjoint thirds, train one SVC per third, combine them by hard voting, and
# remember the combination whose ensemble strictly beats all its base models.
for seed, params in all_combinations:
    # Create three disjoint index subsets of roughly len(X)/3 each, driven by
    # the seed so the split is reproducible per combination.
    np.random.seed(seed)
    subset_indices_1 = np.random.choice(range(len(X)), size=int(len(X) / 3), replace=False)
    subset_indices_2 = np.random.choice(
        list(set(range(len(X))) - set(subset_indices_1)),
        size=int(len(X) / 3),
        replace=False,
    )
    subset_indices_3 = list(set(range(len(X))) - set(subset_indices_1) - set(subset_indices_2))
    X_sub1, y_sub1 = X[subset_indices_1], y[subset_indices_1]
    X_sub2, y_sub2 = X[subset_indices_2], y[subset_indices_2]
    X_sub3, y_sub3 = X[subset_indices_3], y[subset_indices_3]
    # Train one SVC per subset with the given parameters.
    classifiers = []
    for X_sub, y_sub in [(X_sub1, y_sub1), (X_sub2, y_sub2), (X_sub3, y_sub3)]:
        classifiers.append(SVC(**params).fit(X_sub, y_sub))
    # Combine the three base models with hard (majority) voting.
    ensemble = VotingClassifier(
        estimators=[(f'clf{i}', clf) for i, clf in enumerate(classifiers)],
        voting='hard',
    )
    # NOTE(review): VotingClassifier.fit refits *clones* of the estimators on
    # (X, y), so the ensemble's members are retrained on the full data while
    # base_accuracies below come from the subset-trained originals — confirm
    # this mismatch is intentional.
    ensemble.fit(X, y)
    # NOTE(review): accuracy is measured on the training data (X, y), not a
    # held-out set — fine for a demo, but verify that is what is wanted.
    ensemble_accuracy = accuracy_score(y, ensemble.predict(X))
    base_accuracies = [accuracy_score(y, clf.predict(X)) for clf in classifiers]
    # We don't want 'bad' examples: the ensemble must strictly beat every
    # individual base classifier.
    passed = all(acc < ensemble_accuracy for acc in base_accuracies)
    if passed:
        n_passed += 1
        # Check if this passing combination is the best so far.
        if ensemble_accuracy > best_ensemble_accuracy:
            best_ensemble_accuracy = ensemble_accuracy
            best_combination = (seed, params)
# Output the results. Guard against best_combination being None: if no
# combination ever produced an ensemble that beat all of its base classifiers,
# the original unconditional subscript would raise TypeError.
print(f"Best ensemble accuracy: {best_ensemble_accuracy:.2f}")
if best_combination is not None:
    print(f"Best combination: Seed={best_combination[0]}, Parameters={best_combination[1]}")
else:
    print("No combination produced an ensemble that beat all of its base classifiers.")
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment