Skip to content

Instantly share code, notes, and snippets.

@GuokaiLiu
Last active October 21, 2020 02:58
Show Gist options
  • Save GuokaiLiu/24828bb60222d7083b90e27532b95274 to your computer and use it in GitHub Desktop.
Save GuokaiLiu/24828bb60222d7083b90e27532b95274 to your computer and use it in GitHub Desktop.
[构造失衡数据集] #data #imbalance
from collections import Counter

import numpy as np
from imblearn.datasets import make_imbalance
from imblearn.over_sampling import BorderlineSMOTE, SMOTE, ADASYN, SVMSMOTE, RandomOverSampler

from ServerUtils import loadcwru
# Build an artificially imbalanced copy of the training set, then rebalance it
# with the over-sampling algorithm named by `resample`.

# Set parameters
datast = 'DataPre_CWRU_Demo'
outdim = 10
source = 'D'
Inshape = '1D'
resample = 'SMOTE'

# Load dataset
# NOTE(review): loadcwru is a project helper not shown here; the argmax over
# axis 1 below implies Y_train comes back one-hot encoded - confirm.
X_train, Y_train = loadcwru(datast, outdim, source, shape=Inshape, norm=True,
                            tensor=False, onehot=True)

# Make imbalanced dataset: target number of samples to keep per class label.
set_imbalance = {0: 20, 1: 100, 2: 10, 3: 100, 4: 100,
                 5: 100, 6: 100, 7: 100, 8: 100, 9: 100}
X_train_im, Y_train_im = make_imbalance(X_train, np.argmax(Y_train, axis=1),
                                        sampling_strategy=set_imbalance,
                                        random_state=42)
print('Before resampling:')
print(sorted(Counter(Y_train_im).items()))

# Resample for balance based on FIVE algorithms.
# Each entry maps the algorithm name to (sampler class, constructor kwargs).
# The previous name list misspelled 'BorderlineSMOTE', listed
# 'RandomOverSampler' twice and omitted 'SMOTE', so the SMOTE branch could
# never be selected and the default `resample = 'SMOTE'` always failed.
# NOTE(review): the original author marked every sampler except
# RandomOverSampler as "unavailable", and noted that SVMSMOTE can't guarantee
# the same number of samples per class - verify before relying on the others.
samplers = {
    'ADASYN': (ADASYN, {}),
    'BorderlineSMOTE': (BorderlineSMOTE, {}),
    'RandomOverSampler': (RandomOverSampler, {'random_state': 0}),
    'SMOTE': (SMOTE, {}),
    'SVMSMOTE': (SVMSMOTE, {}),
}
algorithm = list(samplers)

# Raise instead of assert: asserts are stripped under `python -O`, which would
# leave `oversample` undefined and fail later with a confusing NameError.
if resample not in samplers:
    raise ValueError(
        'Please input the correct algorithm. '
        'Got %r, expected one of %s.' % (resample, algorithm)
    )
sampler_cls, sampler_kwargs = samplers[resample]
oversample = sampler_cls(**sampler_kwargs)

X_train_im, Y_train_im = oversample.fit_resample(X_train_im, Y_train_im)
print('After resamlling:')
print(sorted(Counter(Y_train_im).items()))

# Re-encode the resampled integer labels as one-hot rows; the width follows
# `outdim` rather than a hard-coded 10.
# NOTE(review): only Y_train is overwritten here; the resampled features stay
# in X_train_im while X_train still holds the original data - confirm that
# downstream code pairs Y_train with X_train_im.
Y_train = np.eye(outdim)[np.asarray(Y_train_im)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment