Skip to content

Instantly share code, notes, and snippets.

@kohnakagawa
Created August 11, 2019 02:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kohnakagawa/ecc0dfdf3e81c0a69531fb1602101bdf to your computer and use it in GitHub Desktop.
Save kohnakagawa/ecc0dfdf3e81c0a69531fb1602101bdf to your computer and use it in GitHub Desktop.
Malware Data Science chapter 8の内容をEmberで使われている特徴量で実施した場合の結果
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import os\n",
"import glob\n",
"import sklearn\n",
"import ember\n",
"import numpy as np\n",
"import yara\n",
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# glob returns paths in arbitrary, filesystem-dependent order; sort them so the\n",
"# sample order (and therefore the seeded train/test split) is reproducible.\n",
"b_paths = sorted(glob.glob(os.path.join(\"data\", \"benignware\", \"*\")))\n",
"m_paths = sorted(glob.glob(os.path.join(\"data\", \"malware\", \"*\")))\n",
"# Label convention: 0 = benignware, 1 = malware.\n",
"labels = [0] * len(b_paths) + [1] * len(m_paths)\n",
"data_paths = b_paths + m_paths"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# YARA rule for PE files: \"MZ\" at offset 0, and the 32-bit value located at the\n",
"# offset stored at 0x3C must equal 0x4550.\n",
"rule = yara.compile(source='rule IsPeFile {strings:$mz = \"MZ\"condition:$mz at 0 and uint32(uint32(0x3C)) == 0x4550}')\n",
"# Keep only the (path, label) pairs whose file matches the rule.\n",
"path_labels = []\n",
"for sample_path, sample_label in zip(data_paths, labels):\n",
"    if rule.match(sample_path):\n",
"        path_labels.append((sample_path, sample_label))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"fextractor = ember.PEFeatureExtractor()\n",
"\n",
"\n",
"def read_bytes(path):\n",
"    \"\"\"Return the full contents of the file at *path* as bytes.\"\"\"\n",
"    # The context manager closes each handle promptly instead of leaking one\n",
"    # open file per sample until garbage collection.\n",
"    with open(path, \"rb\") as f:\n",
"        return f.read()\n",
"\n",
"\n",
"# One Ember feature vector per sample, in path_labels order.\n",
"fvector = np.array([fextractor.feature_vector(bytez=read_bytes(p)) for p, _ in path_labels])\n",
"# Rebuild labels from the filtered pairs so they stay aligned with fvector.\n",
"labels = np.array([l for _, l in path_labels])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Persist the extracted feature matrix to fvector.pickle.\n",
"# NOTE(review): only fvector is written; the matching labels array is not saved.\n",
"with open(\"fvector.pickle\", \"wb\") as pickle_file:\n",
"    pickle.dump(fvector, pickle_file)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 20% of the samples as a test set; the fixed seed pins the split.\n",
"X, y = fvector, labels\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=42\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
" \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
]
}
],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.model_selection import KFold\n",
"\n",
"# Set n_estimators explicitly: the library default changes from 10 to 100 in\n",
"# scikit-learn 0.22, so relying on it makes results version-dependent.\n",
"# Seed the forest so repeated runs give the same model.\n",
"clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
"kf = KFold(n_splits=4)\n",
"\n",
"# 4-fold cross-validation on the training split only (the test split stays unseen).\n",
"results = []\n",
"for train_idx, test_idx in kf.split(X_train, y_train):\n",
"    clf.fit(X_train[train_idx], y_train[train_idx])\n",
"    y_pred = clf.predict(X_train[test_idx])\n",
"    results.append(accuracy_score(y_train[test_idx], y_pred))\n",
"print(\"4-fold CV accuracy: %.4f (+/- %.4f)\" % (np.mean(results), np.std(results)))\n",
"\n",
"# The loop leaves clf fitted on only the last fold's 3/4 subset of X_train.\n",
"# Refit on the whole training split so the model evaluated afterwards has seen\n",
"# all of the training data.\n",
"clf.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.978494623655914\n"
]
}
],
"source": [
"# Accuracy of the fitted classifier on the held-out test split.\n",
"y_pred = clf.predict(X_test)\n",
"test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)\n",
"print(test_accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import auc, roc_curve\n",
"\n",
"# Column 1 of predict_proba is the score for label 1 (malware in this notebook).\n",
"test_proba = clf.predict_proba(X_test)\n",
"y_pred_prob = test_proba[:, 1]\n",
"# ROC points over all score thresholds, then the area under that curve.\n",
"fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)\n",
"roc_auc = auc(fpr, tpr)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# ROC curve drawn through the explicit Axes API, zoomed to FPR in [0, 0.03].\n",
"fig, ax = plt.subplots()\n",
"ax.set_title('Receiver Operating Characteristic')\n",
"ax.plot(fpr, tpr, 'b', label = 'AUC = %0.5f' % roc_auc)\n",
"ax.legend(loc = 'lower right')\n",
"ax.set_xlim([0, 0.03])\n",
"ax.set_ylim([0, 1])\n",
"ax.set_ylabel('True Positive Rate')\n",
"ax.set_xlabel('False Positive Rate')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f7908ba6668>"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAa8AAAD4CAYAAABbl2n6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAcZUlEQVR4nO3deZwdVZ3+8c/DviSGTZgAQmtgCFsSSUBZBXH4ubEoYUAWiTpmcEURfi4wGBwcYRB/riwRMSBxxIBIAGURkE2C6YYshEVkERF+IlsIhGASnvnjnuCl6U53ktv3prqf9+uVV1edOlX1PbdDHk5V3Xtlm4iIiCpZpdUFRERELKuEV0REVE7CKyIiKifhFRERlZPwioiIylmt1QUMFBtttJHb2tpaXUZERKV0dHQ8ZfuNndsTXk3S1tZGe3t7q8uIiKgUSX/qqj2XDSMionIy82qSex97mtEnXNjqMiIimqrjjA/3yXEz84qIiMpJeEVEROUkvCIionISXhERUTkJr4iIqJyVIrwktUm6u4v2r0l611L2O0jSdn1bXURErGxWivDqju2Tbf9mKV0OAhoSXpLytoGIiIpYmcJrVUk/lDRH0rWS1pY0SdJYAEmnSbpH0ixJ35S0G3AAcIakGZKGSRolaVrpc5mk9cu+O5e2GZLOWDLLkzRO0lRJNwDXSxok6XpJd0qaLenA0q9N0n2lnj9ImizpXZJuk/SApF1a9JpFRAxIK1N4bQ38wPb2wHPAwUs2SNoQ+ACwve0RwKm2fwdMBU6wPcr2g8CFwBdLn9nAV8shfgz8u+1RwOJO590JGGv7HcAC4AO2dwL2Ac6UpNJvK+BMYHj5cziwB3A88JWuBiRpvKR2Se2L5s9b7hcmIiJea2UKr4dtzyjLHUBb3ba51ILlR5I+CMzvvLOkIcB6tm8qTRcAe0laDxhs+/bS/tNOu15n+5klhwH+S9Is4DfAZsAmdfXNtv0KMAe43raphWQbXbA90fYY22NWW2dwz69ARET0ysoUXi/XLS+m7qOrbC8CdgEuAd4PXN3A875Yt3wE8EZgdJml/RVYq4v6Xqlbf4V8zFZERFOtTOHVLUmDgCG2fwV8HhhZNs0DBgPYngs8K2nPsu0o4CbbzwHzJL2ttB+2lFMNAZ60vVDSPsCWDR5KREQ0QFVmDIOByyWtRe3S3nGl/WfADyV9FhgLHA2cI2kd4CHgI6Xfx0q/V4CbqF2G7Mpk4ApJs4F24L6+GExERKwY1W7b9G+SBtl+oSx/CRhq+9hm1rDuP73Zw486pZmnjIhouRX9VHlJHbbHdG6vysxrRb1P0pepjfdPwLjWlhMREStiQISX7YuBi1tdR0RENEYlHtiIiIiol/CKiIjKGRCXDVcG226+Ie199HXYEREDTWZeERFROQmviIionIRXRERUTsIrIiIqJw9sNMnfn5jDo1/bsdVlRCdbnDy71SVExHLIzCsiIion4RUREZWT8IqIiMpJeEVEROUkvCIionL6NLwktUm6exn6f6WP6thb0m59ceyIiGi+lW3m1SfhBewNdBlekvJ2gYiIimlGeK0mabKkeyVdIum9kn65ZKOkf5F0maTTgLUlzZA0uWw7UtLvS9u5klYt7ftJul3SnZKmSBpU2h+RdEppny1puKQ24Bjg8+U4e0qaJOkcSXcA/y1pA0m/lDRL0jRJI8rxJkj6STnXA5I+XtovlHRQ3RgmSzqwCa9lRETQnPDaBjjL9rbA88D2wHBJbyzbPwKcb/tLwEu2R9k+QtK2wKHA7rZHAYuBIyRtBJwEvMv2TkA7cFzd+Z4q7WcDx9t+BDgH+H/l2LeUfpsDu9k+DjgFuMv2CGqzvwvrjjcCeCewK3CypE2BH1G+jVnSEGqzuqsa8WJFRETPmhFef7Z9W1m+CNgd+AlwpKT1qIXCr7vYb19gNDBd0oyy/hbg7cB2wG2l/Whgy7r9flF+dg
BtS6lriu3FZXmPUhO2bwA2lPSGsu1y2y/Zfgq4EdjF9k3A1iWAPwRcantR5xNIGi+pXVL7My8u7rw5IiKWUzPu97iL9R8DVwALqIXI6/7hBwRcYPvLr2mU9geus/2hbs73cvm5mKWP78WeCq+rt6v1C4EjgcOozR5fv6M9EZgIMGKztTsfJyIillMzZl5bSNq1LB8O3Gr7ceBxapf/flzXd6Gk1cvy9cBYSRsDlPtSWwLTgN0lbVXa15X0zz3UMA8YvJTttwBHlOPtTe3S4/Nl24GS1pK0IbUHP6aX9knA5wBs39PD+SMiooGaEV73A5+SdC+wPrV7UQCTqV1SvLeu70RglqTJJRBOAq6VNAu4Dhhq+2/U7jf9T2m/HRjeQw1XAB9Y8sBGF9snAKPL8U6jdilyiVnULhdOA/6zBC+2/wrcy2vDNyIimkB2a65mSfo+tYckftSSAnpB0gTgBdvf7GLbOsBsYCfbc3s61ojN1vaV/75V44uMFZJPlY9YuUnqsD2mc3tL3uclqYPaU3wXteL8K0rSu6jNur7Xm+CKiIjGaskbdG2PbsV5l5XtCd20/4bXPuEYERFNtLJ9wkZERESPEl4REVE5+Vy/Jllj6PZscXJ7q8uIiOgXMvOKiIjKSXhFRETlJLwiIqJyEl4REVE5eWCjSe578j52/97urS4j+tBtn7mt504R0RCZeUVEROUkvCIionISXhERUTkJr4iIqJyEV0REVE7CKyIiKqffhpekz5UvjOxu+3mStmtmTRER0Rj9NryAzwFdhpekVW3/m+17mlxTREQ0QL8IL0nrSrpK0kxJd0v6KrApcKOkG0ufFySdKWkmsKuk30oaU7ft62X/aZI2Ke3DyvpsSadKeqG0D5V0s6QZ5Xx7tmjoEREDUr8IL+DdwOO2R9reAfg28Diwj+19Sp91gTtKn1s77b8uMM32SOBm4OOl/TvAd2zvCDxW1/9w4Brbo4CRwIyuipI0XlK7pPaFLyxswDAjIgL6T3jNBv5F0umS9rQ9t4s+i4FLu9n/78CVZbkDaCvLuwJTyvJP6/pPBz4iaQKwo+15XR3U9kTbY2yPWX3Q6r0eTERELF2/CC/bfwB2ohZip0o6uYtuC2wv7uYQC227LC+mh898tH0zsBfwF2CSpA8vX+UREbE8+kV4SdoUmG/7IuAMakE2Dxi8goeeBhxclg+rO9+WwF9t/xA4r5wvIiKapL98qvyOwBmSXgEWAp+gdsnvakmP1933WlafAy6SdCJwNbDkcuTewAmSFgIvAJl5RUQ0kf5xtSw6K+8Te8m2JR0GfMj2gctzrEFbDPLIE0Y2tsBYqeQrUSIaT1KH7TGd2/vLzKuvjAa+L0nAc8BHW1xPRESQ8Foq27dQexQ+IiJWIv3igY2IiBhYMvNqkuEbD889kYiIBsnMKyIiKifhFRERlZPwioiIykl4RURE5SS8mmTe/fdz017vaHUZERH9QsIrIiIqJ+EVERGVk/CKiIjKSXhFRETlJLwiIqJyEl4REVE5Ca8VJOm3kl73XTMREdF3El49UE1ep4iIlUhl/1GW9B+S7pd0q6T/kXS8pGGSrpbUIekWScNL30mSvivpd5IekjS27jgnSJouaZakU0pbWzn2hcDdwJsknS2pXdKcJf0iIqI1KvmVKJJ2Bg6m9kWRqwN3Ah3AROAY2w9IehtwFvDOsttQYA9gODAVuETSfsDWwC6AgKmS9gIeLe1H255Wznmi7WckrQpcL2mE7Vk91DkeGA+wyZprNmz8EREDXSXDC9gduNz2AmCBpCuAtYDdgCmSlvSrT4xf2n4FuEfSJqVtv/LnrrI+iFpoPQr8aUlwFf9awmg1akG4HbDU8LI9kVqgss3gwV6egUZExOtVNby6sgrwnO1R3Wx/uW5ZdT+/Yfvc+o6S2oAX69bfDBwP7Gz7WUmTqIVlRES0QFXved0G7C9pLUmDgPcD84GHJR0Crz5oMbKH41wDfLQcA0mbSdq4i3
5voBZmc8us7T2NGkhERCy7Ss68bE+XNJXaZbu/ArOBucARwNmSTqJ2L+xnwMylHOdaSdsCt5dLjS8ARwKLO/WbKeku4D7gz9TCMyIiWkR2NW/FSBpk+wVJ6wA3A+Nt39nqurqzzeDBnvjWnXjHzTe1upSIiMqQ1GH7de+lreTMq5goaTtq954uWJmDKyIiGquy4WX78FbXEBERrVHVBzYiImIAS3hFRETlJLyaZPA22+RhjYiIBkl4RURE5SS8IiKichJeERFROQmvJnnysbl8/wtXtLqMiIh+IeEVERGVk/CKiIjKSXhFRETlJLwiIqJyEl4REVE5Ca+IiKicfhFektok3d0Hx31E0kaNPm5ERKyYfhFeKwtJq7a6hoiIgaA/hdeqkn4oaY6kayWtLWmYpKsldUi6RdJwAEn7S7pD0l2SfiNpk9K+Ydl3jqTzAC05uKQjJf1e0gxJ5y4JKkkvSDpT0kxg11YMPCJioOlP4bU18APb2wPPAQcDE4HP2B4NHA+cVfreCrzd9luBnwH/t7R/Fbi1HOMyYAsASdsChwK72x4FLAaOKPusC9xhe6TtW+sLkjReUruk9hfmz+2TQUdEDESV/SblLjxse0ZZ7gDagN2AKdKrE6g1y8/NgYslDQXWAB4u7XsBHwSwfZWkZ0v7vsBoYHo51trAk2XbYuDSrgqyPZFagLLFP23tFRteREQs0Z/C6+W65cXAJsBzZabU2feAb9meKmlvYEIPxxZwge0vd7Ftge3Fy1FvREQsp/502bCz54GHJR0CoJqRZdsQ4C9l+ei6fW4GDi/93wOsX9qvB8ZK2rhs20DSln1cf0REdKM/hxfU7kt9rDxMMQc4sLRPoHY5sQN4qq7/KcBekuZQu3z4KIDte4CTgGslzQKuA4Y2ZQQREfE6/eKyoe1HgB3q1r9Zt/ndXfS/HLi8i/angf26OcfFwMVdtA9a9oojImJF9PeZV0RE9EMJr4iIqJyEV0REVE7CKyIiKifh1SQbbz6ET5+5f6vLiIjoFxJeERFROQmviIionIRXRERUTsIrIiIqp198wkYVPPHwg3z9yLErdIwTL7qkQdVERFRbZl4REVE5Ca+IiKichFdERFROwisiIion4RUREZUz4MNLUpukw+vWx0j6bitrioiIpRvw4QW0Aa+Gl+12259tXTkREdGTyoeXpHUlXSVppqS7JR0qabSkmyR1SLpG0tDSdytJvyl975Q0DDgN2FPSDEmfl7S3pCtL/w0k/VLSLEnTJI0o7RMknS/pt5IekpSwi4hoov7wJuV3A4/bfh+ApCHAr4EDbf9N0qHA14GPApOB02xfJmktauH9JeB42+8v++9dd+xTgLtsHyTpncCFwKiybTiwDzAYuF/S2bYX1hcmaTwwHmDIOms3fuQREQNUfwiv2cCZkk4HrgSeBXYArpMEsCrwhKTBwGa2LwOwvQCg9OnOHsDBpf8NkjaU9Iay7SrbLwMvS3oS2AR4rH5n2xOBiQCbbbi+GzDWiIigH4SX7T9I2gl4L3AqcAMwx/au9f1KeDXSy3XLi+kHr2VERFX0h3temwLzbV8EnAG8DXijpF3L9tUlbW97HvCYpINK+5qS1gHmUbv015VbgCNK/72Bp2w/36cDioiIHvWH2cKOwBmSXgEWAp8AFgHfLfe/VgO+DcwBjgLOlfS10vcQYBawWNJMYBJwV92xJwDnS5oFzAeObsaAIiJi6WTnVkwzbLbh+v7ke/ZdoWPkU+UjYqCR1GF7TOf2yl82jIiIgSfhFRERlZPwioiIykl4RURE5fSHpw0rYeibh+WBi4iIBsnMKyIiKifhFRERlZPwioiIykl4RURE5eSBjSZZ8MQ87v36DU0737YnvrNp54qIaLbMvCIionISXhERUTkJr4iIqJyEV0REVE7CKyIiKqey4SXpd00+X5ukw5t5zoiI6Fplw8v2bs06l6TVgDYg4RURsRKo7Pu8JL1ge5CkvYFTgOeAHYGfA7
OBY4G1gYNsPyhpErAAGAO8ATjO9pWS1gLOLu2LSvuNksYBHwQGAasCawLbSpoBXABcC/wYWIPa/wQcbPuBZow9ImKgq2x4dTIS2BZ4BngIOM/2LpKOBT4DfK70awN2AYYBN0raCvgUYNs7ShoOXCvpn0v/nYARtp8pIXm87fcDSPoe8B3bkyWtQS3gXkPSeGA8wNAhG/fBsCMiBqbKXjbsZLrtJ2y/DDxIbVYEtRlYW12/n9t+pcyQHgKGA3sAFwHYvg/4E7AkvK6z/Uw357wd+IqkLwJb2n6pcwfbE22PsT1mg3XXW7ERRkTEq/pLeL1ct/xK3forvHZ26U77dV7v7MXuNtj+KXAA8BLwK0n5PKaIiCbpL+HVW4dIWkXSMOAtwP3ALcARAOVy4RalvbN5wOAlK5LeAjxk+7vA5cCIPq49IiKK/nLPq7ceBX5P7YGNY2wvkHQWcLak2dQe2Bhn+2VJnfedBSyWNBOYRO0BjqMkLQT+P/BfTRpDRMSAJ7unK2f9Q3na8Erbl7Ti/Dtsto2nfPLspp0vnyofEf2BpA7bYzq3D7TLhhER0Q8MmMuGtse1uoaIiGiMzLwiIqJyEl4REVE5A+ayYautNXRwHqKIiGiQzLwiIqJyEl4REVE5Ca+IiKichFdERFROHthokscff5wJEya8pq3zekRE9E5mXhERUTkJr4iIqJyEV0REVE7CKyIiKifhFRERlZPw6iVJ4yRt2uo6IiIi4dUrklYFxgEJr4iIlUClw0vSkZJ+L2mGpHMlbSnpAUkbSVpF0i2S9pPUJuk+SZMl3SvpEknrlGPsK+kuSbMlnS9pzdL+iKTTJd0JfAgYA0wu51pb0mmS7pE0S9I3W/gyREQMOJUNL0nbAocCu9seBSwG3gGcDpwNfAG4x/a1ZZdtgLNsbws8D3xS0lrAJOBQ2ztSe9P2J+pO87TtnWxfBLQDR5RzrQN8ANje9gjg1L4dbURE1KtseAH7AqOB6ZJmlPW32D4PeANwDHB8Xf8/276tLF8E7EEt0B62/YfSfgGwV90+F3dz7rnAAuBHkj4IzO+qk6Txktoltc+f32WXiIhYDlX+eCgBF9j+8msaa5cDNy+rg4B5Zdmd9u+83pUXu2q0vUjSLtQCcyzwaeB1X9ZleyIwEWDTTTftzfkiIqIXqjzzuh4YK2ljAEkbSNqS2mXDycDJwA/r+m8hadeyfDhwK3A/0CZpq9J+FHBTN+ebBwwu5xoEDLH9K+DzwMiGjSoiInpU2ZmX7XsknQRcK2kVYCFwHLAztftgiyUdLOkjwI3UgupTks4H7gHOtr2gbJ8iaTVgOnBON6ecBJwj6SXgPcDl5Z6ZynkjIqJJKhteALYv5vX3pd5et/2DAJLagEW2j+ziGNcDb+2iva3T+qXApXVNuyxn2RERsYKqfNkwIiIGqErPvHrL9iPADq2uIyIiGiMzr4iIqJyEV0REVI7svP2oGcaMGeP29vZWlxERUSmSOmyP6dyemVdERFROwisiIion4RUREZWT8IqIiMoZEO/zWhk8++y9/HzKin8ox78e8vsGVBMRUW2ZeUVEROUkvCIionISXhERUTkJr4iIqJyEV0REVE7CKyIiKmeZw0vSBEnHL2X7QZK2W47jvlB+birpkmXdv8G1HCPpw42oISIiGq8vZl4HAcscGEvYftz22M7tkpbnPWnLXIuk1WyfY/vC5ThfREQ0Qa/CS9KJkv4g6VZgm9L2cUnTJc2UdKmkdSTtBhwAnCFphqRhXfUr+79Z0u2SZks6te5cbZLuLsvjJE2VdANwfWk7oRxvlqRT6vb7cGmbKekn3dQyStK00u8ySeuXfX8r6duS2oFj62eXZb+rJXVIukXS8NJ+iKS7y/luXsHfQ0RELIMew0vSaOAwYBTwXmDnsukXtne2PRK4F/iY7d8BU4ETbI+y/WBX/cr+3wHOtr0j8MRSStgJGGv7HZL2A7YGdin1jJa0l6TtgZOAd5bzHNtNLR
cCX7Q9ApgNfLXuPGvYHmP7zE7nnwh8xvZo4HjgrNJ+MvB/yvkO6Oa1Gy+pXVL7888vWsoQIyJiWfTmUtyewGW25wNImlradygzpvWAQcA13ezfXb/dgYPL8k+A07vZ/zrbz5Tl/cqfu8r6IGphNhKYYvspgLr+r5I0BFjP9k2l6QJgSl2Xi7vYZxCwGzBF0pLmNcvP24BJkn4O/KKrwm1PpBZ+DBu2br44LSKiQVbksw0nAQfZnilpHLD3cvTrzT/oL9YtC/iG7XPrO0j6TK8q7v15llgFeM72qM4bbB8j6W3A+4AOSaNtP92AOiIioge9ued1M3CQpLUlDQb2L+2DgSckrQ4cUdd/XtlGD/1uo3Y5kk7tS3MN8NEyI0LSZpI2Bm4ADpG0YWnfoHMttucCz0ras2w7CriJpbD9PPCwpEPKcSVpZFkeZvsO2ycDfwPe1MsxRETECuoxvGzfSe2S2kzg18D0suk/gDuohdB9dbv8DDhB0l2Shi2l37HApyTNBjbrTbG2rwV+Ctxe9rsEGGx7DvB14CZJM4FvdVPL0dQe4JhF7Z7Z13px2iOAj5XjzgEOLO1nlIdN7gZ+R+31iYiIJpCdWzHNMGzYuv7Gaduv8HHylSgRMZBI6rA9pnN7PmEjIiIqJ+EVERGVk/CKiIjKWZFH5WMZrL/+trlfFRHRIJl5RURE5SS8IiKicvKofJNImgfc3+o6mmwj4KlWF9ECA3HcGfPA0exxb2n7jZ0bc8+ree7v6r0K/Zmk9oE2ZhiY486YB46VZdy5bBgREZWT8IqIiMpJeDXPxFYX0AIDccwwMMedMQ8cK8W488BGRERUTmZeERFROQmviIionIRXg0l6t6T7Jf1R0pe62L6mpIvL9jsktTW/ysbqxZj3knSnpEWSxraixkbrxZiPk3SPpFmSrpe0ZSvqbLRejPuY8j13MyTdKmm7VtTZSD2Nua7fwZIsqeWPka+oXvyex0n6W/k9z5D0b00v0nb+NOgPsCrwIPAWYA1qX1C5Xac+nwTOKcuHARe3uu4mjLkNGAFcCIxtdc1NGvM+wDpl+RNV/z0vw7jfULd8AHB1q+vu6zGXfoOpfev8NGBMq+tuwu95HPD9VtaZmVdj7QL80fZDtv9O7ZucD+zU50DggrJ8CbCvJDWxxkbrccy2H7E9C3ilFQX2gd6M+Ubb88vqNGDzJtfYF3oz7ufrVtcFqv5EWG/+mwb4T+B0YEEzi+sjvR1zSyW8Gmsz4M9164+Vti772F4EzAU2bEp1faM3Y+5vlnXMHwN+3acVNUevxi3pU5IeBP4b+GyTausrPY5Z0k7Am2xf1czC+lBv/34fXC6LXyLpTc0p7R8SXhF9SNKRwBjgjFbX0iy2f2B7GPBF4KRW19OXJK0CfAv4QqtrabIrgDbbI4Dr+MfVpKZJeDXWX4D6/wPZvLR12UfSasAQ4OmmVNc3ejPm/qZXY5b0LuBE4ADbLzeptr60rL/rnwEH9WlFfa+nMQ8GdgB+K+kR4O3A1Io/tNHj79n203V/p88DRjeptlclvBprOrC1pDdLWoPaAxlTO/WZChxdlscCN7jcAa2o3oy5v+lxzJLeCpxLLbiebEGNfaE34966bvV9wANNrK8vLHXMtufa3sh2m+02avc3D7Dd3ppyG6I3v+ehdasHAPc2sT4gnyrfULYXSfo0cA21J3bOtz1H0teAdttTgR8BP5H0R+AZan8xKqs3Y5a0M3AZsD6wv6RTbG/fwrJXSC9/z2cAg4Ap5XmcR20f0LKiG6CX4/50mXEuBJ7lH/+jVkm9HHO/0ssxf1bSAcAiav+OjWt2nfl4qIiIqJxcNoyIiMpJeEVEROUkvCIionISXhERUTkJr4iIqJyEV0REVE7CKyIiKud/Af95FsLTkTi5AAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"\n",
"# Aggregate the forest's per-dimension importances into one value per feature\n",
"# group of the extractor. Each group occupies f.dim consecutive dimensions, in\n",
"# the order the groups appear in fextractor.features.\n",
"fti = clf.feature_importances_\n",
"names = []\n",
"importances = []\n",
"offset = 0\n",
"for feature in fextractor.features:\n",
"    next_offset = offset + feature.dim\n",
"    names.append(feature.name)\n",
"    importances.append(np.sum(fti[offset:next_offset]))\n",
"    offset = next_offset\n",
"\n",
"sns.barplot(x=importances, y=names)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@kohnakagawa
Copy link
Author

AUCを比較すると、Emberのほうが優れていた。

Malware Data Scienceのモデルだと AUC が0.9951
Emberだと AUC が0.9972

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment