Skip to content

Instantly share code, notes, and snippets.

@e96031413
Last active March 9, 2022 08:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save e96031413/55cca80f42849a33f5da5d448a064fa4 to your computer and use it in GitHub Desktop.
Save e96031413/55cca80f42849a33f5da5d448a064fa4 to your computer and use it in GitHub Desktop.
# 針對每個元件計算其Overkill rate和Leakage rate
# 目前只適用於batch size = 1的情形
# 用來產生overkill和leakage數值的dataframe
import pandas as pd
import torch
# Build the per-component summary table (total / good / bad / overkill / leakage).
# Reads `test_df`, which must already exist and carry the columns 'class' and
# 'component_name'.

# Work on a copy so the multi-class labels in test_df are left untouched.
test_df_mapping2_label = test_df.copy()
# Collapse to two labels: 0 stays 0 (good); every class above 1 becomes 1 (bad).
# A single comparison covers any number of defect classes, not just 2..5.
test_df_mapping2_label.loc[test_df_mapping2_label['class'] > 1, 'class'] = 1

# Count images per component once, so names and totals are guaranteed to line up.
component_counts = test_df_mapping2_label['component_name'].value_counts()
name_of_each_component = component_counts.index.tolist()  # component names
num_of_image_in_each_component = component_counts.values  # images per component
test_component_name_df = pd.DataFrame(
    list(zip(name_of_each_component, num_of_image_in_each_component)),
    columns=['component_name', 'total'],
)

# Per-component number of good (class 0) and bad (class 1) samples.
# Selecting by label value (instead of by position in value_counts) stays correct
# when a component has only one of the two labels, and mapping + fillna always
# creates both columns, even if no component has any good (or any bad) sample.
good_counts = test_df_mapping2_label.loc[
    test_df_mapping2_label['class'] == 0, 'component_name'
].value_counts()
bad_counts = test_df_mapping2_label.loc[
    test_df_mapping2_label['class'] == 1, 'component_name'
].value_counts()
test_component_name_df['good'] = (
    test_component_name_df['component_name'].map(good_counts).fillna(0).astype(int)
)
test_component_name_df['bad'] = (
    test_component_name_df['component_name'].map(bad_counts).fillna(0).astype(int)
)

# Keep the diagnostic for components (e.g. R0402) that have no good sample at all.
only_bad_components = test_component_name_df.loc[
    (test_component_name_df['good'] == 0) & (test_component_name_df['bad'] > 0),
    'component_name',
]
for name in only_bad_components:
    print(f"{name} only contains bad label.")

# Fix the column order.
test_component_name_df = test_component_name_df[['component_name', 'total', 'good', 'bad']]
# Add the overkill / leakage counters, initialised to 0 for every component.
col = {'overkill': 0, 'leakage': 0}
test_component_name_df = test_component_name_df.assign(**col)
# Excerpt of the evaluation routine: the signature and the `.....` lines are
# elided in the source, so `preds`, `target`, `args`, `component_name` and the
# tp / fn / tn / fp counters are defined in parts that are not shown here.
# For each sample (batch size 1 only) it compares the binarised target with the
# binarised prediction, updates the confusion counters, and increments the
# 'overkill' or 'leakage' column of the matching component row in
# test_component_name_df, which it returns.
# NOTE(review): per the original comments, label 0 (good) is treated as the
# "positive" class here — confirm this matches the rest of the project.
# NOTE(review): ''.join(component_name) presumably turns a one-element batch of
# name strings into a plain string — verify against the data loader.
def testing(test_component_name_df, , .........):
.....
preds = torch.tensor([(lambda i: 1 if i > 1 else i)(i) for i in preds]).cuda() # map predictions greater than 1 to 1
target = torch.tensor([(lambda i: 1 if i > 1 else i)(i) for i in target]).cuda() # map labels greater than 1 to 1
.....
if args.batch_size==1: # only done when batch size is 1: one sample per step, so no per-sample component-name bookkeeping is needed
if target.data == 0 and target.data == preds.data: # True Positive case
tp += 1
elif target.data == 0 and target.data != preds.data: # False Negative (overkill) case
fn += 1
test_component_name_df.loc[(test_component_name_df["component_name"] == ''.join(component_name)), 'overkill'] +=1 #find the row for this batch's component name and add 1 to its overkill count
elif target.data == 1 and target.data == preds.data: # True Negative case
tn += 1
elif target.data == 1 and target.data != preds.data: # False Positive (leakage) case
fp += 1
test_component_name_df.loc[(test_component_name_df["component_name"] == ''.join(component_name)), 'leakage'] +=1 #find the row for this batch's component name and add 1 to its leakage count
return test_component_name_df #return the collected data
# Per-component overkill / leakage rate: share of that component's images, as a
# percentage rounded to two decimals and rendered as a string ending in '%'.
test_component_name_df['overkill_rate'] = (
    test_component_name_df['overkill'] / test_component_name_df['total'] * 100
).round(decimals=2).astype(str) + '%'
test_component_name_df['leakage_rate'] = (
    test_component_name_df['leakage'] / test_component_name_df['total'] * 100
).round(decimals=2).astype(str) + '%'

# Overall figures across all components, appended as a final summary row.
# Each column is summed once and reused for both the counts and the rates.
overall_row = {
    column: int(test_component_name_df[column].sum())
    for column in ('total', 'good', 'bad', 'overkill', 'leakage')
}
for kind in ('overkill', 'leakage'):
    # Guard against an empty table, where the total would be 0.
    if overall_row['total']:
        overall_rate = round(overall_row[kind] / overall_row['total'] * 100, 2)
    else:
        overall_rate = 0.0
    # Same "x.xx%" format as the per-component rows, so the column holds one
    # kind of value instead of mixing percentage strings with raw fractions.
    overall_row[f'{kind}_rate'] = f'{overall_rate}%'

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to
# add the row. 'component_name' is left empty (NaN) for the summary row.
test_component_name_df = pd.concat(
    [test_component_name_df, pd.DataFrame([overall_row])],
    ignore_index=True,
)

# Export the table to Excel.
test_component_name_df.to_excel('./overkill_and_leakage.xlsx', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment