Created
November 28, 2020 14:05
-
-
Save yamasakih/90243e8645f27c0113b5246367e75119 to your computer and use it in GitHub Desktop.
mishima-syk16-pigeonXT.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from pigeonXT import annotate\n", | |
"\n", | |
"from IPython.display import Image\n", | |
"from IPython.display import Video\n", | |
"import nglview as nv\n", | |
"import pandas as pd\n", | |
"from rdkit import Chem" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### 1. Binary classification" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"examples = ['I love this movie', 'I was really disappointed by the book', 'He likes this cookie']\n", | |
"binary_labels = ['positive', 'negative']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annotations = annotate(examples=examples, options=binary_labels)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annotations" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"以下のようにすれば結果をDataFrameに加工できる。`annotate` の引数 `final_process_fn` で後処理も指定可能。" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### 2. Multi classification " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"multi_classification_labels = ['positive', 'negative', 'inbetween']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annotations = annotate(examples=examples, options=multi_classification_labels)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### 3. Multi label " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"movie_examples = [\"Star wars\", \"Mask\", \"Frozen\", \"Home alone\", \"Kiki's Delivery Service\", \"Devil's Blade\"]\n", | |
"multi_labels = [\"Romance\", \"Commedy\", \"Fantasy\", \"Science fiction\", \"Horror\", \"Anime\", \"Magician\", \"Devil\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annotate?" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`task_type='multilabel-classification'` で複数のラベルを選択できる" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annotations = annotate(examples=movie_examples, options=multi_labels,\n", | |
" task_type='multilabel-classification', reset_buttons_after_click=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annotations" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### 4. Image labeling" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"image_examples = ['images/001.jpg', 'images/002.jpg', 'images/003.jpg']\n", | |
"image_labels = ['イワトビペンギン', '皇帝ペンギン']\n", | |
"\n", | |
"def display_image(img):\n", | |
" return display(Image(img))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`display_fn` に関数を指定することで `examples` の表示の仕方をカスタマイズできる" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annotations = annotate(examples=image_examples, options=image_labels, display_fn=display_image)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"匿名関数も利用可能" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annotations = annotate(examples=image_examples, options=image_labels, display_fn=lambda img: display(Image(img)))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### 5. Video labeling" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"video_examples = ['videos/001.mp4']\n", | |
"video_labels = ['dog', 'penguin']\n", | |
"\n", | |
"def display_video(video):\n", | |
" return display(Video(video, height=360))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annotations = annotate(examples=video_examples, options=video_labels, display_fn=display_video)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### 6. Chemical structure labeling" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"structure_examples = ['c1ccccc1', 'c1ccccc1Cl']\n", | |
"structure_labels = ['OK', 'NG']\n", | |
"\n", | |
"def display_structure(smiles):\n", | |
" return display(Chem.MolFromSmiles(smiles))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annotations = annotate(examples=structure_examples, options=structure_labels, display_fn=display_structure)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### 7. Ligand protein binding pose labeling" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"binding_pose_examples = ['pdb/6lu7.pdb', 'pdb/4b05.pdb']\n", | |
"binding_pose_labels = ['OK', 'NG']\n", | |
"\n", | |
"def display_binding_pose(pdb):\n", | |
" view = nv.show_file(pdb, default=False)\n", | |
" view.layout.width = '640px'\n", | |
" view.layout.height = '640px'\n", | |
" \n", | |
" view[0].add_line(selection='protein')\n", | |
" view[0].add_ball_and_stick(selection='not protein', color='green')\n", | |
" view[0].center(selection='HETATM')\n", | |
" view[0].add_surface(selection='protein and not _H', opacity=0.7, counter=True)\n", | |
" view[0].add_contact()\n", | |
" view[0].add_cartoon(selection='protein', color='residueindex', clipDist=100, aspectRatio=0.1)\n", | |
" return display(view)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annotations = annotate(examples=binding_pose_examples, options=binding_pose_labels, display_fn=display_binding_pose)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Have a nice labeling life :)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
`nglview` をJupyter Notebookで用いる際はextensionのインストールが必要です。`nglview` のREADMEを参考にしてください。https://github.com/nglviewer/nglview#released-version