yamasakih/mishimasyk16.ipynb

## mishimasyk16.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pigeonXT import annotate\n",
    "\n",
    "from IPython.display import Image\n",
    "from IPython.display import Video\n",
    "import nglview as nv\n",
    "import pandas as pd\n",
    "from rdkit import Chem"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. Binary classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "examples = ['I love this movie', 'I was really disappointed by the book', 'He likes this cookie']\n",
    "binary_labels = ['positive', 'negative']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotations = annotate(examples=examples, options=binary_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "以下のようにすればDataFrameに加工できる。`final_process_fn` で後処理も指定可能。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Multi-class classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "multi_classification_labels = ['positive', 'negative', 'inbetween']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotations = annotate(examples=examples, options=multi_classification_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. Multi-label classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Movie titles to annotate and the candidate labels offered for each title.\n",
    "movie_examples = [\"Star wars\", \"Mask\", \"Frozen\", \"Home alone\", \"Kiki's Delivery Service\", \"Devil's Blade\"]\n",
    "multi_labels = [\"Romance\", \"Comedy\", \"Fantasy\", \"Science fiction\", \"Horror\", \"Anime\", \"Magician\", \"Devil\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotate?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`task_type='multilabel-classification'` で複数のラベルを選択できる"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotations = annotate(examples=movie_examples, options=multi_labels,\n",
    "    task_type='multilabel-classification', reset_buttons_after_click=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4. Image labeling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Image files to annotate and the labels offered for each one\n",
    "# (rockhopper penguin / emperor penguin).\n",
    "image_examples = [\n",
    "    'images/001.jpg',\n",
    "    'images/002.jpg',\n",
    "    'images/003.jpg',\n",
    "]\n",
    "image_labels = ['イワトビペンギン', '皇帝ペンギン']\n",
    "\n",
    "\n",
    "def display_image(img):\n",
    "    \"\"\"Show `img` in the cell output.\n",
    "\n",
    "    `img` is wrapped in an IPython `Image` and handed to `display`;\n",
    "    the value returned by `display` is passed back to the caller.\n",
    "    \"\"\"\n",
    "    picture = Image(img)\n",
    "    return display(picture)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`display_fn` に関数を指定することで `examples` の表示の仕方をカスタマイズできる"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotations = annotate(examples=image_examples, options=image_labels, display_fn=display_image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "匿名関数も利用可能"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotations = annotate(examples=image_examples, options=image_labels, display_fn=lambda img: display(Image(img)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5. Video labeling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Video files to annotate and the labels offered for each one.\n",
    "video_examples = ['videos/001.mp4']\n",
    "video_labels = ['dog', 'penguin']\n",
    "\n",
    "\n",
    "def display_video(video):\n",
    "    \"\"\"Show `video` in the cell output.\n",
    "\n",
    "    `video` is wrapped in an IPython `Video` created with `height=360`\n",
    "    and handed to `display`; the value returned by `display` is passed\n",
    "    back to the caller.\n",
    "    \"\"\"\n",
    "    player = Video(video, height=360)\n",
    "    return display(player)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotations = annotate(examples=video_examples, options=video_labels, display_fn=display_video)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 6. Chemical structure labeling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# SMILES strings to annotate and the labels offered for each one.\n",
    "structure_examples = ['c1ccccc1', 'c1ccccc1Cl']\n",
    "structure_labels = ['OK', 'NG']\n",
    "\n",
    "\n",
    "def display_structure(smiles):\n",
    "    \"\"\"Show the molecule described by `smiles` in the cell output.\n",
    "\n",
    "    The string is parsed with `Chem.MolFromSmiles` and the resulting\n",
    "    object is handed to `display`; the value returned by `display` is\n",
    "    passed back to the caller.\n",
    "    \"\"\"\n",
    "    molecule = Chem.MolFromSmiles(smiles)\n",
    "    return display(molecule)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotations = annotate(examples=structure_examples, options=structure_labels, display_fn=display_structure)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7. Ligand protein binding pose labeling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Structure files to annotate and the labels offered for each pose.\n",
    "binding_pose_examples = ['pdb/6lu7.pdb', 'pdb/4b05.pdb']\n",
    "binding_pose_labels = ['OK', 'NG']\n",
    "\n",
    "\n",
    "def display_binding_pose(pdb):\n",
    "    \"\"\"Show the structure file `pdb` in a 640px x 640px nglview widget.\n",
    "\n",
    "    The file is loaded with `default=False` (presumably so that nglview\n",
    "    adds no representations of its own -- confirm) and representations\n",
    "    are then added explicitly to component 0: lines and a cartoon for\n",
    "    `protein`, a surface for `protein and not _H`, green ball-and-stick\n",
    "    for `not protein`, and contacts. The view is centred on the hetero\n",
    "    atoms.\n",
    "\n",
    "    Returns whatever `display` returns for the widget.\n",
    "    \"\"\"\n",
    "    view = nv.show_file(pdb, default=False)\n",
    "    view.layout.width = '640px'\n",
    "    view.layout.height = '640px'\n",
    "\n",
    "    view[0].add_line(selection='protein')\n",
    "    view[0].add_ball_and_stick(selection='not protein', color='green')\n",
    "    # 'hetero' is the NGL selection keyword for HETATM records; the literal\n",
    "    # 'HETATM' is not a keyword of the NGL selection language.\n",
    "    view[0].center(selection='hetero')\n",
    "    # NOTE(review): 'counter' is not a documented NGL surface parameter;\n",
    "    # 'contour' may have been intended -- confirm before changing.\n",
    "    view[0].add_surface(selection='protein and not _H', opacity=0.7, counter=True)\n",
    "    view[0].add_contact()\n",
    "    view[0].add_cartoon(selection='protein', color='residueindex', clipDist=100, aspectRatio=0.1)\n",
    "    return display(view)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annotations = annotate(examples=binding_pose_examples, options=binding_pose_labels, display_fn=display_binding_pose)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Have a nice labeling life :)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"from pigeonXT import annotate\n",
	"\n",
	"from IPython.display import Image\n",
	"from IPython.display import Video\n",
	"import nglview as nv\n",
	"import pandas as pd\n",
	"from rdkit import Chem"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 1. Binary classification"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"examples = ['I love this movie', 'I was really disappointed by the book', 'He likes this cookie']\n",
	"binary_labels = ['positive', 'negative']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"annotations = annotate(examples=examples, options=binary_labels)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"annotations"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"以下のようにすればDataFrameに加工できる。`final_process_fn` で後処理も指定可能。"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 2. Multi-class classification"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"multi_classification_labels = ['positive', 'negative', 'inbetween']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"annotations = annotate(examples=examples, options=multi_classification_labels)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 3. Multi-label classification"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Movie titles to annotate and the candidate labels offered for each title.\n",
	"movie_examples = [\"Star wars\", \"Mask\", \"Frozen\", \"Home alone\", \"Kiki's Delivery Service\", \"Devil's Blade\"]\n",
	"multi_labels = [\"Romance\", \"Comedy\", \"Fantasy\", \"Science fiction\", \"Horror\", \"Anime\", \"Magician\", \"Devil\"]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"annotate?"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"`task_type='multilabel-classification'` で複数のラベルを選択できる"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"annotations = annotate(examples=movie_examples, options=multi_labels,\n",
	" task_type='multilabel-classification', reset_buttons_after_click=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"annotations"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 4. Image labeling"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Image files to annotate and the labels offered for each one\n",
	"# (rockhopper penguin / emperor penguin).\n",
	"image_examples = [\n",
	"    'images/001.jpg',\n",
	"    'images/002.jpg',\n",
	"    'images/003.jpg',\n",
	"]\n",
	"image_labels = ['イワトビペンギン', '皇帝ペンギン']\n",
	"\n",
	"\n",
	"def display_image(img):\n",
	"    \"\"\"Show `img` in the cell output.\n",
	"\n",
	"    `img` is wrapped in an IPython `Image` and handed to `display`;\n",
	"    the value returned by `display` is passed back to the caller.\n",
	"    \"\"\"\n",
	"    picture = Image(img)\n",
	"    return display(picture)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"`display_fn` に関数を指定することで `examples` の表示の仕方をカスタマイズできる"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"annotations = annotate(examples=image_examples, options=image_labels, display_fn=display_image)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"匿名関数も利用可能"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"annotations = annotate(examples=image_examples, options=image_labels, display_fn=lambda img: display(Image(img)))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 5. Video labeling"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Video files to annotate and the labels offered for each one.\n",
	"video_examples = ['videos/001.mp4']\n",
	"video_labels = ['dog', 'penguin']\n",
	"\n",
	"\n",
	"def display_video(video):\n",
	"    \"\"\"Show `video` in the cell output.\n",
	"\n",
	"    `video` is wrapped in an IPython `Video` created with `height=360`\n",
	"    and handed to `display`; the value returned by `display` is passed\n",
	"    back to the caller.\n",
	"    \"\"\"\n",
	"    player = Video(video, height=360)\n",
	"    return display(player)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"annotations = annotate(examples=video_examples, options=video_labels, display_fn=display_video)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 6. Chemical structure labeling"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# SMILES strings to annotate and the labels offered for each one.\n",
	"structure_examples = ['c1ccccc1', 'c1ccccc1Cl']\n",
	"structure_labels = ['OK', 'NG']\n",
	"\n",
	"\n",
	"def display_structure(smiles):\n",
	"    \"\"\"Show the molecule described by `smiles` in the cell output.\n",
	"\n",
	"    The string is parsed with `Chem.MolFromSmiles` and the resulting\n",
	"    object is handed to `display`; the value returned by `display` is\n",
	"    passed back to the caller.\n",
	"    \"\"\"\n",
	"    molecule = Chem.MolFromSmiles(smiles)\n",
	"    return display(molecule)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"annotations = annotate(examples=structure_examples, options=structure_labels, display_fn=display_structure)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 7. Ligand protein binding pose labeling"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Structure files to annotate and the labels offered for each pose.\n",
	"binding_pose_examples = ['pdb/6lu7.pdb', 'pdb/4b05.pdb']\n",
	"binding_pose_labels = ['OK', 'NG']\n",
	"\n",
	"\n",
	"def display_binding_pose(pdb):\n",
	"    \"\"\"Show the structure file `pdb` in a 640px x 640px nglview widget.\n",
	"\n",
	"    The file is loaded with `default=False` (presumably so that nglview\n",
	"    adds no representations of its own -- confirm) and representations\n",
	"    are then added explicitly to component 0: lines and a cartoon for\n",
	"    `protein`, a surface for `protein and not _H`, green ball-and-stick\n",
	"    for `not protein`, and contacts. The view is centred on the hetero\n",
	"    atoms.\n",
	"\n",
	"    Returns whatever `display` returns for the widget.\n",
	"    \"\"\"\n",
	"    view = nv.show_file(pdb, default=False)\n",
	"    view.layout.width = '640px'\n",
	"    view.layout.height = '640px'\n",
	"\n",
	"    view[0].add_line(selection='protein')\n",
	"    view[0].add_ball_and_stick(selection='not protein', color='green')\n",
	"    # 'hetero' is the NGL selection keyword for HETATM records; the literal\n",
	"    # 'HETATM' is not a keyword of the NGL selection language.\n",
	"    view[0].center(selection='hetero')\n",
	"    # NOTE(review): 'counter' is not a documented NGL surface parameter;\n",
	"    # 'contour' may have been intended -- confirm before changing.\n",
	"    view[0].add_surface(selection='protein and not _H', opacity=0.7, counter=True)\n",
	"    view[0].add_contact()\n",
	"    view[0].add_cartoon(selection='protein', color='residueindex', clipDist=100, aspectRatio=0.1)\n",
	"    return display(view)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"annotations = annotate(examples=binding_pose_examples, options=binding_pose_labels, display_fn=display_binding_pose)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"pd.DataFrame(annotations.items(), columns=['sentence', 'label'])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Have a nice labeling life :)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.7"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}