Skip to content

Instantly share code, notes, and snippets.

@ezralanglois
Last active November 21, 2022 15:04
Show Gist options
  • Save ezralanglois/2647a183fab2a8925de1c15e998e2987 to your computer and use it in GitHub Desktop.
Save ezralanglois/2647a183fab2a8925de1c15e998e2987 to your computer and use it in GitHub Desktop.
Plot Q30 per cycle using the Illumina InterOp library
# Tested with
# - Anaconda Python 3.7
# - Illumina InterOp 1.1.12
import interop.py_interop_plot as interop_plot
import interop.py_interop_run as interop_run
import interop.py_interop_run_metrics as interop_run_metrics
import pandas as pd
run_folder = ""
metric = "Q30Percent"
# List of metrics can be found with
# metrics = interop_run.string_vector()
# interop_run.list_metric_type(metrics)
# [metrics[m] for m in range(metrics.size())]
#
# However only a subset can be plotted by cycle; this can determined with the following
#
# feature = interop_run.parse_metric_feature_type('CycleFeature')
# metrics = interop_run.string_vector()
# interop_run.list_metric_type(metrics)
# [metrics[m] for m in range(metrics.size()) if (interop_run_metrics.to_feature(interop_run.parse_metric_type(metrics[m])) & feature) == feature ]
#
# List can be found here:
# https://github.com/Illumina/interop/blob/master/interop/constants/enums.h#L41
#
# Caveat 1: not all metrics are available on every platform. You will get NaN if the metric is not available.
# For example, Signal To Noise is only available on MiSeq
#
# Caveat 2: Certain plots are box plots/candle stick plots. Any plot that returns multiple curves is an average, so only
# x/y are defined; e.g. Intensity returns a plot per channel. The interface for these plots is the same.
# Load only the metrics necessary to plot % >= Q30
metric = interop_run.parse_metric_type(metric)
valid_to_load = interop_run.uchar_vector(interop_run.MetricCount, 0)
interop_run_metrics.list_metrics_to_load_by_type(metric, valid_to_load)
run_metrics = interop_run_metrics.run_metrics()
run_metrics.read(run_folder, valid_to_load)
# Plot everything
options = interop_plot.filter_options(run_metrics.run_info().flowcell().naming_method())
plot_data = interop_plot.candle_stick_plot_data()
interop_plot.plot_by_cycle(run_metrics, metric, options, plot_data)
if plot_data.size() == 1:
# Convert candlestick (kind of box plot) to a pandas DataFrame
# https://en.wikipedia.org/wiki/Box_plot
# P25 = 25th percentile
# P50 = median
# P75 = 75th percentile
# Lower Whisker = lowest data point excluding any outliers or p25 - 1.5 * (p75-p25)
# Upper Whisker = highest data point excluding any outliers or p75 + 1.5 * (p75-p25)
columns = ["x", "lower", "p25", "p50", "p75", "upper"]
data = []
for j in range(plot_data.at(0).size()):
data_point = plot_data.at(0).at(j)
data.append([data_point.x()
, data_point.lower()
, data_point.p25()
, data_point.p50()
, data_point.p75()
, data_point.upper()])
elif plot_data.size() > 1:
# Convert several x/y curves where the tiles are averaged to a pandas DataFrame
columns = ["x", "y", "label"]
data = []
for i in range(plot_data.size()):
title = plot_data.at(i).title()
for j in range(plot_data.at(i).size()):
data_point = plot_data.at(i).at(j)
data.append([data_point.x(), data_point.y(), title])
df = pd.DataFrame(data=data, columns=columns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment