Skip to content

Instantly share code, notes, and snippets.

@alienzj
Created August 7, 2017 08:31
Show Gist options
  • Save alienzj/6125cba353995ca2fb328c01a4d22cc4 to your computer and use it in GitHub Desktop.
Save alienzj/6125cba353995ca2fb328c01a4d22cc4 to your computer and use it in GitHub Desktop.
Overall Accurate
#pragma once
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string_regex.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/program_options.hpp>
#include <boost/regex.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <tuple>
#include <cmath>
namespace bio
{
std::tuple<double, int, int, int>
calculate_cutoff(std::string& qual, double seed_oa_cutoff, double frag_oa_cutoff)
{
int qual_system = 33;
double min = 0.0;
double oa_0 = 1.0, oa_1 = 1.0, ca = 1.0;
int position = 0;
std::vector<int> qual_value_vec;
std::vector<double> correct_rate_vec;
for (auto& qual_base : qual)
{
int qual_value = static_cast<int>(qual_base) - qual_system;
qual_value_vec.push_back(qual_value);
double correct_rate = 1 - std::pow(10, -qual_value / 10);
correct_rate = (correct_rate == 0) ? 0.1 : correct_rate;
correct_rate_vec.push_back(correct_rate);
//std::cout << qual_base << " : " << qual_value << " : " << correct_rate << "\n";
}
std::vector<double> seed_oa(qual.size() - 4, 1);
int i = 0;
while (i < 5 )
{
seed_oa[0] *= correct_rate_vec[i];
++i;
}
while (i < qual.size())
{
seed_oa[i - 4] = seed_oa[i - 5] * correct_rate_vec[i] / correct_rate_vec[i - 5];
position = (seed_oa[i - 4] > seed_oa[position]) ? i - 4 : position;
++i;
if (seed_oa[position] > seed_oa_cutoff)
break;
}
//std::cout << position << std::endl;
i = position + 5;
ca = seed_oa[position];
while (i < qual.size())
{
double acc = correct_rate_vec[i];
if (acc < min)
{
oa_1 = ca * min;
min = acc;
}
else
{
oa_1 = ca * acc;
}
if (oa_1 < frag_oa_cutoff) break;
++i;
ca = oa_1;
}
int qual_sum = 0;
for (int j = position; j < i; ++j)
qual_sum += qual_value_vec[j];
i = i - position;
return { ca, qual_sum, position, i };
}
int filter_se_reads(const std::string& fq_path,
const int min_len,
double seed_oa_cutoff,
double frag_oa_cutoff)
{
std::stringstream fastq_str;
std::ifstream fq_file(fq_path, std::ios_base::in | std::ios_base::binary);
try
{
boost::iostreams::filtering_istreambuf in;
in.push(boost::iostreams::gzip_decompressor());
in.push(fq_file);
boost::iostreams::copy(in, fastq_str);
}
catch (const boost::iostreams::gzip_error& e)
{
std::cerr << e.what() << "\n";
}
const std::string fq_path_filter = boost::algorithm::replace_last_copy(fq_path, "fq", "clean.fq");
std::ofstream fq_file_filter(fq_path_filter, std::ios_base::out | std::ios_base::binary);
boost::iostreams::filtering_ostreambuf out;
out.push(boost::iostreams::gzip_compressor());
out.push(fq_file_filter);
std::string line;
while (std::getline(fastq_str, line))
{
std::vector<std::string> header;
boost::algorithm::split_regex(header, line, boost::regex("\\s+"));
std::string reads_id = header[0];
std::getline(fastq_str, line);
std::string reads_seq = line;
std::getline(fastq_str, line);
std::string reads_direction = line;
std::getline(fastq_str, line);
std::string reads_qual = line;
//std::cout << reads_id << "\n";
//std::cout << reads_direction << "\n";
//std::cout << reads_seq << "\n";
//std::cout << reads_qual << "\n";
double ca;
int qual_sum, start, len;
std::tie(ca, qual_sum, start, len) = bio::calculate_cutoff(reads_qual, seed_oa_cutoff, frag_oa_cutoff);
if (len >= min_len && ca >= frag_oa_cutoff)
{
std::string seq_filter = reads_seq.substr(len, start);
std::string qual_filter = reads_qual.substr(len, start);
std::ostringstream reads_filter;
reads_filter
<< reads_id << " length=" << len << "\n"
<< seq_filter << "\n"
<< reads_direction << "\n"
<< qual_filter << "\n";
try
{
boost::iostreams::copy(out, reads_filter);
}
catch (const boost::iostreams::gzip_error& e)
{
std::cerr << e.what() << "\n";
}
return 0;
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment