Skip to content

Instantly share code, notes, and snippets.

@tflori
Last active December 21, 2016 08:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tflori/1542cb898b07bbe0879aa9954d2c11db to your computer and use it in GitHub Desktop.
Save tflori/1542cb898b07bbe0879aa9954d2c11db to your computer and use it in GitHub Desktop.
fasta php
<?php
/* The Computer Language Benchmarks Game
http://benchmarksgame.alioth.debian.org/
contributed by Thomas Flori
*/
while (ob_get_level()) ob_end_clean();
ini_set('memory_limit', '512M');
define('LINE_LENGTH', 60);
define('NUM_THREADS', 16);
define('BUFFER_LINE_COUNT', 1024);
define('BUFFER_SIZE', LINE_LENGTH * BUFFER_LINE_COUNT);
define('IA', 3877);
define('IC', 29573);
define('IM', 139968);
$workers = [];
$n = 1000;
if ($_SERVER['argc'] > 1) {
$n = (int)$_SERVER['argv'][1];
}
echo ">ONE Homo sapiens alu\n";
$alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
. "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
. "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
. "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
. "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
. "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
. "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
$length = $n * 2;
$buffer = strlen($alu) * 60;
while ($length > 0) {
$c = $length > $buffer ? $buffer : $length;
echo chunk_split(str_pad('', $c, $alu), LINE_LENGTH, "\n"); // yes a one liner
$length -= $c;
}
echo ">TWO IUB ambiguity codes\n";
$iub = [
'a' => 0.27, 'c' => 0.12, 'g' => 0.12, 't' => 0.27, 'B' => 0.02, 'D' => 0.02, 'H' => 0.02, 'K' => 0.02,
'M' => 0.02, 'N' => 0.02, 'R' => 0.02, 'S' => 0.02, 'V' => 0.02, 'W' => 0.02, 'Y' => 0.02
];
accumulatePropabilities($iub);
writeDNA(function ($randoms) use ($iub) {
return getDNA($randoms, $iub);
}, $n * 3);
echo ">THREE Homo sapiens frequency\n";
$homosapiens = [
'a' => 0.3029549426680, 'c' => 0.1979883004921,
'g' => 0.1975473066391, 't' => 0.3015094502008
];
accumulatePropabilities($homosapiens);
writeDNA(function ($randoms) use ($homosapiens) {
return getDNA($randoms, $homosapiens);
}, $n * 5);
function writeDNA($callback, $length) {
static $seed = 42;
$threads = [];
$i = 0;
$j = 0;
// here we need a lot of memory how to avoid this?
$dna = shmop_open(ftok(__FILE__, chr(0)), 'c', 0644, $length);
while ($length > 0) {
$c = BUFFER_SIZE;
if ($c + $i > $length) {
$c = $length - $i;
}
if (count($threads) === NUM_THREADS) {
$pid = pcntl_wait($status);
array_splice($threads, array_search($pid, $threads), 1);
}
$randoms = generateRandoms($seed, $c);
if (!$pid = pcntl_fork()) {
$offset = BUFFER_SIZE * $j;
shmop_write($dna, $callback($randoms), $offset);
exit();
} else {
$threads[] = $pid;
}
$length -= $c;
$j++;
}
while (count($threads)) {
$pid = pcntl_wait($status);
array_splice($threads, array_search($pid, $threads), 1);
}
echo chunk_split(shmop_read($dna, 0, $length), LINE_LENGTH, "\n");
shmop_delete($dna);
}
function generateRandoms(&$seed, $count) {
$randoms = array_fill(0, $count, 0);
foreach ($randoms as &$r) {
$r = $seed = ($seed * IA + IC) % IM;
}
return $randoms;
}
function getDNA($randoms, $genelist) {
$dna = '';
foreach ($randoms as $r) {
foreach ($genelist as $nucleoid => $v) {
if ($r < $v) {
$dna .= $nucleoid;
break;
}
}
}
return $dna;
}
function accumulatePropabilities(&$genelist) {
$cumul = 0;
foreach($genelist as $k=>&$v) {
$cumul = $v = $v * IM + $cumul;
}
$genelist = array_map('intval', array_map('ceil', $genelist));
}
<?php
/* The Computer Language Benchmarks Game
http://benchmarksgame.alioth.debian.org/
contributed by Wing-Chung Leung
modified by Isaac Gouy
modified by anon
modified by Thomas Flori
*/
ob_implicit_flush(1);
ob_start(NULL, 4096);
define('IA', 3877);
define('IC', 29573);
define('IM', 139968);
$last = 42;
function gen_random(&$last, &$randoms) {
foreach($randoms as &$r) {
$r = $last = ($last * IA + IC) % IM;
}
}
/* Weighted selection from alphabet */
function makeCumulative(&$genelist) {
$cumul = 0;
foreach($genelist as $k=>&$v) {
$cumul = $v = $v * IM + $cumul;
}
$genelist = array_map('intval', array_map('ceil', $genelist));
}
/* Generate and write FASTA format */
function makeRandomFasta(&$genelist, $n) {
$width = 60;
$lines = (int) ($n / $width);
$pick = str_repeat('?', $width)."\n";
$randoms = array_fill(0, $width, 0.0);
global $last;
// full lines
for ($i = 0; $i < $lines; ++$i) {
gen_random($last, $randoms);
$j = 0;
foreach ($randoms as $r) {
foreach($genelist as $k=>$v) {
if ($r < $v) {
break;
}
}
$pick[$j++] = $k;
}
echo $pick;
}
// last, partial line
$w = $n % $width;
if ($w !== 0) {
$randoms = array_fill(0, $w, 0.0);
gen_random($last, $randoms);
$j = 0;
foreach ($randoms as $r) {
foreach($genelist as $k=>$v) {
if ($r < $v) {
break;
}
}
$pick[$j++] = $k;
}
$pick[$w] = "\n";
echo substr($pick, 0, $w+1);
}
}
function makeRepeatFasta($s, $n) {
$i = 0; $sLength = strlen($s); $lineLength = 60;
while ($n > 0) {
if ($n < $lineLength) $lineLength = $n;
if ($i + $lineLength < $sLength){
print(substr($s,$i,$lineLength)); print("\n");
$i += $lineLength;
} else {
print(substr($s,$i));
$i = $lineLength - ($sLength - $i);
print(substr($s,0,$i)); print("\n");
}
$n -= $lineLength;
}
}
/* Main -- define alphabets, make 3 fragments */
$iub=array(
'a' => 0.27,
'c' => 0.12,
'g' => 0.12,
't' => 0.27,
'B' => 0.02,
'D' => 0.02,
'H' => 0.02,
'K' => 0.02,
'M' => 0.02,
'N' => 0.02,
'R' => 0.02,
'S' => 0.02,
'V' => 0.02,
'W' => 0.02,
'Y' => 0.02
);
$homosapiens = array(
'a' => 0.3029549426680,
'c' => 0.1979883004921,
'g' => 0.1975473066391,
't' => 0.3015094502008
);
$alu =
'GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG' .
'GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA' .
'CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT' .
'ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA' .
'GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG' .
'AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC' .
'AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA';
$n = 1000;
if ($_SERVER['argc'] > 1) $n = $_SERVER['argv'][1];
makeCumulative($iub);
makeCumulative($homosapiens);
echo ">ONE Homo sapiens alu\n";
makeRepeatFasta($alu, $n*2);
echo ">TWO IUB ambiguity codes\n";
makeRandomFasta($iub, $n*3);
echo ">THREE Homo sapiens frequency\n";
makeRandomFasta($homosapiens, $n*5);
file_put_contents('php://stderr', "memory peak: " . memory_get_peak_usage(true) . "\n");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment