ipetrushin/ccf-run-with-steps.sh

## ccf-run-with-steps.sh
#!/usr/bin/env bash
# Run CRISPRCasFinder on one input file once per direct-repeat length from
# 10 to 55 (-minDR and -maxDR are both set to the same value), each run in
# its own directory under $DIR, then copy that run's result.json to a
# per-length JSON file.
#
# Usage: ccf-run-with-steps.sh FILE
#   FILE is symlinked as ../FILE from inside each run directory, so it must
#   be a plain file name that lives directly in $DIR.
DIR=/hpcws/ipetrushin-arab

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  echo "usage: $0 FILE" >&2
  exit 2
fi

FILE=$1
# %%.* removes everything from the FIRST dot onwards, so "genome.v2.fasta"
# becomes "genome".
BASENAME=${FILE%%.*}

for n in {10..55}
do
  MIN=$n
  MAX=$n
  printf '%s with -minDR %s -maxDR %s\n' "$BASENAME" "$MIN" "$MAX"

  RUN_DIR="$DIR/$BASENAME.$MIN.$MAX"
  # Never run the tool from the wrong directory: skip this length if the
  # run directory cannot be created or entered.
  if ! mkdir -p -- "$RUN_DIR" || ! cd -- "$RUN_DIR"; then
    echo "error: cannot use run directory $RUN_DIR, skipping" >&2
    continue
  fi
  ln -sf -- "../$FILE" "$FILE" \
    || echo "warning: could not link $FILE into $RUN_DIR" >&2

  if ! /home/ipetrushin/soft/ccf/CRISPRCasFinder.pl -cpuM 16 -vi 10000000 \
    -minDR "$MIN" -maxDR "$MAX" -log -keep \
    -so /home/ipetrushin/miniconda3/lib/sel392v2.so \
    -in "$FILE" -out "$BASENAME.out"; then
    echo "warning: CRISPRCasFinder reported an error for DR length $n" >&2
  fi

  # NOTE(review): ../../ resolves to the parent of $DIR, not $DIR itself;
  # confirm this is the intended destination.
  if [ -f "$BASENAME.out/result.json" ]; then
    cp -- "$BASENAME.out/result.json" "../../$BASENAME.$MIN.$MAX.json"
  else
    echo "warning: no result.json produced for DR length $n" >&2
  fi
  cd -- "$DIR" || exit 1
done

## get-crispr-arrays-stats-start-stop.py
#!/usr/bin/env python3

import os, json

def print_crispr_array_stats(top="."):
    """Print one tab-separated line per CRISPR array found below ``top``.

    Directories are visited in sorted path order and files in sorted name
    order. For every file whose name ends in ``.json`` the first entry of
    its "Sequences" list is read, and for each item of that entry's
    "Crisprs" list the fields Name, Start, End and a spacer count are
    printed.

    Args:
        top: directory to walk recursively; defaults to the current directory.

    Raises:
        KeyError / IndexError: if a JSON file lacks the expected keys.
        json.JSONDecodeError: if a ``.json`` file is not valid JSON.
    """
    for root, _dirs, files in sorted(os.walk(top), key=lambda entry: entry[0]):
        for f in sorted(files):
            # Match the extension only; a substring test would also try to
            # parse names such as "result.json.bak".
            if not f.endswith('.json'):
                continue
            with open(os.path.join(root, f), "r") as read_file:
                data = json.load(read_file)
            # Only the first sequence of each file is inspected.
            for c in data["Sequences"][0]["Crisprs"]:
                # Assumes "Regions" holds two flanks plus alternating
                # repeat/spacer entries (2n + 3 items for n spacers) -
                # TODO confirm against the CRISPRCasFinder output format.
                num_spacers = (len(c["Regions"]) - 3) // 2
                print("%s\t%d\t%d\t%d" % (c["Name"], c["Start"], c["End"], num_spacers))


if __name__ == "__main__":
    print_crispr_array_stats()

## get-crispr-drs.py
#!/usr/bin/env python3

# -*- coding: utf-8 -*-
import os, json

def print_crispr_dr_fasta(top="."):
    """Print the consensus direct repeat of every CRISPR array as FASTA.

    Directories are visited in sorted path order and files in sorted name
    order. For every file whose name ends in ``.json`` the first entry of
    its "Sequences" list is read; each item of that entry's "Crisprs" list
    yields a record whose header is ``>Name;organism`` and whose body is the
    item's "DR_Consensus" value. ``organism`` is the part of the sequence's
    "Description" before the first comma.

    Args:
        top: directory to walk recursively; defaults to the current directory.

    Raises:
        KeyError / IndexError: if a JSON file lacks the expected keys.
        json.JSONDecodeError: if a ``.json`` file is not valid JSON.
    """
    for root, _dirs, files in sorted(os.walk(top), key=lambda entry: entry[0]):
        for f in sorted(files):
            # Match the extension only; a substring test would also try to
            # parse names such as "result.json.bak".
            if not f.endswith('.json'):
                continue
            with open(os.path.join(root, f), "r") as read_file:
                data = json.load(read_file)
            sequence = data["Sequences"][0]  # only the first sequence is read
            organism = sequence["Description"].split(',')[0]
            for c in sequence["Crisprs"]:
                print(">%s;%s\n%s" % (c["Name"], organism, c["DR_Consensus"]))


if __name__ == "__main__":
    print_crispr_dr_fasta()

## gistfile1.txt
#!/usr/bin/env python3

# -*- coding: utf-8 -*-
import os, json

def print_crispr_spacer_fasta(top="."):
    """Print every spacer of every CRISPR array as a FASTA record.

    Directories are visited in sorted path order and files in sorted name
    order. For every file whose name ends in ``.json`` the first entry of
    its "Sequences" list is read. Within each item of that entry's "Crisprs"
    list, every region whose "Type" is "Spacer" yields a record with header
    ``>Name_k;Start_End`` and the region's "Sequence" as body, where ``k`` is
    the region's index in the "Regions" list divided by two (integer
    division).

    Args:
        top: directory to walk recursively; defaults to the current directory.

    Raises:
        KeyError / IndexError: if a JSON file lacks the expected keys.
        json.JSONDecodeError: if a ``.json`` file is not valid JSON.
    """
    for root, _dirs, files in sorted(os.walk(top), key=lambda entry: entry[0]):
        for f in sorted(files):
            # Match the extension only; a substring test would also try to
            # parse names such as "result.json.bak".
            if not f.endswith('.json'):
                continue
            with open(os.path.join(root, f), "r") as read_file:
                data = json.load(read_file)
            # Only the first sequence of each file is inspected.
            for c in data["Sequences"][0]["Crisprs"]:
                for number, r in enumerate(c["Regions"]):
                    if r["Type"] == "Spacer":
                        print(">%s_%d;%d_%d\n%s" % (c["Name"], number // 2, r["Start"], r["End"], r["Sequence"]))


if __name__ == "__main__":
    print_crispr_spacer_fasta()
	# Run CRISPRCasFinder on one input file once per direct-repeat length from
	# 10 to 55 (-minDR and -maxDR are both set to the same value), each run in
	# its own directory under $DIR, then copy that run's result.json to a
	# per-length JSON file. Requires bash (brace range in the loop).
	#
	# Usage: <script> FILE
	#   FILE is symlinked as ../FILE from inside each run directory, so it must
	#   be a plain file name that lives directly in $DIR.
	DIR=/hpcws/ipetrushin-arab

	if [ "$#" -lt 1 ] || [ -z "$1" ]; then
		echo "usage: $0 FILE" >&2
		exit 2
	fi

	FILE=$1
	# %%.* removes everything from the FIRST dot onwards.
	BASENAME=${FILE%%.*}

	for n in {10..55}
	do
		MIN=$n
		MAX=$n
		printf '%s with -minDR %s -maxDR %s\n' "$BASENAME" "$MIN" "$MAX"

		RUN_DIR="$DIR/$BASENAME.$MIN.$MAX"
		# Never run the tool from the wrong directory: skip this length if
		# the run directory cannot be created or entered.
		if ! mkdir -p -- "$RUN_DIR" || ! cd -- "$RUN_DIR"; then
			echo "error: cannot use run directory $RUN_DIR, skipping" >&2
			continue
		fi
		ln -sf -- "../$FILE" "$FILE" \
			|| echo "warning: could not link $FILE into $RUN_DIR" >&2

		if ! /home/ipetrushin/soft/ccf/CRISPRCasFinder.pl -cpuM 16 -vi 10000000 \
			-minDR "$MIN" -maxDR "$MAX" -log -keep \
			-so /home/ipetrushin/miniconda3/lib/sel392v2.so \
			-in "$FILE" -out "$BASENAME.out"; then
			echo "warning: CRISPRCasFinder reported an error for DR length $n" >&2
		fi

		# NOTE(review): ../../ resolves to the parent of $DIR, not $DIR itself;
		# confirm this is the intended destination.
		if [ -f "$BASENAME.out/result.json" ]; then
			cp -- "$BASENAME.out/result.json" "../../$BASENAME.$MIN.$MAX.json"
		else
			echo "warning: no result.json produced for DR length $n" >&2
		fi
		cd -- "$DIR" || exit 1
	done
	#!/usr/bin/env python3

	import os, json

	# Walk the current directory (directories in sorted path order, files in
	# sorted name order) and print "Name<TAB>Start<TAB>End<TAB>spacer count"
	# for every CRISPR array listed in the first sequence of each *.json file.
	for root, _dirs, files in sorted(os.walk("."), key=lambda entry: entry[0]):
		for f in sorted(files):
			# Match the extension only; a substring test would also try to
			# parse names such as "result.json.bak".
			if not f.endswith('.json'):
				continue
			with open(os.path.join(root, f), "r") as read_file:
				data = json.load(read_file)
			# Only the first sequence of each file is inspected.
			for c in data["Sequences"][0]["Crisprs"]:
				# Assumes "Regions" holds two flanks plus alternating
				# repeat/spacer entries (2n + 3 items for n spacers) -
				# TODO confirm against the CRISPRCasFinder output format.
				num_spacers = (len(c["Regions"]) - 3) // 2
				print("%s\t%d\t%d\t%d" % (c["Name"], c["Start"], c["End"], num_spacers))
	#!/usr/bin/env python3

	# -*- coding: utf-8 -*-
	import os, json

	# Walk the current directory (directories in sorted path order, files in
	# sorted name order) and print, for every CRISPR array listed in the first
	# sequence of each *.json file, a FASTA record ">Name;organism" followed by
	# the array's DR_Consensus. "organism" is the part of the sequence's
	# Description before the first comma.
	for root, _dirs, files in sorted(os.walk("."), key=lambda entry: entry[0]):
		for f in sorted(files):
			# Match the extension only; a substring test would also try to
			# parse names such as "result.json.bak".
			if not f.endswith('.json'):
				continue
			with open(os.path.join(root, f), "r") as read_file:
				data = json.load(read_file)
			sequence = data["Sequences"][0]  # only the first sequence is read
			organism = sequence["Description"].split(',')[0]
			for c in sequence["Crisprs"]:
				print(">%s;%s\n%s" % (c["Name"], organism, c["DR_Consensus"]))