Swarchal/parse_yokogawa.py

## parse_yokogawa.py
import os
from collections import namedtuple
from typing import NamedTuple, List
import pandas as pd

# Record type returned by parse_filepath. Built once at import time so that
# every parsed path shares one class instead of creating a new class per call.
Yoko = namedtuple("Yoko", ["well", "site", "z", "channel", "filepath"])


def parse_filepath(filepath: str) -> NamedTuple:
    """
    Extract the metadata encoded in a Yokogawa image file name.

    The file name must end in ``<well>_<code>.tif``; anything before the
    well (separated by "_") is ignored. The code has a fixed-width layout:

    0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20
    T|0|0|0|1|F|0|0|6|L|0 |1 |A |0 |4 |Z |0 |1 |C |0 |2
    ------------------------------------------------------
    characters 6-8 are read as the site, 16-17 as the z-plane and the
    last two characters as the channel.

    Parameters:
        filepath: path (or bare file name) of the image.

    Returns:
        a ``Yoko`` namedtuple ``(well, site, z, channel, filepath)`` where
        ``filepath`` is the argument exactly as it was passed in.

    Raises:
        ValueError: if the file name has no "_" separated well/code parts
            or the site, z or channel characters are not integers.

    example:
        >>> filepath = "test_N22_T0001F006L01A04Z01C02.tif"
        >>> parse_filepath(filepath)
        Yoko(well='N22', site=6, z=1, channel=2, filepath='test_N22_T0001F006L01A04Z01C02.tif')
    """
    final_path = os.path.basename(filepath)
    # only the last two "_" separated parts matter; any prefix is discarded
    *_, well, rest = final_path.split("_")
    # drop the file extension so the channel is the last two characters
    rest = os.path.splitext(rest)[0]
    site = int(rest[6:9])
    z = int(rest[16:18])
    channel = int(rest[-2:])
    return Yoko(well, site, z, channel, filepath)


def clean_paths(paths: List[str]) -> List[str]:
    """
    Remove unwanted files from a list of paths.

    A path is kept only when it ends with ".tif", its file name is made of
    exactly three "_" separated parts and "#" does not appear anywhere in
    the path. Likely to break as it makes a lot of assumptions about how
    the files are named.

    Parameters:
        paths: candidate file paths.

    Returns:
        the paths that passed the checks, in their original order.

    Raises:
        AssertionError: if no path passes the checks.
    """
    output = [
        p
        for p in paths
        if p.endswith(".tif")
        # count the parts of the file name only, directory names may hold "_"
        and len(os.path.basename(p).split("_")) == 3
        and "#" not in p
    ]
    assert len(output) >= 1, "no valid .tif image paths found"
    return output


def create_metadata_dataframe(data_dir: str) -> pd.DataFrame:
    """
    Create a simple dataframe of the filepath and the metadata.

    Lists ``data_dir``, filters the entries with ``clean_paths`` and parses
    every remaining path with ``parse_filepath``.

    Parameters:
        data_dir: directory containing the image files.

    Returns:
        a dataframe built from the parsed tuples, one row per image.

    Raises:
        AssertionError: if ``data_dir`` is not a directory, if it is empty,
            or (from ``clean_paths``) if it holds no usable image file.
    """
    assert os.path.isdir(data_dir), f"{data_dir} not found"
    # sorted so the row order does not depend on the file system's listing order
    all_paths = sorted(os.listdir(data_dir))
    # a directory holding a single file is valid, only an empty one is rejected
    assert len(all_paths) >= 1, f"no files found in {data_dir}"
    full_paths = [os.path.join(data_dir, p) for p in all_paths]
    paths = clean_paths(full_paths)
    tuple_list = [parse_filepath(p) for p in paths]
    return pd.DataFrame(tuple_list)
	import os
	from collections import namedtuple
	from typing import NamedTuple, List
	import pandas as pd

	# Record type returned by parse_filepath. Built once at import time so that
	# every parsed path shares one class instead of creating a new class per call.
	Yoko = namedtuple("Yoko", ["well", "site", "z", "channel", "filepath"])


	def parse_filepath(filepath: str) -> NamedTuple:
	    """
	    Extract the metadata encoded in a Yokogawa image file name.

	    The file name must end in ``<well>_<code>.tif``; anything before the
	    well (separated by "_") is ignored. The code has a fixed-width layout:

	    0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20
	    T|0|0|0|1|F|0|0|6|L|0 |1 |A |0 |4 |Z |0 |1 |C |0 |2
	    ------------------------------------------------------
	    characters 6-8 are read as the site, 16-17 as the z-plane and the
	    last two characters as the channel.

	    Parameters:
	        filepath: path (or bare file name) of the image.

	    Returns:
	        a ``Yoko`` namedtuple ``(well, site, z, channel, filepath)`` where
	        ``filepath`` is the argument exactly as it was passed in.

	    Raises:
	        ValueError: if the file name has no "_" separated well/code parts
	            or the site, z or channel characters are not integers.

	    example:
	        >>> filepath = "test_N22_T0001F006L01A04Z01C02.tif"
	        >>> parse_filepath(filepath)
	        Yoko(well='N22', site=6, z=1, channel=2, filepath='test_N22_T0001F006L01A04Z01C02.tif')
	    """
	    final_path = os.path.basename(filepath)
	    # only the last two "_" separated parts matter; any prefix is discarded
	    *_, well, rest = final_path.split("_")
	    # drop the file extension so the channel is the last two characters
	    rest = os.path.splitext(rest)[0]
	    site = int(rest[6:9])
	    z = int(rest[16:18])
	    channel = int(rest[-2:])
	    return Yoko(well, site, z, channel, filepath)


	def clean_paths(paths: List[str]) -> List[str]:
	    """
	    Remove unwanted files from a list of paths.

	    A path is kept only when it ends with ".tif", its file name is made of
	    exactly three "_" separated parts and "#" does not appear anywhere in
	    the path. Likely to break as it makes a lot of assumptions about how
	    the files are named.

	    Parameters:
	        paths: candidate file paths.

	    Returns:
	        the paths that passed the checks, in their original order.

	    Raises:
	        AssertionError: if no path passes the checks.
	    """
	    output = [
	        p
	        for p in paths
	        if p.endswith(".tif")
	        # count the parts of the file name only, directory names may hold "_"
	        and len(os.path.basename(p).split("_")) == 3
	        and "#" not in p
	    ]
	    assert len(output) >= 1, "no valid .tif image paths found"
	    return output


	def create_metadata_dataframe(data_dir: str) -> pd.DataFrame:
	    """
	    Create a simple dataframe of the filepath and the metadata.

	    Lists ``data_dir``, filters the entries with ``clean_paths`` and parses
	    every remaining path with ``parse_filepath``.

	    Parameters:
	        data_dir: directory containing the image files.

	    Returns:
	        a dataframe built from the parsed tuples, one row per image.

	    Raises:
	        AssertionError: if ``data_dir`` is not a directory, if it is empty,
	            or (from ``clean_paths``) if it holds no usable image file.
	    """
	    assert os.path.isdir(data_dir), f"{data_dir} not found"
	    # sorted so the row order does not depend on the file system's listing order
	    all_paths = sorted(os.listdir(data_dir))
	    # a directory holding a single file is valid, only an empty one is rejected
	    assert len(all_paths) >= 1, f"no files found in {data_dir}"
	    full_paths = [os.path.join(data_dir, p) for p in all_paths]
	    paths = clean_paths(full_paths)
	    tuple_list = [parse_filepath(p) for p in paths]
	    return pd.DataFrame(tuple_list)