Spaces:

xiazhi
/

LKCell

Running

App Files Files Community

LKCell / cell_segmentation /datasets /prepare_monuseg.py

qingke1

initial commit

aea73e2 8 months ago

raw

history blame

4.13 kB

	# -*- coding: utf-8 -*-
	# Prepare MoNuSeg Dataset By converting and resorting files
	#
	# @ Fabian Hörst, fabian.hoerst@uk-essen.de
	# Institute for Artificial Intelligence in Medicine,
	# University Medicine Essen

	from PIL import Image
	import xml.etree.ElementTree as ET
	from skimage import draw
	import numpy as np
	from pathlib import Path
	from typing import Union
	import argparse


	def convert_monuseg(
	    input_path: Union[Path, str], output_path: Union[Path, str]
	) -> None:
	    """Convert the MoNuSeg dataset (1000 -> 1024, tiff to png and xml to npy).

	    For each of the ``testing`` and ``training`` sub-folders of ``input_path``:

	    * every ``images/*.tif`` file is resampled to 1024 x 1024 pixels (Lanczos)
	      and written to ``<output_path>/<part>/images/<stem>.png``;
	    * every ``labels/*.xml`` annotation is rasterized into an instance map on
	      a 1000 x 1000 canvas, resampled to 1024 x 1024 pixels with nearest
	      neighbour interpolation and written to
	      ``<output_path>/<part>/labels/<stem>.npy``.

	    Output directories are created when they do not exist yet. Progress is
	    reported with ``print``.

	    Args:
	        input_path (Union[Path, str]): Input dataset
	        output_path (Union[Path, str]): Output path
	    """
	    target_size = (1024, 1024)
	    input_path = Path(input_path)
	    output_path = Path(output_path)
	    output_path.mkdir(exist_ok=True, parents=True)

	    # testing and training
	    for part in ("testing", "training"):
	        print(f"Prepare: {part}")
	        input_path_part = input_path / part
	        image_out_dir = output_path / part / "images"
	        label_out_dir = output_path / part / "labels"
	        image_out_dir.mkdir(exist_ok=True, parents=True)
	        label_out_dir.mkdir(exist_ok=True, parents=True)

	        # images
	        for img_path in sorted((input_path_part / "images").glob("*.tif")):
	            # The context manager guarantees that the source file is closed,
	            # even when resizing or saving raises.
	            with Image.open(img_path) as loaded_image:
	                resized = loaded_image.resize(
	                    target_size, resample=Image.Resampling.LANCZOS
	                )
	                resized.save(image_out_dir / f"{img_path.stem}.png")

	        # masks
	        for annot_path in sorted((input_path_part / "labels").glob("*.xml")):
	            instance_map = _rasterize_annotation(annot_path)
	            # Nearest neighbour keeps the instance ids intact while upscaling.
	            resized_mask = np.array(
	                Image.fromarray(instance_map).resize(
	                    target_size, resample=Image.Resampling.NEAREST
	                )
	            )
	            np.save(label_out_dir / f"{annot_path.stem}.npy", resized_mask)
	    print("Finished")


	def _rasterize_annotation(
	    annot_path: Union[Path, str], mask_shape: tuple = (1000, 1000)
	) -> np.ndarray:
	    """Rasterize the polygons of one XML annotation file into an instance map.

	    Only the first child of the XML root is read. Inside it, every ``Region``
	    of a ``Regions`` element is filled with a running id that starts at 1;
	    pixels that belong to no polygon stay 0. A polygon drawn later overwrites
	    the pixels of earlier ones.

	    Args:
	        annot_path (Union[Path, str]): XML annotation file
	        mask_shape (tuple): (rows, cols) of the canvas the polygons are drawn on

	    Returns:
	        np.ndarray: Array of shape ``mask_shape`` (numpy default float dtype)
	    """
	    instance_map = np.zeros(mask_shape)
	    annotation = ET.parse(annot_path).getroot()[0]

	    for regions in annotation:
	        if regions.tag != "Regions":
	            continue
	        # The id counter restarts for every "Regions" element.
	        element_idx = 1
	        for region in regions:
	            if region.tag != "Region":
	                continue
	            # The vertex list is taken positionally: second child of "Region".
	            vertices = region[1]
	            xs = np.array([float(v.attrib["X"]) for v in vertices], dtype=float)
	            ys = np.array([float(v.attrib["Y"]) for v in vertices], dtype=float)
	            # Array rows follow the Y coordinate, columns follow X.
	            rows, cols = draw.polygon(ys, xs, instance_map.shape)
	            instance_map[rows, cols] = element_idx
	            element_idx += 1
	    return instance_map


	# Command-line interface: both dataset locations are mandatory string options.
	parser = argparse.ArgumentParser(
	    description="Convert the MoNuSeg dataset",
	    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
	)
	# Folder that holds the original dataset.
	parser.add_argument(
	    "--input_path",
	    required=True,
	    help="Input path of the original MoNuSeg dataset",
	    type=str,
	)
	# Folder that receives the converted dataset.
	parser.add_argument(
	    "--output_path",
	    required=True,
	    help="Output path to store the processed MoNuSeg dataset",
	    type=str,
	)

	if __name__ == "__main__":
	    # Script entry point: read both paths from the command line and convert.
	    cli_args = parser.parse_args()
	    convert_monuseg(
	        input_path=Path(cli_args.input_path),
	        output_path=Path(cli_args.output_path),
	    )