Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: QIM/tools/qim3d

Commits on Source: 15
This diff is collapsed.
@@ -51,7 +51,7 @@ footer {
/* Input */
input[type="number" i] {
width: 36px !important;
width: 64px !important;
}
input::-webkit-outer-spin-button,
@@ -235,6 +235,10 @@ div.svelte-1frtwj3 {
box-shadow: none !important;
}
.matplotlib > img{
border-radius: 0.375rem !important;
}
/* Hide icons from outputs */
.icon.svelte-1u5vjgs {
visibility: hidden !important;
@@ -2,12 +2,13 @@ import gradio as gr
import numpy as np
import os
from qim3d.utils import internal_tools
from qim3d.io import DataLoader
from qim3d.io import load
from qim3d.io.logger import log
import tifffile
import outputformat as ouf
import datetime
import matplotlib
# matplotlib.use("Agg")
import matplotlib.pyplot as plt
@@ -31,17 +32,29 @@ class Interface:
def create_interface(self):
with gr.Blocks(css=self.css_path) as gradio_interface:
gr.Markdown("# Data Explorer \n Quick insights from large datasets")
gr.Markdown("# Data Explorer")
with gr.Row():
with gr.Column(scale=0.75):
data_path = gr.Textbox(
value="/home/fima/Downloads/MarineGatropod_1.tif",
value="gbar/zhome/15/b/200707/img_examples/shell_225x128x128.tif",
max_lines=1,
label="Path to the 3D volume",
)
with gr.Column(scale=0.25):
dataset_name = gr.Textbox(
label="Dataset name (in case of H5 files, for example)"
)
with gr.Row(elem_classes="w-256"):
cmap = gr.Dropdown(
value="viridis",
choices=plt.colormaps(),
label="Colormap",
interactive=True,
)
with gr.Row(elem_classes="w-128"):
btn_run = gr.Button(value="Load & Run", elem_classes="btn btn-run")
# Outputs
with gr.Row():
gr.Markdown("## Data overview")
@@ -50,6 +63,7 @@ class Interface:
data_summary = gr.Text(
label=None, show_label=False, elem_classes="monospace-box"
)
with gr.Row():
with gr.Column():
zslice_plot = gr.Plot(label="Z slice", elem_classes="rounded")
zpos = gr.Slider(
@@ -85,24 +99,10 @@ class Interface:
pipeline.verbose = self.verbose
session = gr.State([])
with gr.Row():
gr.Markdown("## Local thickness")
with gr.Row():
gr.Plot()
gr.Plot()
gr.Plot()
with gr.Row():
gr.Markdown("## Structure tensor")
with gr.Row():
gr.Plot()
gr.Plot()
gr.Plot()
### Gradio objects lists
# Inputs
inputs = [zpos, ypos, xpos]
inputs = [zpos, ypos, xpos, cmap, dataset_name]
# Outputs
outputs = [
data_summary,
@@ -156,6 +156,8 @@ class Interface:
session.zpos = args[0]
session.ypos = args[1]
session.xpos = args[2]
session.cmap = args[3]
session.dataset_name = args[4]
return session
@@ -191,7 +193,6 @@ class Interface:
else:
quiet = True
interface.launch(
quiet=quiet,
height=self.height,
@@ -208,6 +209,9 @@ class Session:
self.zpos = 0.5
self.ypos = 0.5
self.xpos = 0.5
self.cmap = "viridis"
self.dataset_name = None
self.error_message = None
# Volume info
self.zsize = None
@@ -229,28 +233,35 @@ class Session:
def get_data_info(self):
# Open file
tif = tifffile.TiffFile(self.data_path)
first_slice = tif.pages[0]
try:
vol = load(
self.data_path, virtual_stack=True, dataset_name=self.dataset_name
)
except Exception as error_message:
self.error_message = error_message
return
first_slice = vol[0]
# Get info
self.zsize = len(tif.pages)
self.zsize = len(vol)
self.ysize, self.xsize = first_slice.shape
self.data_type = first_slice.dtype
self.axes = tif.series[0].axes
self.data_type = str(first_slice.dtype)
self.last_modified = datetime.datetime.fromtimestamp(
os.path.getmtime(self.data_path)
).strftime("%Y-%m-%d %H:%M")
self.file_size = os.path.getsize(self.data_path)
# Close file
tif.close()
def create_summary_dict(self):
# Create dictionary
if self.error_message:
self.summary_dict = {"error_mesage": self.error_message}
else:
self.summary_dict = {
"Last modified": self.last_modified,
"File size": internal_tools.sizeof(self.file_size),
"Axes": self.axes,
"Z-size": str(self.zsize),
"Y-size": str(self.ysize),
"X-size": str(self.xsize),
@@ -261,6 +272,10 @@ class Session:
}
def summary_str(self):
if "error_mesage" in self.summary_dict:
error_box = ouf.boxtitle("ERROR", return_str=True)
return f"{error_box}\n{self.summary_dict['error_mesage']}"
else:
display_dict = {k: v for k, v in self.summary_dict.items() if v is not None}
return ouf.showdict(display_dict, return_str=True, title="Data summary")
@@ -295,7 +310,13 @@ class Pipeline:
session.create_summary_dict()
# Memory map data as a virtual stack
session.vol = DataLoader().load_tiff(session.data_path)
try:
session.vol = load(
session.data_path, virtual_stack=True, dataset_name=session.dataset_name
)
except Exception:
return session
if self.verbose:
log.info(ouf.br(3, return_str=True) + session.summary_str())
@@ -324,6 +345,7 @@ class Pipeline:
def create_slice_fig(self, axis, session):
plt.close()
vol = session.vol
plt.set_cmap(session.cmap)
zslice = session.zslice_from_zpos()
yslice = session.yslice_from_ypos()
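# A minimal standalone sketch of the colormap control introduced above: a
# gr.Dropdown populated from plt.colormaps() re-renders a slice plot whenever
# the selection changes. The random volume and widget names are illustrative
# assumptions, not part of the Data Explorer code.
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np

vol = np.random.rand(64, 128, 128)  # placeholder volume

def plot_zslice(cmap):
    # Plot the middle Z slice with the chosen colormap
    fig, ax = plt.subplots()
    ax.imshow(vol[vol.shape[0] // 2], cmap=cmap)
    ax.axis("off")
    return fig

with gr.Blocks() as demo:
    cmap = gr.Dropdown(value="viridis", choices=plt.colormaps(), label="Colormap", interactive=True)
    zslice_plot = gr.Plot(label="Z slice")
    cmap.change(fn=plot_zslice, inputs=cmap, outputs=zslice_plot)

# demo.launch()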
This diff is collapsed.
@@ -5,6 +5,7 @@ import sys
import difflib
import tifffile
import h5py
import numpy as np
from qim3d.io.logger import log
from qim3d.utils.internal_tools import sizeof
@@ -14,7 +15,7 @@ class DataLoader:
Args:
virtual_stack (bool, optional): Specifies whether to use virtual stack
when loading TIFF files. Default is False.
when loading files. Default is False.
Attributes:
virtual_stack (bool): Specifies whether virtual stack is enabled.
@@ -22,6 +23,8 @@
Methods:
load_tiff(path): Load a TIFF file from the specified path.
load_h5(path): Load an HDF5 file from the specified path.
load_tiff_stack(path): Load a stack of TIFF files from the specified path.
load_txrm(path): Load a TXRM/TXM/XRM file from the specified path.
load(path): Load a file or directory based on the given path.
Raises:
@@ -36,11 +39,19 @@
"""Initializes a new instance of the DataLoader class.
Args:
path (str): The path to the file or directory.
virtual_stack (bool, optional): Specifies whether to use virtual
stack when loading TIFF files. Default is False.
stack when loading files. Default is False.
dataset_name (str, optional): Specifies the name of the dataset to be loaded
in case multiple datasets exist within the same file. Default is None (only for HDF5 files)
return_metadata (bool, optional): Specifies whether to return metadata or not. Default is False (only for HDF5 files)
contains (str, optional): Specifies a part of the name that is common for the TIFF file stack to be loaded (only for TIFF stacks)
"""
# Virtual stack is False by default
self.virtual_stack = kwargs.get("virtual_stack", False)
self.dataset_name = kwargs.get("dataset_name", None)
self.return_metadata = kwargs.get("return_metadata", False)
self.contains = kwargs.get("contains", None)
def load_tiff(self, path):
"""Load a TIFF file from the specified path.
@@ -69,12 +80,163 @@ class DataLoader:
Args:
path (str): The path to the HDF5 file.
Returns:
numpy.ndarray or tuple: The loaded volume as a NumPy array.
If 'return_metadata' is True, returns a tuple (volume, metadata).
Raises:
ValueError: If the specified dataset_name is not found or is invalid.
ValueError: If dataset_name is not specified and the HDF5 file contains multiple datasets.
ValueError: If no datasets are found in the file.
"""
# Read file
f = h5py.File(path, "r")
data_keys = _get_h5_dataset_keys(f)
datasets = []
metadata = {}
for key in data_keys:
if (
f[key].ndim > 1
): # Data is assumed to be a dataset if it has two or more dimensions
datasets.append(key)
if f[key].attrs.keys():
metadata[key] = {
"value": f[key][()],
**{attr_key: val for attr_key, val in f[key].attrs.items()},
}
# Only one dataset was found
if len(datasets) == 1:
if self.dataset_name:
log.info(
"'dataset_name' argument is unused since there is only one dataset in the file"
)
name = datasets[0]
vol = f[name]
# Multiple datasets were found
elif len(datasets) > 1:
if self.dataset_name in datasets: # Provided dataset name is valid
name = self.dataset_name
vol = f[name]
else:
if self.dataset_name: # Dataset name is provided
similar_names = difflib.get_close_matches(
self.dataset_name, datasets
) # Find closest matching name if any
if similar_names:
suggestion = similar_names[0] # Get the closest match
raise ValueError(
f"Invalid dataset name. Did you mean '{suggestion}'?"
)
else:
raise ValueError(
f"Invalid dataset name. Please choose between the following datasets: {datasets}"
)
else:
raise ValueError(
f"Found multiple datasets: {datasets}. Please specify which of them that you want to load with the argument 'dataset_name'"
)
# No datasets were found
else:
raise ValueError(f"Did not find any data in the file: {path}")
if not self.virtual_stack:
vol = vol[()] # Load dataset into memory
f.close()
else:
log.info("Using virtual stack")
log.info("Loaded the following dataset: %s", name)
log.info("Loaded shape: %s", vol.shape)
log.info("Using %s of memory", sizeof(sys.getsizeof(vol)))
if self.return_metadata:
return vol, metadata
else:
return vol
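# A usage sketch for the HDF5 branch above, via the public qim3d.io.load
# wrapper defined at the bottom of this module. The file path and dataset
# name are hypothetical placeholders.
import qim3d

# Lazily open one named dataset from a file that holds several datasets
vol = qim3d.io.load("experiment.h5", virtual_stack=True, dataset_name="exchange/data")

# Load into memory and also return the HDF5 attribute metadata
vol, metadata = qim3d.io.load("experiment.h5", return_metadata=True, dataset_name="exchange/data")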
def load_tiff_stack(self, path):
"""Load a stack of TIFF files from the specified path.
Args:
path (str): The path to the stack of TIFF files.
Returns:
numpy.ndarray: The loaded volume as a NumPy array.
Raises:
ValueError: If the 'contains' argument is not specified.
ValueError: If the 'contains' argument matches multiple TIFF stacks in the directory.
"""
if not self.contains:
raise ValueError(
"Please specify a part of the name that is common for the TIFF file stack with the argument 'contains'"
)
tiff_stack = [file for file in os.listdir(path) if (file.endswith('.tif') or file.endswith('.tiff')) and self.contains in file]
tiff_stack.sort() # Ensure proper ordering
# Check that only one TIFF stack in the directory contains the provided string in its name
tiff_stack_only_letters = []
for filename in tiff_stack:
name = os.path.splitext(filename)[0] # Remove file extension
tiff_stack_only_letters.append(''.join(filter(str.isalpha, name))) # Remove everything other than letters from the name
# Get unique elements from tiff_stack_only_letters
unique_names = list(set(tiff_stack_only_letters))
if len(unique_names)>1:
raise ValueError(f"The provided part of the filename for the TIFF stack matches multiple TIFF stacks: {unique_names}.\nPlease provide a string that is unique for the TIFF stack that is intended to be loaded")
vol = tifffile.imread([os.path.join(path, file) for file in tiff_stack],out='memmap')
if not self.virtual_stack:
vol = np.copy(vol) # Copy to memory
else:
log.info("Using virtual stack")
log.info("Found %s file(s)", len(tiff_stack))
log.info("Loaded shape: %s", vol.shape)
log.info("Using %s of memory", sizeof(sys.getsizeof(vol)))
return vol
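# A usage sketch for the TIFF-stack branch above. The directory and the
# 'contains' string are hypothetical; 'contains' must be unique to one stack
# in the directory, e.g. files named slice_000.tif, slice_001.tif, ...
import qim3d

vol = qim3d.io.load("scans/sample_A/", contains="slice_", virtual_stack=True)
print(vol.shape)  # (number of slices, height, width)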
def load_txrm(self,path):
"""Load a TXRM/XRM/TXM file from the specified path.
Args:
path (str): The path to the TXRM/XRM/TXM file.
Returns:
numpy.ndarray or tuple: The loaded volume as a NumPy array.
If 'return_metadata' is True, returns a tuple (volume, metadata).
Raises:
ValueError: If the dxchange library is not installed.
"""
with h5py.File(path, "r") as f:
vol = f["data"][:]
try:
import dxchange
except ImportError:
raise ValueError('The library dxchange is required to load TXRM files. Please find installation instructions at https://dxchange.readthedocs.io/en/latest/source/install.html')
vol, metadata = dxchange.read_txrm(path)
vol = vol.squeeze() # In case of an XRM file, the third redundant dimension is removed
log.info("Loaded shape: %s", vol.shape)
log.info("Using %s of memory", sizeof(sys.getsizeof(vol)))
if self.virtual_stack:
raise NotImplementedError("Using virtual stack for TXRM files is not implemented yet")
if self.return_metadata:
return vol, metadata
else:
return vol
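# A usage sketch for the TXRM/XRM/TXM branch above; it requires the optional
# dxchange dependency to be installed. The file name is a hypothetical placeholder.
import qim3d

vol, metadata = qim3d.io.load("scan.txrm", return_metadata=True)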
def load(self, path):
@@ -102,12 +264,14 @@ class DataLoader:
return self.load_tiff(path)
elif path.endswith(".h5"):
return self.load_h5(path)
elif path.endswith((".txrm",".txm",".xrm")):
return self.load_txrm(path)
else:
raise ValueError("Unsupported file format")
# Load a directory
elif os.path.isdir(path):
raise NotImplementedError("Loading from directory is not implemented yet")
return self.load_tiff_stack(path)
# Fails
else:
@@ -123,16 +287,26 @@ class DataLoader:
else:
raise ValueError("Invalid path")
def _get_h5_dataset_keys(f):
keys = []
f.visit(
lambda key: keys.append(key) if isinstance(f[key], h5py.Dataset) else None
)
return keys
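# A small sketch of what _get_h5_dataset_keys collects, using a throwaway
# HDF5 file; the file name and dataset names are hypothetical, and the import
# assumes the helper stays in the qim3d.io.load module.
import h5py
import numpy as np
from qim3d.io.load import _get_h5_dataset_keys

with h5py.File("tmp_example.h5", "w") as f:
    f.create_dataset("exchange/data", data=np.zeros((2, 3)))
    f.create_dataset("meta/version", data=1)

with h5py.File("tmp_example.h5", "r") as f:
    print(_get_h5_dataset_keys(f))  # ['exchange/data', 'meta/version']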
def load(path, virtual_stack=False, **kwargs):
def load(path, virtual_stack=False, dataset_name=None, return_metadata=False, contains=None, **kwargs):
"""
Load data from the specified file or directory.
Args:
path (str): The path to the file or directory.
virtual_stack (bool, optional): Specifies whether to use virtual
stack when loading TIFF files. Default is False.
stack when loading TIFF and HDF5 files. Default is False.
dataset_name (str, optional): Specifies the name of the dataset to be loaded
in case multiple datasets exist within the same file. Default is None (only for HDF5 files)
return_metadata (bool, optional): Specifies whether to return metadata or not. Default is False (only for HDF5 and TXRM files)
contains (str, optional): Specifies a part of the name that is common for the TIFF file stack to be loaded (only for TIFF stacks)
**kwargs: Additional keyword arguments to be passed
to the DataLoader constructor.
@@ -147,6 +321,12 @@ def load(path, virtual_stack=False, **kwargs):
Example:
data = load("image.tif", virtual_stack=True)
"""
loader = DataLoader(virtual_stack=virtual_stack, **kwargs)
loader = DataLoader(
virtual_stack=virtual_stack,
dataset_name=dataset_name,
return_metadata=return_metadata,
contains=contains,
**kwargs,
)
return loader.load(path)
\ No newline at end of file
from .img import grid_pred, grid_overview
\ No newline at end of file
from .img import grid_pred, grid_overview, slice_viz
\ No newline at end of file
@@ -5,7 +5,7 @@ from matplotlib import cm
import torch
import numpy as np
from qim3d.io.logger import log
from qim3d.io.load import load
def grid_overview(data, num_images=7, cmap_im="gray", cmap_segm="viridis", alpha=0.5):
"""Displays an overview grid of images, labels, and masks (if they exist).
@@ -192,4 +192,71 @@ def grid_pred(in_targ_preds, num_images=7, cmap_im="gray", cmap_segm="viridis",
fig.show()
def slice_viz(input, position = 'mid', cmap="viridis", axis=False, img_height=2, img_width=2):
""" Displays one or several slices from a 3d array.
Args:
input (str, numpy.ndarray): Path to the file or 3-dimensional array.
position (str, int, list, array, optional): One or several slicing levels.
cmap (str, optional): Specifies the color map for the image.
axis (bool, optional): Specifies whether the axes should be included.
img_height (int, optional): Height in inches of each displayed slice. Default is 2.
img_width (int, optional): Width in inches of each displayed slice. Default is 2.
Raises:
ValueError: If the provided string for 'position' is not recognized (must be a variant of 'top', 'mid' or 'bottom').
Usage:
image_path = '/my_image_path/my_image.tif'
slice_viz(image_path)
"""
# Filepath input
if isinstance(input,str):
vol = load(input) # Function has its own ValueErrors
dim = vol.ndim
if dim == 3:
pass
else:
raise ValueError(f"Given array is not a volume! Current dimension: {dim}")
# Numpy array input
elif isinstance(input,np.ndarray):
dim = input.ndim
if dim == 3:
vol = input
else:
raise ValueError(f"Given array is not a volume! Current dimension: {dim}")
# Position is a string
if isinstance(position,str):
if position.lower() in ['mid','middle']:
height = [int(vol.shape[-1]/2)]
elif position.lower() in ['top','upper', 'start']:
height = [0]
elif position.lower() in ['bot','bottom', 'end']:
height = [vol.shape[-1]-1]
else:
raise ValueError('Position not recognized. Choose an integer, list, array or "start","mid","end".')
# Position is an integer
elif isinstance(position,int):
height = [position]
# Position is a list or array of integers
elif isinstance(position,(list,np.ndarray)):
height = position
num_images = len(height)
fig = plt.figure(figsize=(img_width * num_images, img_height), constrained_layout = True)
axs = fig.subplots(nrows = 1, ncols = num_images)
for col, ax in enumerate(np.atleast_1d(axs)):
ax.imshow(vol[:,:,height[col]],cmap = cmap)
ax.set_title(f'Slice {height[col]}', fontsize=8)
if not axis:
ax.axis('off')
fig.show()
\ No newline at end of file
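# A usage sketch for slice_viz as defined above; the volume is a random
# placeholder and the TIFF path is hypothetical.
import numpy as np

vol = np.random.rand(64, 64, 64)
slice_viz(vol)                               # middle slice along the last axis
slice_viz(vol, position=[0, 31, 63])         # several explicit slice indices
slice_viz("my_volume.tif", position="top", cmap="magma")  # loads the file via qim3d.io.load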