From 49715728a382ecf5263fe8f079bfd7cb73fbb4fd Mon Sep 17 00:00:00 2001
From: HavardStridBuholdt <havard.strid.buholdt@gmail.com>
Date: Wed, 11 Mar 2026 16:40:11 +0100
Subject: [PATCH 1/3] Added cloudScreen_Zhao implementation

---
 ppcpy/cloudmask/cloudscreen.py | 340 +++++++++++++++++++++++++++++----
 1 file changed, 305 insertions(+), 35 deletions(-)

diff --git a/ppcpy/cloudmask/cloudscreen.py b/ppcpy/cloudmask/cloudscreen.py
index 35b0043..6f1f801 100644
--- a/ppcpy/cloudmask/cloudscreen.py
+++ b/ppcpy/cloudmask/cloudscreen.py
@@ -1,7 +1,9 @@
 
 
 import numpy as np
+from scipy.ndimage import label
 from ppcpy.misc.helper import uniform_filter
+from ppcpy.retrievals.ramanhelpers import savgol_filter
 
 # Helper functions
 def smooth_signal(signal:np.ndarray, window_len:int) -> np.ndarray:
@@ -27,9 +29,20 @@ def smooth_signal(signal:np.ndarray, window_len:int) -> np.ndarray:
     return uniform_filter(signal, window_len)
 
 
-def cloudscreen(data_cube):
-    """ """
+def cloudscreen(data_cube) -> np.ndarray:
+    """Preform cloud screening.
 
+    Parameters
+    ----------
+    data_cube : object
+        Main PicassoProc object.
+    
+    Returns
+    -------
+    falgCloudFree : ndarray
+        1 dimensional temporal boolean array. 0 = cloudy, 1 = cloud free. 
+    
+    """
     config_dict = data_cube.polly_config_dict
     print('Starting cloud screen')
     print('cloud screen mode', config_dict['cloudScreenMode'])
@@ -66,32 +79,37 @@ def cloudscreen(data_cube):
 
 
 
-def cloudScreen_MSG(height, RCS, slope_thres, search_region):
-    """
-    CLOUDSCREEN_MSG cloud screen with maximum signal gradient.
-
-
-    INPUTS:
-        height: array
-            Height in meters.
-        signal: array (time, height) !! this is transposed compared to the original implementation 
-            Photon count rate in MHz.
-        slope_thres: float
-            Threshold of the slope to determine whether there is strong backscatter signal. [MHz*m]
-        search_region: list or array (2 elements)
-            [baseHeight, topHeight] in meters.
-
-    OUTPUTS:
-        flagCloudFree: boolean array
-            Indicates whether the profile is cloud free.
-        layerStatus: matrix (height x time)
-            Layer status for each bin (0: unknown, 1: cloud, 2: aerosol).
-
-    HISTORY:
-        - 2021-05-18: First edition by Zhenping
-        - 2025-03-20: Translated into python
-    """
+def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, search_region:list) -> float:
+    """Cloud screen with maximum signal gradient.
+
+    Parameters
+    ----------
+    height : array
+        Height in meters.
+    signal : array (time, height) !! this is transposed compared to the original implementation 
+        Photon count rate in MHz.
+    slope_thres : float
+        Threshold of the slope to determine whether there is strong backscatter signal. [MHz*m]
+    search_region : list or array (2 elements)
+        [baseHeight, topHeight] in meters.
+
+    Returns
+    -------
+    flagCloudFree : boolean array
+        Indicates whether the profile is cloud free.
+    layerStatus: matrix (height x time)
+        Layer status for each bin (0: unknown, 1: cloud, 2: aerosol).
 
+    History
+    -------
+    - 2021-05-18: First edition by Zhenping.
+    - 2025-03-20: Translated into python.
+
+    Notes
+    -----
+    - layerStatus is currently not calculated in this module.
+
+    """
     if len(search_region) != 2 or search_region[1] <= height[0]:
         raise ValueError("Not a valid search_region.")
 
@@ -99,17 +117,17 @@ def cloudScreen_MSG(height, RCS, slope_thres, search_region):
         print(f"Warning: Base of search_region is lower than {height[0]}, setting it to {height[0]}")
         search_region[0] = height[0]
 
-    flagCloudFree = np.zeros(RCS.shape[0], dtype=bool)
-    layerStatus = np.zeros_like(RCS, dtype=int)
+    flagCloudFree = np.zeros(signal.shape[0], dtype=bool)
+    layerStatus = np.zeros_like(signal, dtype=int)
 
     # Find indices corresponding to search_region
     search_indx = np.array(((np.array(search_region) - height[0]) / (height[1] - height[0])) + 1, dtype=int)
 
-    for indx in range(RCS.shape[0]):
-        if np.isnan(RCS[indx, 0]):
+    for indx in range(signal.shape[0]):
+        if np.isnan(signal[indx, 0]):
             continue
 
-        slope = np.concatenate(([0], np.diff(smooth_signal(RCS[indx, :], 10)))) / (height[1] - height[0])
+        slope = np.concatenate(([0], np.diff(smooth_signal(signal[indx, :], 10)))) / (height[1] - height[0])
 
         if not np.any(slope[search_indx[0]:search_indx[1]] >= slope_thres):
             flagCloudFree[indx] = True
@@ -117,6 +135,258 @@ def cloudScreen_MSG(height, RCS, slope_thres, search_region):
     return flagCloudFree, layerStatus
 
 
-def cloudScreen_Zhao(height, RCS, slope_thres, search_region):
-    """ """
-    raise ValueError('not yet implemented')
\ No newline at end of file
+def cloudScreen_Zhao(time:np.ndarray, height:np.ndarray, signal:np.ndarray, bg:np.ndarray, search_region:list=[0, 10000],
+                     minDepth:float=100, heightFullOverlap:float=600, smoothWin:int=8, minSNR:float=1) -> tuple:
+    """Cloud layer detection based on Zhao's algorithm.
+
+    Parameters
+    ----------
+    time : ndarray
+        measurement time for each profile [datenum].
+    height : ndarray
+        height above ground [m].
+    signal : ndarray
+        Photon count rate signal [MHz].
+    bg : ndarray
+        Background of the Photon count rate signal [MHz].
+    search_region : list
+        bottom and top height for cloud detection [m].
+    minDepth : flaot
+        minimum layer depth [m]. Default is 100.
+    heightFullOverlap : float
+        minimum heght with full overlap [m]. Default is 600.
+    smoothWin : int
+        smoothing window width [bins]. Default is 8.
+    minSNR : flaot
+        minimum layer mean signal-noise-ratio. Default is 1.
+
+    Returns
+    -------
+    flagCloudFree : boolean array
+        Indicates whether the profile is cloud free.
+    layerStatus: matrix (height x time)
+        Layer status for each bin (0: unknown, 1: cloud, 2: aerosol).
+    
+    References
+    ----------
+    Zhao, C., Y. Wang, Q. Wang, Z. Li, Z. Wang, and D. Liu (2014), A new cloud and aerosol
+    layer detection method based on micropulse lidar measurements, Journal of Geophysical
+    Research: Atmospheres, 119(11), 6788-6802.
+
+    History
+    -------
+    - 2021-05-18: First edition by Zhengping.
+    - 2026-03-11: Translated into python.
+
+    Notes
+    -----
+    - Not yet tested or croschecked with the Matlab version!
+
+    """
+    if (signal.shape[0] != height.shape[0] or
+        signal.shape[1] != time.shape[0] or
+        signal.shape[1] != bg.shape[0]):
+        raise ValueError('Dimensions are not matched!')
+
+    flagCloudFree = np.ones((1, len(time)), dtype=bool)
+    layerStatus = np.zeros(signal.shape, dtype=int)
+
+    flagDetectBins = (height >= search_region[0] & height <= search_region[2])
+
+    for iTime in range(len(time)):
+        ## layer detection
+        layerInfo = VDE_cld(
+            signal=signal[flagDetectBins, iTime],
+            height=height[flagDetectBins] / 1e3,
+            BG=bg(iTime),
+            minLayerDepth=minDepth / 1e3,
+            minHeight=heightFullOverlap / 1e3,
+            smoothWin=smoothWin,
+            minSNR=minSNR
+        )
+
+        for iLayer in layerInfo:
+            ## gridding the layers
+            layerIndex = (height >= layerInfo[iLayer]['baseHeight'] * 1e3 and
+                          height <= layerInfo[iLayer]['topHeight'] * 1e3)
+            
+            if layerInfo[iLayer]['flagCloud']:
+                ## cloud layer
+                flagCloudFree[iTime] = False
+                layerStatus[layerIndex, iTime] = 1
+
+            else:
+                ## aerosol layer
+                layerStatus[layerIndex, iTime] = 2
+
+    return flagCloudFree, layerStatus
+
+
+def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float=0.2,
+            minHeight:float=0.4, smoothWin:int=3, minSNR:int=5) -> tuple:
+    """VDE_CLD cloud layer detection with VDE method. This method only required elstic signal.
+    
+    Parameters
+    ----------
+    signal : ndarray
+        raw signal without background [photon count].
+    height : ndarray
+        height above the ground [km].
+    BG : float
+        background signal [photon count].
+    minLayerDepth : float
+        minimun layer geometrical depth [km]. Default is 0.2.
+    minHeight : float
+        minimum height to start the searching with [km]. Default is 0.4.
+    smoothWin : int
+        smoothindow in bins. Default is 3.
+    minSNR : int
+        minimum layer SNR to filter the fake features. Default is 5.
+    
+    Returns
+    -------
+    layerInfo : dict
+        id : int
+            identity of the layer.
+        baseHeight : float
+            the layer base height [km].
+        topHeight : float
+            the layer top height [km].
+        peakHeight : float
+            the layer height with maximum backscatter signal [km].
+        layerDepth : int
+            geometrical depth of the layer [km].
+        flagCloud: bool
+            cloud flag.
+    PD : ndarray
+        SDP signal.
+    PN : ndarray
+        VDE signal
+    
+    References
+    ----------
+    Zhao, C., Y. Wang, Q. Wang, Z. Li, Z. Wang, and D. Liu (2014), A new cloud and aerosol
+    layer detection method based on micropulse lidar measurements, Journal of Geophysical
+    Research: Atmospheres, 119(11), 6788-6802.
+    
+    History
+    -------
+    - 2021-06-13: First edition by Zhenping
+    - 2026-03-11: Translated into pyhton
+
+    Notes
+    -----
+    - Not yet tested or croschecked with the Matlab version!
+    - Breaking errors in the signal shape are suspected, ei. transposed vs not compared to the matlab version. 
+
+    """
+    if np.floor(minLayerDepth / (height[1] - height[0])) < 3:
+        raise ValueError('minLayerDepth must be assured there are at least 3 bins in each layer')
+    
+    layerInfo = []
+
+    minIndex = np.where(height >= minHeight)[0][0]
+    
+    P = signal[minIndex:]
+    height = height[minIndex:]
+
+    # ----------------------------------------------------
+    # 1. Semi-Discretization Process (SDP)
+    # ----------------------------------------------------
+    noise_level = np.sqrt(BG + P)
+    Ps = smooth_signal(P, smoothWin)
+
+    ## bottom to top semi-discretization
+    PD1 = Ps.copy()
+    for i in range(1, len(PD1)):
+        if np.abs(PD1[i] - PD1[i - 1]) <= np.nanmax([noise_level[i]*3, np.sqrt(3)*3]):
+            PD1[i] = PD1[i - 1]
+
+    ### top to bottom semi-discretizion
+    PD2 = Ps.copy()
+    for i in range(len(PD2) - 2, -1, -1):
+        if np.abs(PD2[i] - PD2[i + 1]) <= np.nanmax([noise_level(i)*3, np.sqrt(3)*3]):
+            PD2[i] = PD2[i + 1]
+    
+    PD = np.nanmean(np.vstack([PD1, PD2]), axis=0)
+
+    # ----------------------------------------------------
+    # 2. Value Distribution Equalization (VDE) Process
+    # ----------------------------------------------------
+    IS = np.argsort(PD)
+    RS = PD[IS]
+
+    MA = RS[-1]
+    MI = RS[0]
+
+    PE = np.arange(len(RS)) / len(RS)
+    epsilon = 1e-6
+
+    for i in range(1, len(RS)):
+        if np.abs(RS[i] - RS[i - 1]) <= epsilon:
+            PE[i] = PE[i - 1]
+    
+    yi = PE * (MA - MI) + MI
+    
+    RIS = np.argsort(IS) ## ?????
+    PN = yi[RIS]
+
+    # ----------------------------------------------------
+    # 3. Layer detection
+    # ----------------------------------------------------
+    BZ = np.arange(len(PN), 0, -1)/len(PN) * (MA - MI) + MI
+    layerN = 0
+    L, nLayer = label(PN > BZ)
+
+    for iLayer in range(1, nLayer + 1):
+
+        baseIndex = np.where(L == iLayer)[0][0]
+        topIndex = np.where(L == iLayer)[0][-1]
+
+        layerDepth = height[topIndex] - height[baseIndex]
+        layerIndex = (L == iLayer)
+        layerSNR = np.nanmean(P[layerIndex]) / np.sqrt(np.nanmean(P[layerIndex] + BG))
+
+        if (layerDepth >= minLayerDepth) and (layerSNR >= minSNR):
+
+            layerN += 1
+            layerInfo.append({
+                'id': layerN,
+                'baseHeight': height[baseIndex],
+                'topHeight': height[topIndex],
+                'layerDepth': layerDepth,
+                'flagCloud': False
+            })
+
+    # ----------------------------------------------------
+    # 4. Layer classification
+    # ----------------------------------------------------
+    for iLayer in range(len(layerInfo)):
+        Ps_1 = savgol_filter(P, 10, 2)
+        mask = (height >= layerInfo[iLayer]['baseHeight']) & (height <= layerInfo[iLayer]['topHeight'])
+        sig = Ps_1[mask] * height[mask]**2
+        sig[sig <= 0] = np.nan
+
+        maxIndex = np.nanargmax(sig) # potential crash here if all of sig = NaN
+        maxSig = sig[maxIndex]
+        maxIndex = np.where(mask)[0][0] + maxIndex - 1 # layerIndex + max_Signal_Index
+        T = maxSig / sig[0]
+        D = (np.log(sig[-1] / maxSig)) / (layerInfo[iLayer]['topHeight'] - height[maxIndex])
+        layerHeight = np.nanmean([layerInfo[iLayer]['baseHeight'],
+                                  layerInfo[iLayer]['topHeight']])
+        layerInfo[iLayer]['peekHeight'] = height[maxIndex]
+
+        ## segmented threshold for the determination of layer status
+        if layerHeight <= 1.5:
+            if T > 6 or D < -15:
+                layerInfo[iLayer]['flagCloud'] = True
+        elif layerHeight > 1.5 and layerHeight <= 5:
+            if T > 4 or D < -9:
+                layerInfo[iLayer]['flagCloud'] = True
+        elif layerHeight > 5:
+            if T > 1.5 or D < -9:
+                layerInfo[iLayer]['flagCloud'] = True
+        else:
+            raise ValueError('Invalid height!')
+
+    return layerInfo, PD, PN
\ No newline at end of file

From 7df1e2b1bcf514bc4c535bc9d1483510706479bf Mon Sep 17 00:00:00 2001
From: HavardStridBuholdt <havard.strid.buholdt@gmail.com>
Date: Wed, 18 Mar 2026 15:07:06 +0100
Subject: [PATCH 2/3] Update to CloudScreening module

---
 ppcpy/cloudmask/cloudscreen.py      | 205 ++++++++++++++++------------
 ppcpy/cloudmask/profilesegment.py   |  24 ++--
 ppcpy/misc/helper.py                |  60 ++++++++
 ppcpy/preprocess/pollyPreprocess.py |   2 +-
 ppcpy/retrievals/ramanhelpers.py    |  61 +--------
 5 files changed, 195 insertions(+), 157 deletions(-)

diff --git a/ppcpy/cloudmask/cloudscreen.py b/ppcpy/cloudmask/cloudscreen.py
index 6f1f801..8000dc3 100644
--- a/ppcpy/cloudmask/cloudscreen.py
+++ b/ppcpy/cloudmask/cloudscreen.py
@@ -1,11 +1,8 @@
-
-
 import numpy as np
-from scipy.ndimage import label
-from ppcpy.misc.helper import uniform_filter
-from ppcpy.retrievals.ramanhelpers import savgol_filter
+from scipy.ndimage import label, uniform_filter1d
+from ppcpy.misc.helper import uniform_filter, savgol_filter
+
 
-# Helper functions
 def smooth_signal(signal:np.ndarray, window_len:int) -> np.ndarray:
     """Uniformly smooth the input signal
     
@@ -23,82 +20,114 @@ def smooth_signal(signal:np.ndarray, window_len:int) -> np.ndarray:
     
     History
     -------
-    - 2026-02-04 Changed from scipy.ndimage.uniform_filter1d to ppcpy.misc.helper.uniform_filter
-
+    - 2026-02-04: Changed from scipy.ndimage.uniform_filter1d to ppcpy.misc.helper.uniform_filter
+    - 2026-03-17: Changed back to scipy.ndimage.uniform_filter1d due to NaN-related issues in
+      Zhao's cloud screening algorithm.
     """
-    return uniform_filter(signal, window_len)
 
+    # return uniform_filter(signal, window_len)
+    return uniform_filter1d(signal, window_len, mode='nearest')
 
-def cloudscreen(data_cube) -> np.ndarray:
+
+def cloudscreen(data_cube, wv=532) -> np.ndarray:
     """Preform cloud screening.
 
     Parameters
     ----------
     data_cube : object
         Main PicassoProc object.
+    wv : int, optional
+        Wavelength of the channel to perfom cloud screening [nm]. Default is 532.
     
     Returns
     -------
     falgCloudFree : ndarray
         1 dimensional temporal boolean array. 0 = cloudy, 1 = cloud free. 
     
+    Notes
+    -----
+    - The cloud screening is done for both the far range and near range total channels of
+      wavelength wv if the channels exist.
     """
+
     config_dict = data_cube.polly_config_dict
     print('Starting cloud screen')
     print('cloud screen mode', config_dict['cloudScreenMode'])
-    print('slope_thres', config_dict['maxSigSlope4FilterCloud'])
-    height = data_cube.retrievals_highres['range']
 
-    wv = 532
-    RCS = np.squeeze(data_cube.retrievals_highres['RCS'][:, :, data_cube.gf(wv, 'total', 'FR')])
+    height = data_cube.retrievals_highres['range']
     bg = np.squeeze(data_cube.retrievals_highres['BG'][:, data_cube.gf(wv, 'total', 'FR')])
     hFullOL = np.array(config_dict['heightFullOverlap'])[data_cube.gf(wv, 'total', 'FR')][0]
 
+    # Cloud screen mode dependent configuration
     if config_dict['cloudScreenMode'] == 1:
         screenfunc = cloudScreen_MSG
+        sig = 'RCS'
+        kwargs = {
+            'slope_thres': config_dict['maxSigSlope4FilterCloud'],
+        }
     elif config_dict['cloudScreenMode'] == 2:
         screenfunc = cloudScreen_Zhao
+        sig = 'PCR_slice'
+        kwargs = {
+            'bg': bg,
+            'minSNR': 2, 
+            'heightFullOverlap': hFullOL
+        }
     else:
-        raise ValueError(f'cloudScreenMode not properly defined')
+        raise ValueError(f'cloudScreenMode {config_dict['cloudScreenMode']} not properly defined.')
+
+    signal = np.squeeze(data_cube.retrievals_highres[sig][:, :, data_cube.gf(wv, 'total', 'FR')])
 
     flagCloudFree, layerStatus = screenfunc(
-        height, RCS, config_dict['maxSigSlope4FilterCloud'], [hFullOL, 7000])
+        height=height, signal=signal,
+        search_region=[hFullOL, 7000],
+        **kwargs
+    )
 
     # and for near range if it exists
     if np.any(data_cube.gf(wv, 'total', 'NR')):
-        RCS = np.squeeze(data_cube.retrievals_highres['RCS'][:, :, data_cube.gf(wv, 'total', 'NR')])
+        signal = np.squeeze(data_cube.retrievals_highres[sig][:, :, data_cube.gf(wv, 'total', 'NR')])
         bg = np.squeeze(data_cube.retrievals_highres['BG'][:, data_cube.gf(wv, 'total', 'NR')])
         hFullOL = np.array(config_dict['heightFullOverlap'])[data_cube.gf(wv, 'total', 'NR')][0]
-        if config_dict['cloudScreenMode'] == 1:
-            flagCloudFree_NR, layerStatus_NR = screenfunc(
-                height, RCS, config_dict['maxSigSlope4FilterCloud'], [hFullOL, 2000])
+
+        if 'hFullOl' in kwargs and 'bg' in kwargs:
+            kwargs['heightFullOverlap'] = hFullOL
+            kwargs['bg'] = bg
+
+        flagCloudFree_NR, layerStatus_NR = screenfunc(
+            height=height, signal=signal,
+            search_region=[hFullOL, 2000],
+            **kwargs
+        )
 
         flagCloudFree = flagCloudFree & flagCloudFree_NR
 
     return flagCloudFree
 
 
-
-def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, search_region:list) -> float:
+def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, search_region:list, smooth_win:int=9) -> float:
     """Cloud screen with maximum signal gradient.
 
     Parameters
     ----------
-    height : array
+    height : ndarray
         Height in meters.
-    signal : array (time, height) !! this is transposed compared to the original implementation 
-        Photon count rate in MHz.
+    signal : ndarray (time, height)
+        Range corrected photon count rate signal [MHz].
     slope_thres : float
         Threshold of the slope to determine whether there is strong backscatter signal. [MHz*m]
     search_region : list or array (2 elements)
         [baseHeight, topHeight] in meters.
+    smooth_win : int, optional
+        Width of the applied smoothing filter [bins]. Default is 9.
 
     Returns
     -------
-    flagCloudFree : boolean array
-        Indicates whether the profile is cloud free.
-    layerStatus: matrix (height x time)
+    flagCloudFree : ndarray
+        A boolean array indicates whether the profile is cloud free.
+    layerStatus: ndarray (time x height)
         Layer status for each bin (0: unknown, 1: cloud, 2: aerosol).
+        
 
     History
     -------
@@ -107,9 +136,9 @@ def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, sea
 
     Notes
     -----
-    - layerStatus is currently not calculated in this module.
-
+    - This function does not yield any layerStatus information.
     """
+
     if len(search_region) != 2 or search_region[1] <= height[0]:
         raise ValueError("Not a valid search_region.")
 
@@ -118,53 +147,52 @@ def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, sea
         search_region[0] = height[0]
 
     flagCloudFree = np.zeros(signal.shape[0], dtype=bool)
-    layerStatus = np.zeros_like(signal, dtype=int)
+    layerStatus = None
 
     # Find indices corresponding to search_region
-    search_indx = np.array(((np.array(search_region) - height[0]) / (height[1] - height[0])) + 1, dtype=int)
+    search_index = np.searchsorted(height, np.array(search_region))
 
     for indx in range(signal.shape[0]):
         if np.isnan(signal[indx, 0]):
+            print(f"Skipping timestamp {indx}.")
             continue
 
-        slope = np.concatenate(([0], np.diff(smooth_signal(signal[indx, :], 10)))) / (height[1] - height[0])
+        slope = np.concatenate(([0], np.diff(smooth_signal(signal[indx, :], smooth_win)))) / (height[1] - height[0])
 
-        if not np.any(slope[search_indx[0]:search_indx[1]] >= slope_thres):
+        if not np.any(slope[search_index[0]:search_index[1] + 1] >= slope_thres):
             flagCloudFree[indx] = True
 
     return flagCloudFree, layerStatus
 
 
-def cloudScreen_Zhao(time:np.ndarray, height:np.ndarray, signal:np.ndarray, bg:np.ndarray, search_region:list=[0, 10000],
-                     minDepth:float=100, heightFullOverlap:float=600, smoothWin:int=8, minSNR:float=1) -> tuple:
+def cloudScreen_Zhao(height:np.ndarray, signal:np.ndarray, bg:np.ndarray, search_region:list=[0, 10000],
+                     minDepth:float=100, heightFullOverlap:float=600, smoothWin:int=7, minSNR:float=1) -> tuple:
     """Cloud layer detection based on Zhao's algorithm.
 
     Parameters
     ----------
-    time : ndarray
-        measurement time for each profile [datenum].
     height : ndarray
-        height above ground [m].
-    signal : ndarray
-        Photon count rate signal [MHz].
+        Height above ground [m].
+    signal : ndarray (time, height)
+        Background corrected photon count rate signal [MHz].
     bg : ndarray
-        Background of the Photon count rate signal [MHz].
+        Background of the photon count rate signal [MHz].
     search_region : list
-        bottom and top height for cloud detection [m].
-    minDepth : flaot
-        minimum layer depth [m]. Default is 100.
+        Bottom and top height for cloud detection [m].
+    minDepth : float
+        Minimum layer depth [m]. Default is 100.
     heightFullOverlap : float
-        minimum heght with full overlap [m]. Default is 600.
+        Minimum height with full overlap [m]. Default is 600.
     smoothWin : int
-        smoothing window width [bins]. Default is 8.
-    minSNR : flaot
-        minimum layer mean signal-noise-ratio. Default is 1.
+        Smoothing window width [bins]. Default is 7.
+    minSNR : float
+        Minimum layer mean signal-noise-ratio. Default is 1.
 
     Returns
     -------
-    flagCloudFree : boolean array
-        Indicates whether the profile is cloud free.
-    layerStatus: matrix (height x time)
+    flagCloudFree : ndarray
+        A boolean array indicates whether the profile is cloud free.
+    layerStatus: ndarray (time x height)
         Layer status for each bin (0: unknown, 1: cloud, 2: aerosol).
     
     References
@@ -177,75 +205,73 @@ def cloudScreen_Zhao(time:np.ndarray, height:np.ndarray, signal:np.ndarray, bg:n
     -------
     - 2021-05-18: First edition by Zhengping.
     - 2026-03-11: Translated into python.
-
-    Notes
-    -----
-    - Not yet tested or croschecked with the Matlab version!
-
     """
-    if (signal.shape[0] != height.shape[0] or
-        signal.shape[1] != time.shape[0] or
-        signal.shape[1] != bg.shape[0]):
-        raise ValueError('Dimensions are not matched!')
 
-    flagCloudFree = np.ones((1, len(time)), dtype=bool)
+    if (signal.shape[1] != height.shape[0] or
+        signal.shape[0] != signal.shape[0] or
+        signal.shape[0] != bg.shape[0]):
+        raise ValueError('Dimensions do not match!')
+
+    flagCloudFree = np.ones(signal.shape[0], dtype=bool)
     layerStatus = np.zeros(signal.shape, dtype=int)
 
-    flagDetectBins = (height >= search_region[0] & height <= search_region[2])
+    flagDetectBins = (height >= search_region[0]) & (height <= search_region[1])
 
-    for iTime in range(len(time)):
+    for iTime in range(signal.shape[0]):
         ## layer detection
-        layerInfo = VDE_cld(
-            signal=signal[flagDetectBins, iTime],
+        layerInfo, _, _ = VDE_cld(
+            signal=signal[iTime, flagDetectBins],
             height=height[flagDetectBins] / 1e3,
-            BG=bg(iTime),
+            BG=bg[iTime],
             minLayerDepth=minDepth / 1e3,
             minHeight=heightFullOverlap / 1e3,
             smoothWin=smoothWin,
             minSNR=minSNR
         )
 
-        for iLayer in layerInfo:
+        for iLayer in range(len(layerInfo)):
             ## gridding the layers
-            layerIndex = (height >= layerInfo[iLayer]['baseHeight'] * 1e3 and
-                          height <= layerInfo[iLayer]['topHeight'] * 1e3)
+            layerIndex = (height >= layerInfo[iLayer]['baseHeight'] * 1e3) & \
+                          (height <= layerInfo[iLayer]['topHeight'] * 1e3)
             
             if layerInfo[iLayer]['flagCloud']:
                 ## cloud layer
                 flagCloudFree[iTime] = False
-                layerStatus[layerIndex, iTime] = 1
+                layerStatus[iTime, layerIndex] = 1
 
             else:
                 ## aerosol layer
-                layerStatus[layerIndex, iTime] = 2
+                layerStatus[iTime, layerIndex] = 2
 
     return flagCloudFree, layerStatus
 
 
 def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float=0.2,
-            minHeight:float=0.4, smoothWin:int=3, minSNR:int=5) -> tuple:
-    """VDE_CLD cloud layer detection with VDE method. This method only required elstic signal.
+            minHeight:float=0.4, smoothWin:int=3, savgolSmoothWin:int=9, minSNR:int=5) -> tuple:
+    """Cloud layer detection with VDE method. This method only required elstic signal.
     
     Parameters
     ----------
-    signal : ndarray
+    signal : ndarray (height) 
         raw signal without background [photon count].
     height : ndarray
         height above the ground [km].
     BG : float
         background signal [photon count].
     minLayerDepth : float
-        minimun layer geometrical depth [km]. Default is 0.2.
+        minimum layer geometrical depth [km]. Default is 0.2.
     minHeight : float
         minimum height to start the searching with [km]. Default is 0.4.
     smoothWin : int
-        smoothindow in bins. Default is 3.
+        smoothing window bins. Default is 3.
+    savgolSmoothWin : int
+        smoothing window bins of the Savitzky-Golay filter. Default is 9.
     minSNR : int
         minimum layer SNR to filter the fake features. Default is 5.
     
     Returns
     -------
-    layerInfo : dict
+    layerInfo : list of dicts
         id : int
             identity of the layer.
         baseHeight : float
@@ -276,43 +302,46 @@ def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float=
 
     Notes
     -----
-    - Not yet tested or croschecked with the Matlab version!
-    - Breaking errors in the signal shape are suspected, ei. transposed vs not compared to the matlab version. 
-
+    - Some of the for-loops could be vectorised for enhanced performance. However, not in a pretty way.
+    - Also could consider using numba and its @njit decorator. That would also enhance the performance
+      but the code needs to be rewritten to fit numba's requirements.
+    - This function does not work with NaN values in the signal.
     """
+    
     if np.floor(minLayerDepth / (height[1] - height[0])) < 3:
         raise ValueError('minLayerDepth must be assured there are at least 3 bins in each layer')
     
     layerInfo = []
 
     minIndex = np.where(height >= minHeight)[0][0]
-    
+
     P = signal[minIndex:]
     height = height[minIndex:]
 
     # ----------------------------------------------------
     # 1. Semi-Discretization Process (SDP)
     # ----------------------------------------------------
-    noise_level = np.sqrt(BG + P)
+    noise_tresh = np.maximum(np.sqrt(BG + P)*3, np.sqrt(3)*3)
     Ps = smooth_signal(P, smoothWin)
 
     ## bottom to top semi-discretization
     PD1 = Ps.copy()
     for i in range(1, len(PD1)):
-        if np.abs(PD1[i] - PD1[i - 1]) <= np.nanmax([noise_level[i]*3, np.sqrt(3)*3]):
+        if np.abs(PD1[i] - PD1[i - 1]) <= noise_tresh[i]:
             PD1[i] = PD1[i - 1]
 
     ### top to bottom semi-discretizion
     PD2 = Ps.copy()
     for i in range(len(PD2) - 2, -1, -1):
-        if np.abs(PD2[i] - PD2[i + 1]) <= np.nanmax([noise_level(i)*3, np.sqrt(3)*3]):
+        if np.abs(PD2[i] - PD2[i + 1]) <= noise_tresh[i]:
             PD2[i] = PD2[i + 1]
-    
+
     PD = np.nanmean(np.vstack([PD1, PD2]), axis=0)
 
     # ----------------------------------------------------
     # 2. Value Distribution Equalization (VDE) Process
     # ----------------------------------------------------
+    PD = PD[~np.isnan(PD)]
     IS = np.argsort(PD)
     RS = PD[IS]
 
@@ -328,7 +357,7 @@ def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float=
     
     yi = PE * (MA - MI) + MI
     
-    RIS = np.argsort(IS) ## ?????
+    RIS = np.argsort(IS)
     PN = yi[RIS]
 
     # ----------------------------------------------------
@@ -361,8 +390,8 @@ def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float=
     # ----------------------------------------------------
     # 4. Layer classification
     # ----------------------------------------------------
+    Ps_1 = savgol_filter(P, savgolSmoothWin, 2)
     for iLayer in range(len(layerInfo)):
-        Ps_1 = savgol_filter(P, 10, 2)
         mask = (height >= layerInfo[iLayer]['baseHeight']) & (height <= layerInfo[iLayer]['topHeight'])
         sig = Ps_1[mask] * height[mask]**2
         sig[sig <= 0] = np.nan
diff --git a/ppcpy/cloudmask/profilesegment.py b/ppcpy/cloudmask/profilesegment.py
index de2485a..25b9729 100644
--- a/ppcpy/cloudmask/profilesegment.py
+++ b/ppcpy/cloudmask/profilesegment.py
@@ -2,15 +2,23 @@
 import numpy as np
 from scipy.ndimage import label
 
-def segment(data_cube):
-    """ """
+def segment(data_cube) -> np.ndarray:
+    """Perform cloud free profile segmentation
 
+    Parameters
+    ----------
+    data_cube : object
+        Main PicassoProc object
+    
+    Returns
+    -------
+    clFreeGrps : ndarray
+        Time index of the detected cloud free regions [start, end].
+    """
+    
     config_dict = data_cube.polly_config_dict
-    # the flag in the original matlab version
-    # flagValPrf = flagCloudFree & (~ data.fogMask) & (~ data.depCalMask) & (~ data.shutterOnMask);    
-
-    # TODO for some reason data_cube.retrievals_highres['depCalMask'] is of type masked_array
-    flagValPrf = data_cube.flagCloudFree & (~data_cube.retrievals_highres['depCalMask'])
+    flagValPrf = data_cube.flagCloudFree & (~data_cube.retrievals_highres['depCalMask']) \
+            & (~data_cube.retrievals_highres['shutterOnMask']) & (~data_cube.retrievals_highres['fogMask'])
 
     print('intNProfiles', config_dict['intNProfiles'], 'minIntNProfiles', config_dict['minIntNProfiles'])
     clFreeGrps = clFreeSeg(flagValPrf, config_dict['intNProfiles'], config_dict['minIntNProfiles'])
@@ -19,7 +27,7 @@ def segment(data_cube):
 
 
 
-def clFreeSeg(prfFlag, nIntPrf, minNIntPrf):
+def clFreeSeg(prfFlag:np.ndarray, nIntPrf:int, minNIntPrf:int) -> np.ndarray:
     """CLFREESEG splits continuous cloud-free profiles into small sections.
 
     INPUTS:
diff --git a/ppcpy/misc/helper.py b/ppcpy/misc/helper.py
index 6671e54..23e197d 100644
--- a/ppcpy/misc/helper.py
+++ b/ppcpy/misc/helper.py
@@ -10,6 +10,7 @@
 from pathlib import Path
 import numpy as np
 from scipy.sparse import diags
+from scipy.signal import savgol_coeffs
 
 
 def detect_path_type(fullpath):
@@ -713,6 +714,65 @@ def uniform_filter(x:np.ndarray, win:int, fill_val:float=np.nan) -> np.ndarray:
     raise ValueError("Invalid window configuration.")
 
 
+def savgol_filter(x:np.ndarray, window_length:int, polyorder:int=2, deriv:int=0,
+                  delta:float=1.0, fill_val:float=np.nan) -> np.ndarray:
+    """Savitzky-Golay filter
+
+    A Savitzky-Golay filter that works with NaN values.
+
+    Parameters
+    ----------
+    x : ndarray
+        Signal to be smoothed
+    window_length : int
+        Width of savgol filter
+    polyorder : int, optional
+        The order of the polynomial used to make the filter. 
+        must be less than 'window_length'. Default is 2.
+    deriv : int, optional
+        The order of the derivative to compute for the filter. Default is 0.
+    delta : float, optional
+        The spacing of which the filter will be applied. This is only used 
+        if 'deriv' > 0. Defualt is 1.0.
+    fill_val : float, optional
+        Value to be used for filling edges in order to recreate the input 
+        dimension. Default is np.nan. 
+    
+    Returns
+    -------
+    out : ndarray
+        Smoothed signal
+    
+    Notes
+    -----
+    This function is inspiered by scipy.signal's Savitzky-Golay filter [1].
+
+    References
+    ----------
+    [1] Virtanen, et al., Scipy 1.0: Fundamental Algorithms for Scientific
+    Computing in Python, Nature Methods, 2020, 17, 261-272, https://rdcu.be/b08Wh,
+    10.1038/s41592-019-0686-2
+    [2] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of Data by
+    Simplified Least Squares Procedures. Analytical Chemistry, 1964, 36 (8),
+    pp 1627-1639.
+    [3] Jianwen Luo, Kui Ying, and Jing Bai. 2005. Savitzky-Golay smoothing and
+    differentiation filter for even number data. Signal Process.
+    85, 7 (July 2005), 1429-1434.
+
+    """
+    f = savgol_coeffs(window_length, polyorder, deriv=deriv, delta=delta)
+    x_smooth = np.convolve(x, f, mode='valid')
+    fill = np.full(int((window_length - 1)/2), fill_val)
+
+    # if window_length is even fill one more element at the start.
+    if window_length % 2 == 0:
+        out = np.hstack((np.append(fill, fill_val), x_smooth, fill))
+    else:
+        out = np.hstack((fill, x_smooth, fill))
+    
+    return out
+
+
 def mean_stable(x:np.ndarray, win:int, minBin:int=None, maxBin:int=None, minRelStd:float=None) -> tuple:
     """Calculate the mean value of x based on the least fluctuated 
     segment of x. The searching is based on the std inside each window of x.
diff --git a/ppcpy/preprocess/pollyPreprocess.py b/ppcpy/preprocess/pollyPreprocess.py
index f72472e..9125fc2 100644
--- a/ppcpy/preprocess/pollyPreprocess.py
+++ b/ppcpy/preprocess/pollyPreprocess.py
@@ -728,7 +728,7 @@ def pollyPreprocess(rawdata_dict:dict, collect_debug:bool=False, **param:dict):
     ## Range-corrected Signal calculation
     logging.info('... calculate range-corrected Signal')
     # mask = data_dict['lowSNRMask'].mask
-    mask = data_dict['lowSNRMask']
+    # mask = data_dict['lowSNRMask']
     # masked arry might be slow
     # RCS_masked = np.ma.masked_array(sigBGCor+bg, mask=mask)
     # data_dict['RCS'] = calculate_rcs(datasignal=preproSignal, data_dict=data_dict, mShots=mShots, hRes=hRes)
diff --git a/ppcpy/retrievals/ramanhelpers.py b/ppcpy/retrievals/ramanhelpers.py
index 340c4a3..70efd16 100644
--- a/ppcpy/retrievals/ramanhelpers.py
+++ b/ppcpy/retrievals/ramanhelpers.py
@@ -1,7 +1,6 @@
-
 import numpy as np
 from scipy.stats import norm, poisson
-from scipy.signal import savgol_coeffs
+
 
 def movingslope_variedWin(signal:np.ndarray, winWidth:int|np.ndarray) -> np.ndarray:
     """
@@ -198,61 +197,3 @@ def sigGenWithNoise(signal:np.ndarray, noise:np.ndarray=None, nProfile:int=1, me
         raise ValueError('A valid method should be provided.')
 
     return signalGen
-
-
-def savgol_filter(x:np.ndarray, window_length:int, polyorder:int=2, deriv:int=0, delta:float=1.0, fill_val:float=np.nan) -> np.ndarray:
-    """Savitzky-Golay filter
-
-    A Savitzky-Golay filter that works with NaN values.
-
-    Parameters
-    ----------
-    x : ndarray
-        Signal to be smoothed
-    window_length : int
-        Width of savgol filter
-    polyorder : int, optional
-        The order of the polynomial used to make the filter. 
-        must be less than 'window_length'. Default is 2.
-    deriv : int, optional
-        The order of the derivative to compute for the filter. Default is 0.
-    delta : float, optional
-        The spacing of which the filter will be applied. This is only used 
-        if 'deriv' > 0. Defualt is 1.0.
-    fill_val : float, optional
-        Value to be used for filling edges in order to recreate the input 
-        dimension. Default is np.nan. 
-    
-    Returns
-    -------
-    out : ndarray
-        Smoothed signal
-    
-    Notes
-    -----
-    This function is inspiered by scipy.signal's Savitzky-Golay filter [1].
-
-    References
-    ----------
-    [1] Virtanen, et al., Scipy 1.0: Fundamental Algorithms for Scientific
-    Computing in Python, Nature Methods, 2020, 17, 261-272, https://rdcu.be/b08Wh,
-    10.1038/s41592-019-0686-2
-    [2] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of Data by
-    Simplified Least Squares Procedures. Analytical Chemistry, 1964, 36 (8),
-    pp 1627-1639.
-    [3] Jianwen Luo, Kui Ying, and Jing Bai. 2005. Savitzky-Golay smoothing and
-    differentiation filter for even number data. Signal Process.
-    85, 7 (July 2005), 1429-1434.
-
-    """
-    f = savgol_coeffs(window_length, polyorder, deriv=deriv, delta=delta)
-    x_smooth = np.convolve(x, f, mode='valid')
-    fill = np.full(int((window_length - 1)/2), fill_val)
-
-    # if window_length is even fill one more element at the start.
-    if window_length % 2 == 0:
-        out = np.hstack((np.append(fill, fill_val), x_smooth, fill))
-    else:
-        out = np.hstack((fill, x_smooth, fill))
-    
-    return out

From b6c42de2bf1efb11207895e55c05e99a584c7710 Mon Sep 17 00:00:00 2001
From: HavardStridBuholdt <havard.strid.buholdt@gmail.com>
Date: Wed, 18 Mar 2026 15:25:38 +0100
Subject: [PATCH 3/3] Update to the documentation of cloud screening module

---
 ppcpy/cloudmask/cloudscreen.py    | 36 +++++++++++++++++--------------
 ppcpy/cloudmask/profilesegment.py |  5 +++++
 ppcpy/misc/helper.py              |  8 +++----
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/ppcpy/cloudmask/cloudscreen.py b/ppcpy/cloudmask/cloudscreen.py
index 8000dc3..5a0fd89 100644
--- a/ppcpy/cloudmask/cloudscreen.py
+++ b/ppcpy/cloudmask/cloudscreen.py
@@ -18,8 +18,8 @@ def smooth_signal(signal:np.ndarray, window_len:int) -> np.ndarray:
     ndarray
         Smoothed signal
     
-    History
-    -------
+    **History**
+    
     - 2026-02-04: Changed from scipy.ndimage.uniform_filter1d to ppcpy.misc.helper.uniform_filter
     - 2026-03-17: Changed back to scipy.ndimage.uniform_filter1d due to NaN-related issues in
       Zhao's cloud screening algorithm.
@@ -48,6 +48,11 @@ def cloudscreen(data_cube, wv=532) -> np.ndarray:
     -----
     - The cloud screening is done for both the far range and near range total channels of
       wavelength wv if the channels exist.
+    
+    **History**
+    
+    - xxxx-xx-xx: First edition by ...
+    - 2026-03-18: Updated to include necessary configurations for cloudScreen_Zhao.
     """
 
     config_dict = data_cube.polly_config_dict
@@ -127,16 +132,15 @@ def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, sea
         A boolean array indicates whether the profile is cloud free.
     layerStatus: ndarray (time x height)
         Layer status for each bin (0: unknown, 1: cloud, 2: aerosol).
-        
-
-    History
-    -------
-    - 2021-05-18: First edition by Zhenping.
-    - 2025-03-20: Translated into python.
 
     Notes
     -----
     - This function does not yield any layerStatus information.
+
+    **History**
+    
+    - 2021-05-18: First edition by Zhenping.
+    - 2025-03-20: Translated into python.
     """
 
     if len(search_region) != 2 or search_region[1] <= height[0]:
@@ -201,8 +205,8 @@ def cloudScreen_Zhao(height:np.ndarray, signal:np.ndarray, bg:np.ndarray, search
     layer detection method based on micropulse lidar measurements, Journal of Geophysical
     Research: Atmospheres, 119(11), 6788-6802.
 
-    History
-    -------
+    **History**
+    
     - 2021-05-18: First edition by Zhengping.
     - 2026-03-11: Translated into python.
     """
@@ -294,11 +298,6 @@ def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float=
     Zhao, C., Y. Wang, Q. Wang, Z. Li, Z. Wang, and D. Liu (2014), A new cloud and aerosol
     layer detection method based on micropulse lidar measurements, Journal of Geophysical
     Research: Atmospheres, 119(11), 6788-6802.
-    
-    History
-    -------
-    - 2021-06-13: First edition by Zhenping
-    - 2026-03-11: Translated into pyhton
 
     Notes
     -----
@@ -306,8 +305,13 @@ def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float=
     - Also could consider using numba and its @njit decorator. That would also enhance the performance
       but the code needs to be rewritten to fit numba's requirements.
     - This function does not work with NaN values in the signal.
-    """
+
+    **History**
     
+    - 2021-06-13: First edition by Zhenping
+    - 2026-03-11: Translated into pyhton
+    """
+
     if np.floor(minLayerDepth / (height[1] - height[0])) < 3:
         raise ValueError('minLayerDepth must be assured there are at least 3 bins in each layer')
     
diff --git a/ppcpy/cloudmask/profilesegment.py b/ppcpy/cloudmask/profilesegment.py
index 25b9729..8a47d18 100644
--- a/ppcpy/cloudmask/profilesegment.py
+++ b/ppcpy/cloudmask/profilesegment.py
@@ -14,6 +14,11 @@ def segment(data_cube) -> np.ndarray:
     -------
     clFreeGrps : ndarray
         Time index of the detected cloud free regions [start, end].
+
+    **History**
+
+    - xxxx-xx-xx: First edition by ...
+    - 2026-03-18: Updated flagValPrf to include 'shutterOnMask' and 'fogMask'.
     """
     
     config_dict = data_cube.polly_config_dict
diff --git a/ppcpy/misc/helper.py b/ppcpy/misc/helper.py
index 23e197d..a0e0651 100644
--- a/ppcpy/misc/helper.py
+++ b/ppcpy/misc/helper.py
@@ -682,15 +682,15 @@ def uniform_filter(x:np.ndarray, win:int, fill_val:float=np.nan) -> np.ndarray:
     -------
     out : ndarray
         Smoothed signal.
-    
-    History
-    -------
-    - 2026-02-02: First edition by Buholdt
 
     Notes
     -----
     - Currently only support smoothing of 1D-arrays with a constant window size.
 
+    **History**
+    
+    - 2026-02-02: First edition by Buholdt
+
     """
     if isinstance(win, int):
         if len(x.shape) > 1: