From 49715728a382ecf5263fe8f079bfd7cb73fbb4fd Mon Sep 17 00:00:00 2001 From: HavardStridBuholdt Date: Wed, 11 Mar 2026 16:40:11 +0100 Subject: [PATCH 1/3] Added cloudScreen_Zhao implementation --- ppcpy/cloudmask/cloudscreen.py | 340 +++++++++++++++++++++++++++++---- 1 file changed, 305 insertions(+), 35 deletions(-) diff --git a/ppcpy/cloudmask/cloudscreen.py b/ppcpy/cloudmask/cloudscreen.py index 35b0043..6f1f801 100644 --- a/ppcpy/cloudmask/cloudscreen.py +++ b/ppcpy/cloudmask/cloudscreen.py @@ -1,7 +1,9 @@ import numpy as np +from scipy.ndimage import label from ppcpy.misc.helper import uniform_filter +from ppcpy.retrievals.ramanhelpers import savgol_filter # Helper functions def smooth_signal(signal:np.ndarray, window_len:int) -> np.ndarray: @@ -27,9 +29,20 @@ def smooth_signal(signal:np.ndarray, window_len:int) -> np.ndarray: return uniform_filter(signal, window_len) -def cloudscreen(data_cube): - """ """ +def cloudscreen(data_cube) -> np.ndarray: + """Preform cloud screening. + Parameters + ---------- + data_cube : object + Main PicassoProc object. + + Returns + ------- + falgCloudFree : ndarray + 1 dimensional temporal boolean array. 0 = cloudy, 1 = cloud free. + + """ config_dict = data_cube.polly_config_dict print('Starting cloud screen') print('cloud screen mode', config_dict['cloudScreenMode']) @@ -66,32 +79,37 @@ def cloudscreen(data_cube): -def cloudScreen_MSG(height, RCS, slope_thres, search_region): - """ - CLOUDSCREEN_MSG cloud screen with maximum signal gradient. - - - INPUTS: - height: array - Height in meters. - signal: array (time, height) !! this is transposed compared to the original implementation - Photon count rate in MHz. - slope_thres: float - Threshold of the slope to determine whether there is strong backscatter signal. [MHz*m] - search_region: list or array (2 elements) - [baseHeight, topHeight] in meters. - - OUTPUTS: - flagCloudFree: boolean array - Indicates whether the profile is cloud free. - layerStatus: matrix (height x time) - Layer status for each bin (0: unknown, 1: cloud, 2: aerosol). - - HISTORY: - - 2021-05-18: First edition by Zhenping - - 2025-03-20: Translated into python - """ +def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, search_region:list) -> float: + """Cloud screen with maximum signal gradient. + + Parameters + ---------- + height : array + Height in meters. + signal : array (time, height) !! this is transposed compared to the original implementation + Photon count rate in MHz. + slope_thres : float + Threshold of the slope to determine whether there is strong backscatter signal. [MHz*m] + search_region : list or array (2 elements) + [baseHeight, topHeight] in meters. + + Returns + ------- + flagCloudFree : boolean array + Indicates whether the profile is cloud free. + layerStatus: matrix (height x time) + Layer status for each bin (0: unknown, 1: cloud, 2: aerosol). + History + ------- + - 2021-05-18: First edition by Zhenping. + - 2025-03-20: Translated into python. + + Notes + ----- + - layerStatus is currently not calculated in this module. + + """ if len(search_region) != 2 or search_region[1] <= height[0]: raise ValueError("Not a valid search_region.") @@ -99,17 +117,17 @@ def cloudScreen_MSG(height, RCS, slope_thres, search_region): print(f"Warning: Base of search_region is lower than {height[0]}, setting it to {height[0]}") search_region[0] = height[0] - flagCloudFree = np.zeros(RCS.shape[0], dtype=bool) - layerStatus = np.zeros_like(RCS, dtype=int) + flagCloudFree = np.zeros(signal.shape[0], dtype=bool) + layerStatus = np.zeros_like(signal, dtype=int) # Find indices corresponding to search_region search_indx = np.array(((np.array(search_region) - height[0]) / (height[1] - height[0])) + 1, dtype=int) - for indx in range(RCS.shape[0]): - if np.isnan(RCS[indx, 0]): + for indx in range(signal.shape[0]): + if np.isnan(signal[indx, 0]): continue - slope = np.concatenate(([0], np.diff(smooth_signal(RCS[indx, :], 10)))) / (height[1] - height[0]) + slope = np.concatenate(([0], np.diff(smooth_signal(signal[indx, :], 10)))) / (height[1] - height[0]) if not np.any(slope[search_indx[0]:search_indx[1]] >= slope_thres): flagCloudFree[indx] = True @@ -117,6 +135,258 @@ def cloudScreen_MSG(height, RCS, slope_thres, search_region): return flagCloudFree, layerStatus -def cloudScreen_Zhao(height, RCS, slope_thres, search_region): - """ """ - raise ValueError('not yet implemented') \ No newline at end of file +def cloudScreen_Zhao(time:np.ndarray, height:np.ndarray, signal:np.ndarray, bg:np.ndarray, search_region:list=[0, 10000], + minDepth:float=100, heightFullOverlap:float=600, smoothWin:int=8, minSNR:float=1) -> tuple: + """Cloud layer detection based on Zhao's algorithm. + + Parameters + ---------- + time : ndarray + measurement time for each profile [datenum]. + height : ndarray + height above ground [m]. + signal : ndarray + Photon count rate signal [MHz]. + bg : ndarray + Background of the Photon count rate signal [MHz]. + search_region : list + bottom and top height for cloud detection [m]. + minDepth : flaot + minimum layer depth [m]. Default is 100. + heightFullOverlap : float + minimum heght with full overlap [m]. Default is 600. + smoothWin : int + smoothing window width [bins]. Default is 8. + minSNR : flaot + minimum layer mean signal-noise-ratio. Default is 1. + + Returns + ------- + flagCloudFree : boolean array + Indicates whether the profile is cloud free. + layerStatus: matrix (height x time) + Layer status for each bin (0: unknown, 1: cloud, 2: aerosol). + + References + ---------- + Zhao, C., Y. Wang, Q. Wang, Z. Li, Z. Wang, and D. Liu (2014), A new cloud and aerosol + layer detection method based on micropulse lidar measurements, Journal of Geophysical + Research: Atmospheres, 119(11), 6788-6802. + + History + ------- + - 2021-05-18: First edition by Zhengping. + - 2026-03-11: Translated into python. + + Notes + ----- + - Not yet tested or croschecked with the Matlab version! + + """ + if (signal.shape[0] != height.shape[0] or + signal.shape[1] != time.shape[0] or + signal.shape[1] != bg.shape[0]): + raise ValueError('Dimensions are not matched!') + + flagCloudFree = np.ones((1, len(time)), dtype=bool) + layerStatus = np.zeros(signal.shape, dtype=int) + + flagDetectBins = (height >= search_region[0] & height <= search_region[2]) + + for iTime in range(len(time)): + ## layer detection + layerInfo = VDE_cld( + signal=signal[flagDetectBins, iTime], + height=height[flagDetectBins] / 1e3, + BG=bg(iTime), + minLayerDepth=minDepth / 1e3, + minHeight=heightFullOverlap / 1e3, + smoothWin=smoothWin, + minSNR=minSNR + ) + + for iLayer in layerInfo: + ## gridding the layers + layerIndex = (height >= layerInfo[iLayer]['baseHeight'] * 1e3 and + height <= layerInfo[iLayer]['topHeight'] * 1e3) + + if layerInfo[iLayer]['flagCloud']: + ## cloud layer + flagCloudFree[iTime] = False + layerStatus[layerIndex, iTime] = 1 + + else: + ## aerosol layer + layerStatus[layerIndex, iTime] = 2 + + return flagCloudFree, layerStatus + + +def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float=0.2, + minHeight:float=0.4, smoothWin:int=3, minSNR:int=5) -> tuple: + """VDE_CLD cloud layer detection with VDE method. This method only required elstic signal. + + Parameters + ---------- + signal : ndarray + raw signal without background [photon count]. + height : ndarray + height above the ground [km]. + BG : float + background signal [photon count]. + minLayerDepth : float + minimun layer geometrical depth [km]. Default is 0.2. + minHeight : float + minimum height to start the searching with [km]. Default is 0.4. + smoothWin : int + smoothindow in bins. Default is 3. + minSNR : int + minimum layer SNR to filter the fake features. Default is 5. + + Returns + ------- + layerInfo : dict + id : int + identity of the layer. + baseHeight : float + the layer base height [km]. + topHeight : float + the layer top height [km]. + peakHeight : float + the layer height with maximum backscatter signal [km]. + layerDepth : int + geometrical depth of the layer [km]. + flagCloud: bool + cloud flag. + PD : ndarray + SDP signal. + PN : ndarray + VDE signal + + References + ---------- + Zhao, C., Y. Wang, Q. Wang, Z. Li, Z. Wang, and D. Liu (2014), A new cloud and aerosol + layer detection method based on micropulse lidar measurements, Journal of Geophysical + Research: Atmospheres, 119(11), 6788-6802. + + History + ------- + - 2021-06-13: First edition by Zhenping + - 2026-03-11: Translated into pyhton + + Notes + ----- + - Not yet tested or croschecked with the Matlab version! + - Breaking errors in the signal shape are suspected, ei. transposed vs not compared to the matlab version. + + """ + if np.floor(minLayerDepth / (height[1] - height[0])) < 3: + raise ValueError('minLayerDepth must be assured there are at least 3 bins in each layer') + + layerInfo = [] + + minIndex = np.where(height >= minHeight)[0][0] + + P = signal[minIndex:] + height = height[minIndex:] + + # ---------------------------------------------------- + # 1. Semi-Discretization Process (SDP) + # ---------------------------------------------------- + noise_level = np.sqrt(BG + P) + Ps = smooth_signal(P, smoothWin) + + ## bottom to top semi-discretization + PD1 = Ps.copy() + for i in range(1, len(PD1)): + if np.abs(PD1[i] - PD1[i - 1]) <= np.nanmax([noise_level[i]*3, np.sqrt(3)*3]): + PD1[i] = PD1[i - 1] + + ### top to bottom semi-discretizion + PD2 = Ps.copy() + for i in range(len(PD2) - 2, -1, -1): + if np.abs(PD2[i] - PD2[i + 1]) <= np.nanmax([noise_level(i)*3, np.sqrt(3)*3]): + PD2[i] = PD2[i + 1] + + PD = np.nanmean(np.vstack([PD1, PD2]), axis=0) + + # ---------------------------------------------------- + # 2. Value Distribution Equalization (VDE) Process + # ---------------------------------------------------- + IS = np.argsort(PD) + RS = PD[IS] + + MA = RS[-1] + MI = RS[0] + + PE = np.arange(len(RS)) / len(RS) + epsilon = 1e-6 + + for i in range(1, len(RS)): + if np.abs(RS[i] - RS[i - 1]) <= epsilon: + PE[i] = PE[i - 1] + + yi = PE * (MA - MI) + MI + + RIS = np.argsort(IS) ## ????? + PN = yi[RIS] + + # ---------------------------------------------------- + # 3. Layer detection + # ---------------------------------------------------- + BZ = np.arange(len(PN), 0, -1)/len(PN) * (MA - MI) + MI + layerN = 0 + L, nLayer = label(PN > BZ) + + for iLayer in range(1, nLayer + 1): + + baseIndex = np.where(L == iLayer)[0][0] + topIndex = np.where(L == iLayer)[0][-1] + + layerDepth = height[topIndex] - height[baseIndex] + layerIndex = (L == iLayer) + layerSNR = np.nanmean(P[layerIndex]) / np.sqrt(np.nanmean(P[layerIndex] + BG)) + + if (layerDepth >= minLayerDepth) and (layerSNR >= minSNR): + + layerN += 1 + layerInfo.append({ + 'id': layerN, + 'baseHeight': height[baseIndex], + 'topHeight': height[topIndex], + 'layerDepth': layerDepth, + 'flagCloud': False + }) + + # ---------------------------------------------------- + # 4. Layer classification + # ---------------------------------------------------- + for iLayer in range(len(layerInfo)): + Ps_1 = savgol_filter(P, 10, 2) + mask = (height >= layerInfo[iLayer]['baseHeight']) & (height <= layerInfo[iLayer]['topHeight']) + sig = Ps_1[mask] * height[mask]**2 + sig[sig <= 0] = np.nan + + maxIndex = np.nanargmax(sig) # potential crash here if all of sig = NaN + maxSig = sig[maxIndex] + maxIndex = np.where(mask)[0][0] + maxIndex - 1 # layerIndex + max_Signal_Index + T = maxSig / sig[0] + D = (np.log(sig[-1] / maxSig)) / (layerInfo[iLayer]['topHeight'] - height[maxIndex]) + layerHeight = np.nanmean([layerInfo[iLayer]['baseHeight'], + layerInfo[iLayer]['topHeight']]) + layerInfo[iLayer]['peekHeight'] = height[maxIndex] + + ## segmented threshold for the determination of layer status + if layerHeight <= 1.5: + if T > 6 or D < -15: + layerInfo[iLayer]['flagCloud'] = True + elif layerHeight > 1.5 and layerHeight <= 5: + if T > 4 or D < -9: + layerInfo[iLayer]['flagCloud'] = True + elif layerHeight > 5: + if T > 1.5 or D < -9: + layerInfo[iLayer]['flagCloud'] = True + else: + raise ValueError('Invalid height!') + + return layerInfo, PD, PN \ No newline at end of file From 7df1e2b1bcf514bc4c535bc9d1483510706479bf Mon Sep 17 00:00:00 2001 From: HavardStridBuholdt Date: Wed, 18 Mar 2026 15:07:06 +0100 Subject: [PATCH 2/3] Update to CloudScreening module --- ppcpy/cloudmask/cloudscreen.py | 205 ++++++++++++++++------------ ppcpy/cloudmask/profilesegment.py | 24 ++-- ppcpy/misc/helper.py | 60 ++++++++ ppcpy/preprocess/pollyPreprocess.py | 2 +- ppcpy/retrievals/ramanhelpers.py | 61 +-------- 5 files changed, 195 insertions(+), 157 deletions(-) diff --git a/ppcpy/cloudmask/cloudscreen.py b/ppcpy/cloudmask/cloudscreen.py index 6f1f801..8000dc3 100644 --- a/ppcpy/cloudmask/cloudscreen.py +++ b/ppcpy/cloudmask/cloudscreen.py @@ -1,11 +1,8 @@ - - import numpy as np -from scipy.ndimage import label -from ppcpy.misc.helper import uniform_filter -from ppcpy.retrievals.ramanhelpers import savgol_filter +from scipy.ndimage import label, uniform_filter1d +from ppcpy.misc.helper import uniform_filter, savgol_filter + -# Helper functions def smooth_signal(signal:np.ndarray, window_len:int) -> np.ndarray: """Uniformly smooth the input signal @@ -23,82 +20,114 @@ def smooth_signal(signal:np.ndarray, window_len:int) -> np.ndarray: History ------- - - 2026-02-04 Changed from scipy.ndimage.uniform_filter1d to ppcpy.misc.helper.uniform_filter - + - 2026-02-04: Changed from scipy.ndimage.uniform_filter1d to ppcpy.misc.helper.uniform_filter + - 2026-03-17: Changed back to scipy.ndimage.uniform_filter1d due to NaN-related issues in + Zhao's cloud screening algorithm. """ - return uniform_filter(signal, window_len) + # return uniform_filter(signal, window_len) + return uniform_filter1d(signal, window_len, mode='nearest') -def cloudscreen(data_cube) -> np.ndarray: + +def cloudscreen(data_cube, wv=532) -> np.ndarray: """Preform cloud screening. Parameters ---------- data_cube : object Main PicassoProc object. + wv : int, optional + Wavelength of the channel to perfom cloud screening [nm]. Default is 532. Returns ------- falgCloudFree : ndarray 1 dimensional temporal boolean array. 0 = cloudy, 1 = cloud free. + Notes + ----- + - The cloud screening is done for both the far range and near range total channels of + wavelength wv if the channels exist. """ + config_dict = data_cube.polly_config_dict print('Starting cloud screen') print('cloud screen mode', config_dict['cloudScreenMode']) - print('slope_thres', config_dict['maxSigSlope4FilterCloud']) - height = data_cube.retrievals_highres['range'] - wv = 532 - RCS = np.squeeze(data_cube.retrievals_highres['RCS'][:, :, data_cube.gf(wv, 'total', 'FR')]) + height = data_cube.retrievals_highres['range'] bg = np.squeeze(data_cube.retrievals_highres['BG'][:, data_cube.gf(wv, 'total', 'FR')]) hFullOL = np.array(config_dict['heightFullOverlap'])[data_cube.gf(wv, 'total', 'FR')][0] + # Cloud screen mode dependent configuration if config_dict['cloudScreenMode'] == 1: screenfunc = cloudScreen_MSG + sig = 'RCS' + kwargs = { + 'slope_thres': config_dict['maxSigSlope4FilterCloud'], + } elif config_dict['cloudScreenMode'] == 2: screenfunc = cloudScreen_Zhao + sig = 'PCR_slice' + kwargs = { + 'bg': bg, + 'minSNR': 2, + 'heightFullOverlap': hFullOL + } else: - raise ValueError(f'cloudScreenMode not properly defined') + raise ValueError(f'cloudScreenMode {config_dict['cloudScreenMode']} not properly defined.') + + signal = np.squeeze(data_cube.retrievals_highres[sig][:, :, data_cube.gf(wv, 'total', 'FR')]) flagCloudFree, layerStatus = screenfunc( - height, RCS, config_dict['maxSigSlope4FilterCloud'], [hFullOL, 7000]) + height=height, signal=signal, + search_region=[hFullOL, 7000], + **kwargs + ) # and for near range if it exists if np.any(data_cube.gf(wv, 'total', 'NR')): - RCS = np.squeeze(data_cube.retrievals_highres['RCS'][:, :, data_cube.gf(wv, 'total', 'NR')]) + signal = np.squeeze(data_cube.retrievals_highres[sig][:, :, data_cube.gf(wv, 'total', 'NR')]) bg = np.squeeze(data_cube.retrievals_highres['BG'][:, data_cube.gf(wv, 'total', 'NR')]) hFullOL = np.array(config_dict['heightFullOverlap'])[data_cube.gf(wv, 'total', 'NR')][0] - if config_dict['cloudScreenMode'] == 1: - flagCloudFree_NR, layerStatus_NR = screenfunc( - height, RCS, config_dict['maxSigSlope4FilterCloud'], [hFullOL, 2000]) + + if 'hFullOl' in kwargs and 'bg' in kwargs: + kwargs['heightFullOverlap'] = hFullOL + kwargs['bg'] = bg + + flagCloudFree_NR, layerStatus_NR = screenfunc( + height=height, signal=signal, + search_region=[hFullOL, 2000], + **kwargs + ) flagCloudFree = flagCloudFree & flagCloudFree_NR return flagCloudFree - -def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, search_region:list) -> float: +def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, search_region:list, smooth_win:int=9) -> float: """Cloud screen with maximum signal gradient. Parameters ---------- - height : array + height : ndarray Height in meters. - signal : array (time, height) !! this is transposed compared to the original implementation - Photon count rate in MHz. + signal : ndarray (time, height) + Range corrected photon count rate signal [MHz]. slope_thres : float Threshold of the slope to determine whether there is strong backscatter signal. [MHz*m] search_region : list or array (2 elements) [baseHeight, topHeight] in meters. + smooth_win : int, optional + Width of the applied smoothing filter [bins]. Default is 9. Returns ------- - flagCloudFree : boolean array - Indicates whether the profile is cloud free. - layerStatus: matrix (height x time) + flagCloudFree : ndarray + A boolean array indicates whether the profile is cloud free. + layerStatus: ndarray (time x height) Layer status for each bin (0: unknown, 1: cloud, 2: aerosol). + History ------- @@ -107,9 +136,9 @@ def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, sea Notes ----- - - layerStatus is currently not calculated in this module. - + - This function does not yield any layerStatus information. """ + if len(search_region) != 2 or search_region[1] <= height[0]: raise ValueError("Not a valid search_region.") @@ -118,53 +147,52 @@ def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, sea search_region[0] = height[0] flagCloudFree = np.zeros(signal.shape[0], dtype=bool) - layerStatus = np.zeros_like(signal, dtype=int) + layerStatus = None # Find indices corresponding to search_region - search_indx = np.array(((np.array(search_region) - height[0]) / (height[1] - height[0])) + 1, dtype=int) + search_index = np.searchsorted(height, np.array(search_region)) for indx in range(signal.shape[0]): if np.isnan(signal[indx, 0]): + print(f"Skipping timestamp {indx}.") continue - slope = np.concatenate(([0], np.diff(smooth_signal(signal[indx, :], 10)))) / (height[1] - height[0]) + slope = np.concatenate(([0], np.diff(smooth_signal(signal[indx, :], smooth_win)))) / (height[1] - height[0]) - if not np.any(slope[search_indx[0]:search_indx[1]] >= slope_thres): + if not np.any(slope[search_index[0]:search_index[1] + 1] >= slope_thres): flagCloudFree[indx] = True return flagCloudFree, layerStatus -def cloudScreen_Zhao(time:np.ndarray, height:np.ndarray, signal:np.ndarray, bg:np.ndarray, search_region:list=[0, 10000], - minDepth:float=100, heightFullOverlap:float=600, smoothWin:int=8, minSNR:float=1) -> tuple: +def cloudScreen_Zhao(height:np.ndarray, signal:np.ndarray, bg:np.ndarray, search_region:list=[0, 10000], + minDepth:float=100, heightFullOverlap:float=600, smoothWin:int=7, minSNR:float=1) -> tuple: """Cloud layer detection based on Zhao's algorithm. Parameters ---------- - time : ndarray - measurement time for each profile [datenum]. height : ndarray - height above ground [m]. - signal : ndarray - Photon count rate signal [MHz]. + Height above ground [m]. + signal : ndarray (time, height) + Background corrected photon count rate signal [MHz]. bg : ndarray - Background of the Photon count rate signal [MHz]. + Background of the photon count rate signal [MHz]. search_region : list - bottom and top height for cloud detection [m]. - minDepth : flaot - minimum layer depth [m]. Default is 100. + Bottom and top height for cloud detection [m]. + minDepth : float + Minimum layer depth [m]. Default is 100. heightFullOverlap : float - minimum heght with full overlap [m]. Default is 600. + Minimum height with full overlap [m]. Default is 600. smoothWin : int - smoothing window width [bins]. Default is 8. - minSNR : flaot - minimum layer mean signal-noise-ratio. Default is 1. + Smoothing window width [bins]. Default is 7. + minSNR : float + Minimum layer mean signal-noise-ratio. Default is 1. Returns ------- - flagCloudFree : boolean array - Indicates whether the profile is cloud free. - layerStatus: matrix (height x time) + flagCloudFree : ndarray + A boolean array indicates whether the profile is cloud free. + layerStatus: ndarray (time x height) Layer status for each bin (0: unknown, 1: cloud, 2: aerosol). References @@ -177,75 +205,73 @@ def cloudScreen_Zhao(time:np.ndarray, height:np.ndarray, signal:np.ndarray, bg:n ------- - 2021-05-18: First edition by Zhengping. - 2026-03-11: Translated into python. - - Notes - ----- - - Not yet tested or croschecked with the Matlab version! - """ - if (signal.shape[0] != height.shape[0] or - signal.shape[1] != time.shape[0] or - signal.shape[1] != bg.shape[0]): - raise ValueError('Dimensions are not matched!') - flagCloudFree = np.ones((1, len(time)), dtype=bool) + if (signal.shape[1] != height.shape[0] or + signal.shape[0] != signal.shape[0] or + signal.shape[0] != bg.shape[0]): + raise ValueError('Dimensions do not match!') + + flagCloudFree = np.ones(signal.shape[0], dtype=bool) layerStatus = np.zeros(signal.shape, dtype=int) - flagDetectBins = (height >= search_region[0] & height <= search_region[2]) + flagDetectBins = (height >= search_region[0]) & (height <= search_region[1]) - for iTime in range(len(time)): + for iTime in range(signal.shape[0]): ## layer detection - layerInfo = VDE_cld( - signal=signal[flagDetectBins, iTime], + layerInfo, _, _ = VDE_cld( + signal=signal[iTime, flagDetectBins], height=height[flagDetectBins] / 1e3, - BG=bg(iTime), + BG=bg[iTime], minLayerDepth=minDepth / 1e3, minHeight=heightFullOverlap / 1e3, smoothWin=smoothWin, minSNR=minSNR ) - for iLayer in layerInfo: + for iLayer in range(len(layerInfo)): ## gridding the layers - layerIndex = (height >= layerInfo[iLayer]['baseHeight'] * 1e3 and - height <= layerInfo[iLayer]['topHeight'] * 1e3) + layerIndex = (height >= layerInfo[iLayer]['baseHeight'] * 1e3) & \ + (height <= layerInfo[iLayer]['topHeight'] * 1e3) if layerInfo[iLayer]['flagCloud']: ## cloud layer flagCloudFree[iTime] = False - layerStatus[layerIndex, iTime] = 1 + layerStatus[iTime, layerIndex] = 1 else: ## aerosol layer - layerStatus[layerIndex, iTime] = 2 + layerStatus[iTime, layerIndex] = 2 return flagCloudFree, layerStatus def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float=0.2, - minHeight:float=0.4, smoothWin:int=3, minSNR:int=5) -> tuple: - """VDE_CLD cloud layer detection with VDE method. This method only required elstic signal. + minHeight:float=0.4, smoothWin:int=3, savgolSmoothWin:int=9, minSNR:int=5) -> tuple: + """Cloud layer detection with VDE method. This method only required elstic signal. Parameters ---------- - signal : ndarray + signal : ndarray (height) raw signal without background [photon count]. height : ndarray height above the ground [km]. BG : float background signal [photon count]. minLayerDepth : float - minimun layer geometrical depth [km]. Default is 0.2. + minimum layer geometrical depth [km]. Default is 0.2. minHeight : float minimum height to start the searching with [km]. Default is 0.4. smoothWin : int - smoothindow in bins. Default is 3. + smoothing window bins. Default is 3. + savgolSmoothWin : int + smoothing window bins of the Savitzky-Golay filter. Default is 9. minSNR : int minimum layer SNR to filter the fake features. Default is 5. Returns ------- - layerInfo : dict + layerInfo : list of dicts id : int identity of the layer. baseHeight : float @@ -276,43 +302,46 @@ def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float= Notes ----- - - Not yet tested or croschecked with the Matlab version! - - Breaking errors in the signal shape are suspected, ei. transposed vs not compared to the matlab version. - + - Some of the for-loops could be vectorised for enhanced performance. However, not in a pretty way. + - Also could consider using numba and its @njit decorator. That would also enhance the performance + but the code needs to be rewritten to fit numba's requirements. + - This function does not work with NaN values in the signal. """ + if np.floor(minLayerDepth / (height[1] - height[0])) < 3: raise ValueError('minLayerDepth must be assured there are at least 3 bins in each layer') layerInfo = [] minIndex = np.where(height >= minHeight)[0][0] - + P = signal[minIndex:] height = height[minIndex:] # ---------------------------------------------------- # 1. Semi-Discretization Process (SDP) # ---------------------------------------------------- - noise_level = np.sqrt(BG + P) + noise_tresh = np.maximum(np.sqrt(BG + P)*3, np.sqrt(3)*3) Ps = smooth_signal(P, smoothWin) ## bottom to top semi-discretization PD1 = Ps.copy() for i in range(1, len(PD1)): - if np.abs(PD1[i] - PD1[i - 1]) <= np.nanmax([noise_level[i]*3, np.sqrt(3)*3]): + if np.abs(PD1[i] - PD1[i - 1]) <= noise_tresh[i]: PD1[i] = PD1[i - 1] ### top to bottom semi-discretizion PD2 = Ps.copy() for i in range(len(PD2) - 2, -1, -1): - if np.abs(PD2[i] - PD2[i + 1]) <= np.nanmax([noise_level(i)*3, np.sqrt(3)*3]): + if np.abs(PD2[i] - PD2[i + 1]) <= noise_tresh[i]: PD2[i] = PD2[i + 1] - + PD = np.nanmean(np.vstack([PD1, PD2]), axis=0) # ---------------------------------------------------- # 2. Value Distribution Equalization (VDE) Process # ---------------------------------------------------- + PD = PD[~np.isnan(PD)] IS = np.argsort(PD) RS = PD[IS] @@ -328,7 +357,7 @@ def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float= yi = PE * (MA - MI) + MI - RIS = np.argsort(IS) ## ????? + RIS = np.argsort(IS) PN = yi[RIS] # ---------------------------------------------------- @@ -361,8 +390,8 @@ def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float= # ---------------------------------------------------- # 4. Layer classification # ---------------------------------------------------- + Ps_1 = savgol_filter(P, savgolSmoothWin, 2) for iLayer in range(len(layerInfo)): - Ps_1 = savgol_filter(P, 10, 2) mask = (height >= layerInfo[iLayer]['baseHeight']) & (height <= layerInfo[iLayer]['topHeight']) sig = Ps_1[mask] * height[mask]**2 sig[sig <= 0] = np.nan diff --git a/ppcpy/cloudmask/profilesegment.py b/ppcpy/cloudmask/profilesegment.py index de2485a..25b9729 100644 --- a/ppcpy/cloudmask/profilesegment.py +++ b/ppcpy/cloudmask/profilesegment.py @@ -2,15 +2,23 @@ import numpy as np from scipy.ndimage import label -def segment(data_cube): - """ """ +def segment(data_cube) -> np.ndarray: + """Perform cloud free profile segmentation + Parameters + ---------- + data_cube : object + Main PicassoProc object + + Returns + ------- + clFreeGrps : ndarray + Time index of the detected cloud free regions [start, end]. + """ + config_dict = data_cube.polly_config_dict - # the flag in the original matlab version - # flagValPrf = flagCloudFree & (~ data.fogMask) & (~ data.depCalMask) & (~ data.shutterOnMask); - - # TODO for some reason data_cube.retrievals_highres['depCalMask'] is of type masked_array - flagValPrf = data_cube.flagCloudFree & (~data_cube.retrievals_highres['depCalMask']) + flagValPrf = data_cube.flagCloudFree & (~data_cube.retrievals_highres['depCalMask']) \ + & (~data_cube.retrievals_highres['shutterOnMask']) & (~data_cube.retrievals_highres['fogMask']) print('intNProfiles', config_dict['intNProfiles'], 'minIntNProfiles', config_dict['minIntNProfiles']) clFreeGrps = clFreeSeg(flagValPrf, config_dict['intNProfiles'], config_dict['minIntNProfiles']) @@ -19,7 +27,7 @@ def segment(data_cube): -def clFreeSeg(prfFlag, nIntPrf, minNIntPrf): +def clFreeSeg(prfFlag:np.ndarray, nIntPrf:int, minNIntPrf:int) -> np.ndarray: """CLFREESEG splits continuous cloud-free profiles into small sections. INPUTS: diff --git a/ppcpy/misc/helper.py b/ppcpy/misc/helper.py index 6671e54..23e197d 100644 --- a/ppcpy/misc/helper.py +++ b/ppcpy/misc/helper.py @@ -10,6 +10,7 @@ from pathlib import Path import numpy as np from scipy.sparse import diags +from scipy.signal import savgol_coeffs def detect_path_type(fullpath): @@ -713,6 +714,65 @@ def uniform_filter(x:np.ndarray, win:int, fill_val:float=np.nan) -> np.ndarray: raise ValueError("Invalid window configuration.") +def savgol_filter(x:np.ndarray, window_length:int, polyorder:int=2, deriv:int=0, + delta:float=1.0, fill_val:float=np.nan) -> np.ndarray: + """Savitzky-Golay filter + + A Savitzky-Golay filter that works with NaN values. + + Parameters + ---------- + x : ndarray + Signal to be smoothed + window_length : int + Width of savgol filter + polyorder : int, optional + The order of the polynomial used to make the filter. + must be less than 'window_length'. Default is 2. + deriv : int, optional + The order of the derivative to compute for the filter. Default is 0. + delta : float, optional + The spacing of which the filter will be applied. This is only used + if 'deriv' > 0. Defualt is 1.0. + fill_val : float, optional + Value to be used for filling edges in order to recreate the input + dimension. Default is np.nan. + + Returns + ------- + out : ndarray + Smoothed signal + + Notes + ----- + This function is inspiered by scipy.signal's Savitzky-Golay filter [1]. + + References + ---------- + [1] Virtanen, et al., Scipy 1.0: Fundamental Algorithms for Scientific + Computing in Python, Nature Methods, 2020, 17, 261-272, https://rdcu.be/b08Wh, + 10.1038/s41592-019-0686-2 + [2] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of Data by + Simplified Least Squares Procedures. Analytical Chemistry, 1964, 36 (8), + pp 1627-1639. + [3] Jianwen Luo, Kui Ying, and Jing Bai. 2005. Savitzky-Golay smoothing and + differentiation filter for even number data. Signal Process. + 85, 7 (July 2005), 1429-1434. + + """ + f = savgol_coeffs(window_length, polyorder, deriv=deriv, delta=delta) + x_smooth = np.convolve(x, f, mode='valid') + fill = np.full(int((window_length - 1)/2), fill_val) + + # if window_length is even fill one more element at the start. + if window_length % 2 == 0: + out = np.hstack((np.append(fill, fill_val), x_smooth, fill)) + else: + out = np.hstack((fill, x_smooth, fill)) + + return out + + def mean_stable(x:np.ndarray, win:int, minBin:int=None, maxBin:int=None, minRelStd:float=None) -> tuple: """Calculate the mean value of x based on the least fluctuated segment of x. The searching is based on the std inside each window of x. diff --git a/ppcpy/preprocess/pollyPreprocess.py b/ppcpy/preprocess/pollyPreprocess.py index f72472e..9125fc2 100644 --- a/ppcpy/preprocess/pollyPreprocess.py +++ b/ppcpy/preprocess/pollyPreprocess.py @@ -728,7 +728,7 @@ def pollyPreprocess(rawdata_dict:dict, collect_debug:bool=False, **param:dict): ## Range-corrected Signal calculation logging.info('... calculate range-corrected Signal') # mask = data_dict['lowSNRMask'].mask - mask = data_dict['lowSNRMask'] + # mask = data_dict['lowSNRMask'] # masked arry might be slow # RCS_masked = np.ma.masked_array(sigBGCor+bg, mask=mask) # data_dict['RCS'] = calculate_rcs(datasignal=preproSignal, data_dict=data_dict, mShots=mShots, hRes=hRes) diff --git a/ppcpy/retrievals/ramanhelpers.py b/ppcpy/retrievals/ramanhelpers.py index 340c4a3..70efd16 100644 --- a/ppcpy/retrievals/ramanhelpers.py +++ b/ppcpy/retrievals/ramanhelpers.py @@ -1,7 +1,6 @@ - import numpy as np from scipy.stats import norm, poisson -from scipy.signal import savgol_coeffs + def movingslope_variedWin(signal:np.ndarray, winWidth:int|np.ndarray) -> np.ndarray: """ @@ -198,61 +197,3 @@ def sigGenWithNoise(signal:np.ndarray, noise:np.ndarray=None, nProfile:int=1, me raise ValueError('A valid method should be provided.') return signalGen - - -def savgol_filter(x:np.ndarray, window_length:int, polyorder:int=2, deriv:int=0, delta:float=1.0, fill_val:float=np.nan) -> np.ndarray: - """Savitzky-Golay filter - - A Savitzky-Golay filter that works with NaN values. - - Parameters - ---------- - x : ndarray - Signal to be smoothed - window_length : int - Width of savgol filter - polyorder : int, optional - The order of the polynomial used to make the filter. - must be less than 'window_length'. Default is 2. - deriv : int, optional - The order of the derivative to compute for the filter. Default is 0. - delta : float, optional - The spacing of which the filter will be applied. This is only used - if 'deriv' > 0. Defualt is 1.0. - fill_val : float, optional - Value to be used for filling edges in order to recreate the input - dimension. Default is np.nan. - - Returns - ------- - out : ndarray - Smoothed signal - - Notes - ----- - This function is inspiered by scipy.signal's Savitzky-Golay filter [1]. - - References - ---------- - [1] Virtanen, et al., Scipy 1.0: Fundamental Algorithms for Scientific - Computing in Python, Nature Methods, 2020, 17, 261-272, https://rdcu.be/b08Wh, - 10.1038/s41592-019-0686-2 - [2] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of Data by - Simplified Least Squares Procedures. Analytical Chemistry, 1964, 36 (8), - pp 1627-1639. - [3] Jianwen Luo, Kui Ying, and Jing Bai. 2005. Savitzky-Golay smoothing and - differentiation filter for even number data. Signal Process. - 85, 7 (July 2005), 1429-1434. - - """ - f = savgol_coeffs(window_length, polyorder, deriv=deriv, delta=delta) - x_smooth = np.convolve(x, f, mode='valid') - fill = np.full(int((window_length - 1)/2), fill_val) - - # if window_length is even fill one more element at the start. - if window_length % 2 == 0: - out = np.hstack((np.append(fill, fill_val), x_smooth, fill)) - else: - out = np.hstack((fill, x_smooth, fill)) - - return out From b6c42de2bf1efb11207895e55c05e99a584c7710 Mon Sep 17 00:00:00 2001 From: HavardStridBuholdt Date: Wed, 18 Mar 2026 15:25:38 +0100 Subject: [PATCH 3/3] Update to the documentation of cloud screening module --- ppcpy/cloudmask/cloudscreen.py | 36 +++++++++++++++++-------------- ppcpy/cloudmask/profilesegment.py | 5 +++++ ppcpy/misc/helper.py | 8 +++---- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/ppcpy/cloudmask/cloudscreen.py b/ppcpy/cloudmask/cloudscreen.py index 8000dc3..5a0fd89 100644 --- a/ppcpy/cloudmask/cloudscreen.py +++ b/ppcpy/cloudmask/cloudscreen.py @@ -18,8 +18,8 @@ def smooth_signal(signal:np.ndarray, window_len:int) -> np.ndarray: ndarray Smoothed signal - History - ------- + **History** + - 2026-02-04: Changed from scipy.ndimage.uniform_filter1d to ppcpy.misc.helper.uniform_filter - 2026-03-17: Changed back to scipy.ndimage.uniform_filter1d due to NaN-related issues in Zhao's cloud screening algorithm. @@ -48,6 +48,11 @@ def cloudscreen(data_cube, wv=532) -> np.ndarray: ----- - The cloud screening is done for both the far range and near range total channels of wavelength wv if the channels exist. + + **History** + + - xxxx-xx-xx: First edition by ... + - 2026-03-18: Updated to include necessary configurations for cloudScreen_Zhao. """ config_dict = data_cube.polly_config_dict @@ -127,16 +132,15 @@ def cloudScreen_MSG(height:np.ndarray, signal:np.ndarray, slope_thres:float, sea A boolean array indicates whether the profile is cloud free. layerStatus: ndarray (time x height) Layer status for each bin (0: unknown, 1: cloud, 2: aerosol). - - - History - ------- - - 2021-05-18: First edition by Zhenping. - - 2025-03-20: Translated into python. Notes ----- - This function does not yield any layerStatus information. + + **History** + + - 2021-05-18: First edition by Zhenping. + - 2025-03-20: Translated into python. """ if len(search_region) != 2 or search_region[1] <= height[0]: @@ -201,8 +205,8 @@ def cloudScreen_Zhao(height:np.ndarray, signal:np.ndarray, bg:np.ndarray, search layer detection method based on micropulse lidar measurements, Journal of Geophysical Research: Atmospheres, 119(11), 6788-6802. - History - ------- + **History** + - 2021-05-18: First edition by Zhengping. - 2026-03-11: Translated into python. """ @@ -294,11 +298,6 @@ def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float= Zhao, C., Y. Wang, Q. Wang, Z. Li, Z. Wang, and D. Liu (2014), A new cloud and aerosol layer detection method based on micropulse lidar measurements, Journal of Geophysical Research: Atmospheres, 119(11), 6788-6802. - - History - ------- - - 2021-06-13: First edition by Zhenping - - 2026-03-11: Translated into pyhton Notes ----- @@ -306,8 +305,13 @@ def VDE_cld(signal:np.ndarray, height:np.ndarray, BG:float, minLayerDepth:float= - Also could consider using numba and its @njit decorator. That would also enhance the performance but the code needs to be rewritten to fit numba's requirements. - This function does not work with NaN values in the signal. - """ + + **History** + - 2021-06-13: First edition by Zhenping + - 2026-03-11: Translated into pyhton + """ + if np.floor(minLayerDepth / (height[1] - height[0])) < 3: raise ValueError('minLayerDepth must be assured there are at least 3 bins in each layer') diff --git a/ppcpy/cloudmask/profilesegment.py b/ppcpy/cloudmask/profilesegment.py index 25b9729..8a47d18 100644 --- a/ppcpy/cloudmask/profilesegment.py +++ b/ppcpy/cloudmask/profilesegment.py @@ -14,6 +14,11 @@ def segment(data_cube) -> np.ndarray: ------- clFreeGrps : ndarray Time index of the detected cloud free regions [start, end]. + + **History** + + - xxxx-xx-xx: First edition by ... + - 2026-03-18: Updated flagValPrf to include 'shutterOnMask' and 'fogMask'. """ config_dict = data_cube.polly_config_dict diff --git a/ppcpy/misc/helper.py b/ppcpy/misc/helper.py index 23e197d..a0e0651 100644 --- a/ppcpy/misc/helper.py +++ b/ppcpy/misc/helper.py @@ -682,15 +682,15 @@ def uniform_filter(x:np.ndarray, win:int, fill_val:float=np.nan) -> np.ndarray: ------- out : ndarray Smoothed signal. - - History - ------- - - 2026-02-02: First edition by Buholdt Notes ----- - Currently only support smoothing of 1D-arrays with a constant window size. + **History** + + - 2026-02-02: First edition by Buholdt + """ if isinstance(win, int): if len(x.shape) > 1: