... | ... | @@ -1903,3 +1903,130 @@ plt.plot(global\_average\_variable.coord(\'depth\').points,global\_average\_vari |
|
|
#plotting globally averaged GLODAP values against depth. Units mol/kg
|
|
|
|
|
|
plt.show()
|
|
|
|
|
|
### Calculating Degree Heating Weeks (EXAMPLE SCRIPT)
|
|
|
|
|
|
```
|
|
|
#Import some modules
|
|
|
|
|
|
import iris
|
|
|
import iris.coord_categorisation
|
|
|
import numpy as np
|
|
|
import glob
|
|
|
import cartopy.feature as cfeature
|
|
|
from scipy.stats import t
|
|
|
|
|
|
def linregress_3D(y_array):
    """Grid-wise linear regression of a (time, lon, lat) array against time.

    Fits y = slope * t + intercept independently at every (lon, lat) grid
    box, using the 1-based time index as the independent variable and
    ignoring np.nan values in y_array.

    Parameters
    ----------
    y_array : numpy.ndarray
        3-D array shaped (time, lon, lat); may contain np.nan.

    Returns
    -------
    slope, intercept : numpy.ndarray
        2-D (lon, lat) arrays; np.nan wherever fewer than 3 valid records
        were available for the fit.
    """
    # Construct x_array indicating time indexes of y_array, namely the independent variable.
    x_array = np.empty(y_array.shape)
    for i in range(y_array.shape[0]):
        x_array[i, :, :] = i + 1  # fine while the series is short; could use i+start_year instead
    # Mirror y's missing values into x so the nan-aware reductions agree.
    x_array[np.isnan(y_array)] = np.nan
    # Number of non-nan records over each (lon,lat) grid box.
    n = np.sum(~np.isnan(x_array), axis=0)
    # Per-grid-box means and (population) standard deviations.
    x_mean = np.nanmean(x_array, axis=0)
    y_mean = np.nanmean(y_array, axis=0)
    x_std = np.nanstd(x_array, axis=0)
    y_std = np.nanstd(y_array, axis=0)
    # Covariance between the x and y time series over each grid box.
    cov = np.nansum((x_array - x_mean) * (y_array - y_mean), axis=0) / n
    # Correlation coefficient over each grid box.
    cor = cov / (x_std * y_std)
    # Ordinary-least-squares slope and intercept over each grid box.
    slope = cov / (x_std ** 2)
    intercept = y_mean - x_mean * slope
    # t-statistic, standard error and two-sided p-value.
    tstats = cor * np.sqrt(n - 2) / np.sqrt(1 - cor ** 2)
    stderr = slope / tstats
    # BUG FIX: a two-sided p-value needs |t|; with the raw (possibly negative)
    # t-statistic, a downward trend produced p_val > 1.
    p_val = t.sf(np.abs(tstats), n - 2) * 2
    # r_square also equals cor**2 in 1-variable linear regression analysis,
    # which can be used for checking.
    r_square = np.nansum((slope * x_array + intercept - y_mean) ** 2, axis=0) / np.nansum((y_array - y_mean) ** 2, axis=0)
    rmse = np.sqrt(np.nansum((y_array - slope * x_array - intercept) ** 2, axis=0) / n)
    # Require at least 3 data records to report a regression; mask the rest.
    n = n * 1.0  # convert n from integer to float to enable later use of np.nan
    n[n < 3] = np.nan
    slope[np.isnan(n)] = np.nan
    intercept[np.isnan(n)] = np.nan
    p_val[np.isnan(n)] = np.nan
    r_square[np.isnan(n)] = np.nan
    rmse[np.isnan(n)] = np.nan
    # return n,slope,intercept,p_val,r_square,rmse
    return slope, intercept
|
|
|
|
|
|
|
|
|
def mmm_skirving(cube):
    # Calculate the NOAA/Skirving "maximum monthly mean" (MMM) climatology.
    # cube: iris cube of sea-surface temperature with 'year', 'month' and
    # 'month_number' auxiliary coordinates already attached.
    # Returns: iris cube -- the maximum over the 12 calendar months of the
    # trend-adjusted monthly-mean climatology.
    cube = cube.aggregated_by(['year','month'], iris.analysis.MEAN)
    print('calculating NOAA Skirving MMM for month:')
    # missing_data_value_greater_than = -32768.0
    # missing_data_equals = -32768.0
    # Sentinel marking missing data in the (masked-array) cube payload.
    missing_data_equals = cube.data.fill_value
    # Climatology window is fixed to 1985-2012 inclusive (see note in the
    # surrounding text: the input must span these years).
    years_for_mmm_climatology = [1985,2012]
    # Decimal year the fitted trend is evaluated at -- NOTE(review): presumably
    # the NOAA standardisation date; confirm against the Skirving methodology.
    standardisation_date = 1988.2857
    # Template (12 monthly slices) to hold the per-month regression results.
    mm_cube = cube[0:12].copy()
    mm_cube_data = mm_cube.data.copy()
    cube_years = cube.coord('year').points
    #subset the data into the bit you want to use to calculate the MMM climatology and the bit you want to calculate DHW on
    clim_cube = cube[np.where((cube_years >= years_for_mmm_climatology[0]) & (cube_years <= years_for_mmm_climatology[1]))]
    clim_cube_detrended = clim_cube.copy()
    clim_cube_detrended_data = clim_cube_detrended.data
    print(np.shape(clim_cube_detrended))
    # For each calendar month: fit a linear trend through that month's values
    # across the climatology years, then evaluate the fit at the
    # standardisation date to get a trend-adjusted monthly mean.
    for i,month in enumerate(np.unique(cube.coord('month_number').points)):
        print(i+1)
        loc = np.where(clim_cube.coord('month_number').points == month)
        # [0] drops the singleton axis introduced by indexing with the
        # tuple returned by np.where.
        tmp = clim_cube_detrended_data[loc,:,:][0]
        # Replace the missing-data sentinel with nan so linregress_3D skips it.
        tmp[np.where(tmp == missing_data_equals )] = np.nan
        slope,intercept = linregress_3D(tmp)
        # Years elapsed from the climatology start to the standardisation date
        # (the x units of the fit -- NOTE(review): linregress_3D's x starts at
        # 1, not 0; confirm the intended offset).
        x = standardisation_date - years_for_mmm_climatology[0]
        y = (slope * x ) + intercept
        mm_cube_data[i,:,:] = y
    mm_cube.data = mm_cube_data
    # MMM = maximum of the 12 monthly climatological means.
    mmm_climatology = mm_cube.collapsed('time',iris.analysis.MAX)
    return mmm_climatology
|
|
|
|
|
|
|
|
|
def dhw(cube,mmm_climatology,years_over_which_to_calculate_dhw):
    # Calculate daily Degree Heating Week (DHW) values.
    # cube: iris cube of DAILY temperature data with a 'year' aux coordinate.
    # mmm_climatology: maximum-monthly-mean cube from mmm_skirving().
    # years_over_which_to_calculate_dhw: [start, end] -- NOTE(review): the
    # comparisons below are strict (> start, < end), so both endpoint years
    # are excluded; confirm this is intended.
    # Returns: iris cube of DHW for each day from the 84th day of the
    # selected period onward.
    cube_years = cube.coord('year').points
    #note this is to be used with daily data...
    main_cube = cube[np.where((cube_years > years_over_which_to_calculate_dhw[0]) & (cube_years < years_over_which_to_calculate_dhw[1]))]
    #subtract the monthly mean climatology from the rest of the data
    # Remove aux coords first: iris cube arithmetic requires matching
    # coordinate metadata between the two operands.
    try:
        main_cube.remove_coord('month_number')
        main_cube.remove_coord('month')
        main_cube.remove_coord('year')
    except:
        # Coords may already be absent; removal is best-effort.
        pass
    main_cube -= mmm_climatology
    #set all values less than 1 to zero (only anomalies of >= 1 degree
    #contribute to heat stress accumulation)
    main_cube.data[np.where(main_cube.data < 1.0)] = 0.0
    #make a cube to hold the output data
    # The first 83 days are dropped: each DHW value needs a full 84-day
    # (12-week) trailing window.
    output_cube = main_cube[83::].copy()
    output_cube.data[:] = np.nan
    output_cube_data = output_cube.data.copy()
    #loop through from day 84 to the end of the dataset
    for i in range(output_cube.shape[0]):
        # print(i,' of ',output_cube.shape[0])
        #sum the temperatures in that 84 day window and divide result by 7 to get in DHWeeks rather than DHdays
        tmp_data = main_cube[i:i+84].collapsed('time',iris.analysis.SUM)/7.0
        output_cube_data[i,:,:] = tmp_data.data
    #save the output
    output_cube.data = output_cube_data
    return output_cube
|
|
|
|
|
|
|
|
|
#if cube is a variable holding some historical and future daily surface ocean temperature data read in from model output (S2P3 or CMIP)
# Note this dataset must include the years 1985 to 2012, because this is the range of years used to define the maximum monthly mean climatology

#1st we can calculate the Maximum Monthly Mean climatology
# BUG FIX: removed a stray trailing 's' after the call, which was a SyntaxError.
mmm_climatology = mmm_skirving(cube)

#We can specify the range of years over which we want to calculate the degree heating weeks
years_over_which_to_calculate_dhw = [1950,2100]

# Then we can calculate the daily Degree Heating Week values which will be held in the new variable dhw_cube
dhw_cube = dhw(cube,mmm_climatology,years_over_which_to_calculate_dhw)
|
|
|
|
|
|
``` |
|
|
\ No newline at end of file |