import os
import sys
import glob
import time
import copy
import pickle
import shutil
import zipfile
import warnings
from datetime import date
from functools import partial
from multiprocessing import Pool

import numpy as np
import pandas as pd
import xarray
import h5py
import netCDF4 as nc
import urllib3
import cdsapi
import pycountry
from numba import njit
from tqdm import tqdm

import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

warnings.filterwarnings('ignore')

sys.path.append(os.getcwd() + '/../../cds-backend/code/')
import cds_eua3 as eua
# import numbaprocess

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)  # -1 is no longer accepted by recent pandas
# standard pressure levels, converted from hPa to Pa
stdplevs = [i * 100 for i in [10.0, 20.0, 30.0, 50.0, 70.0, 100.0, 150.0, 200.0, 250.0, 300.0, 400.0, 500.0, 700.0, 850.0, 925.0, 1000.0]]
# Select here the date range of interest (as strings, so that pd.to_datetime
# parses them as dates; the upper bound is exclusive)
y_min, y_max = '2009', '2011'
# Select here the pressure level of interest [Pa]
p = 10000
def read_csv(file, p, y_min, y_max, ds=''):
    """ Extract the data from the IGRA, RHARM, CUON or SUNY CSV files for plotting.
        Adjustments of RHARM must be calculated by subtracting the observed data, i.e. RHARM-IGRA.
    """
    # adapt column names to the conventions of each dataset
    if ds in ['RHARM', 'IGRA']:
        df = pd.read_csv(file, sep=',')
        # strip the timezone, otherwise tz-naive and tz-aware datetimes cannot be compared
        df['report_timestamp'] = pd.to_datetime(df['report_timestamp']).dt.tz_localize(None)
        time, pressure = 'report_timestamp', 'air_pressure'
    elif ds in ['CUON']:
        df = pd.read_csv(file, sep='\t')
        time, pressure = 'date_time', 'z_coordinate'
    elif ds in ['SUNY']:
        df = pd.read_csv(file, sep='\t')
        time, pressure = 'date_time', 'air_pressure'
    df[time] = pd.to_datetime(df[time])
    df = df[df[pressure] == p]
    # keep only the requested date range
    df = df.loc[(df[time] >= pd.to_datetime(y_min)) & (df[time] < pd.to_datetime(y_max))]
    # store the station id as a plain attribute, read back in extract_averages
    df.station = file.split('/')[1].replace('.csv', '')
    return df
# test
#file = 'data_SUNY/ACM00078861.csv'
#a = read_csv(file, p, '20090101', '20100101', ds='SUNY')
#a.station
def get_rharm_adj(i, h):
    """ Extract the RHARM adjustment.
        First join the dataframes along the timestamp-pressure axes, then calculate the
        difference between the RHARM-IGRA temperature values.
        Return the merged dataframe with an additional 'adj' column. """
    i = i[['report_timestamp', 'air_pressure', 'air_temperature']].rename(columns={'air_temperature': 'air_temperature_i'})
    h = h[['report_timestamp', 'air_pressure', 'air_temperature']].rename(columns={'air_temperature': 'air_temperature_h'})
    # merge on the timestamp/pressure keys only, so that differing temperature
    # values do not prevent the rows from being paired
    d = h.merge(i, on=['report_timestamp', 'air_pressure'], how='outer')
    d['adj'] = d.air_temperature_h - d.air_temperature_i
    return d
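# Hedged toy check of the merge logic (synthetic values, not real data):
# _ts = pd.to_datetime(['2009-01-01 00:00', '2009-01-01 12:00'])
# _i = pd.DataFrame({'report_timestamp': _ts, 'air_pressure': 10000, 'air_temperature': [210.1, 215.3]})
# _h = pd.DataFrame({'report_timestamp': _ts[:1], 'air_pressure': 10000, 'air_temperature': [210.6]})
# get_rharm_adj(_i, _h).adj   # -> 0.5 where both datasets report, NaN otherwise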
def get_night_day_diff(df, ds='IGRA', bias=''):
    """ Extract day and night differences from a dataframe.
        Processing is slightly different for IGRA/RHARM, CUON or SUNY.
        Two temp_diff values have to be returned since for RHARM/IGRA from the CDS the two
        dataframes are processed separately and the adjustment is calculated by subtracting
        RHARM-CUON, while for CUON and RHARM/IGRA from CSV the adjustment is already
        contained in the same dataframe. """
    # name of the timestamp column in the different dataframes
    dt_col = 'date_time' if ds not in ['IGRA', 'RHARM'] else 'report_timestamp'
    df['date'] = pd.to_datetime(df[dt_col])
    df['hour'] = df['date'].dt.hour
    df = df.reset_index()
    # Bucket the launch hours around 00 and 12 GMT; late-evening launches
    # count as the following day's 00 GMT sounding.
    night_hours = [0, 1, 2, 3]
    nights = list(np.where(df.hour.isin(night_hours))[0])
    df.loc[nights, 'hour'] = 0
    # ---
    following_day_night_hours = [21, 22, 23]
    nights = list(np.where(df.hour.isin(following_day_night_hours))[0])
    df.loc[nights, 'date'] = df.loc[nights, 'date'] + pd.Timedelta(1, unit='d')
    df.loc[nights, 'hour'] = 0
    # ---
    day_hours = [11, 12, 13, 14]
    day = np.where(df.hour.isin(day_hours))[0]
    df.loc[day, 'hour'] = 12
    df['date'] = df['date'].dt.date
    iday = df.loc[df.hour == 12]
    inight = df.loc[df.hour == 0]
    # keep only the dates for which both a day and a night sounding exist
    days_set = set(iday.date)
    days_set.intersection_update(inight.date)
    df = df.loc[df['date'].isin(days_set)]
    df = df.drop_duplicates(subset=['date', 'hour'])
    days = df[df.hour == 12].date
    if ds in ['IGRA', 'RHARM']:
        temp_diff_i = df[df.hour == 12].air_temperature_igra.values - df[df.hour == 0].air_temperature_igra.values
        temp_diff_h = df[df.hour == 12].air_temperature_rharm.values - df[df.hour == 0].air_temperature_rharm.values
        return days, temp_diff_i, temp_diff_h
    elif ds == 'SUNY':
        temp_diff_raw = df[df.hour == 12].air_temperature.values - df[df.hour == 0].air_temperature.values
        temp_diff_adj = df[df.hour == 12].air_temperature_adj.values - df[df.hour == 0].air_temperature_adj.values
        return days, temp_diff_raw, temp_diff_adj
    else:
        d12, d00 = df[df.hour == 12], df[df.hour == 0]
        temp_diff = d12.observation_value.values - d00.observation_value.values
        if bias in ['RAOBCORE', 'RASE', 'RICH', 'RISE']:
            b = bias + '_bias_estimate'
            temp_diff_adj = (d12.observation_value.values - d12[b].values) \
                            - (d00.observation_value.values - d00[b].values)
        else:
            # no recognized bias requested: return NaNs rather than leaving
            # temp_diff_adj undefined
            temp_diff_adj = np.full(len(temp_diff), np.nan)
        return days, temp_diff, temp_diff_adj
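# Hedged toy check of the hour bucketing (synthetic CUON-style rows):
# _c = pd.DataFrame({'date_time': pd.to_datetime(['2009-01-01 23:00', '2009-01-02 12:00']),
#                    'observation_value': [210.0, 216.0]})
# get_night_day_diff(_c, ds='CUON')   # the 23:00 launch rolls over to Jan 2 00:00 and
#                                     # pairs with Jan 2 12:00 -> temp_diff == [6.0]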
def extract_data_from_df(df, ds='CUON', bias=''):
    """ Extract and manipulate data from the original dataframes.
        Returns a vector [date times, raw temp diff, adjusted temp diff]. """
    if ds in ['CUON']:
        c_date, c_temp_diff, c_temp_diff_adj = get_night_day_diff(df, ds=ds, bias=bias)
        v = [c_date, c_temp_diff, c_temp_diff_adj]
    elif ds in ['SUNY']:
        s_date, s_tempdiff_raw, s_tempdiff_adj = get_night_day_diff(df, ds=ds)
        v = [s_date, s_tempdiff_raw, s_tempdiff_adj]
    else:
        ih_dates, temp_diff_i, temp_diff_h = get_night_day_diff(df, ds=ds)
        v = [ih_dates, temp_diff_i, temp_diff_h]
    return v
def filter_day_night(i, h, c, s, time='day'):
    """ Keep only the day (12 GMT) or night (00 GMT) soundings in each dataframe;
        for CUON a +/-2 hour window around the nominal launch time is accepted. """
if time == 'day':
i = i.loc[ i['report_timestamp'].dt.hour == 12 ]
h = h.loc[ h['report_timestamp'].dt.hour == 12 ]
c = c[c['date_time'].dt.hour.isin([10,11,12,13,14]) ]
s = s[s['date_time'].dt.hour == 12 ]
elif time == 'night':
i = i.loc[ i['report_timestamp'].dt.hour== 0 ]
h = h.loc[ h['report_timestamp'].dt.hour== 0 ]
c = c[c['date_time'].dt.hour.isin([22,23,0,1,2]) ]
s = s[s['date_time'].dt.hour == 0 ]
return i,h,c,s
def calculate_day_night_adjustment(data='', time='night'):
    """ Calculate adjustments """
    c, i, h, s = data["CUON"]['df'], data["IGRA"]['df'], data["RHARM"]['df'], data["SUNY"]['df']
    # filter the day/night hours, then calculate the adjustments
    i, h, c, s = filter_day_night(i, h, c, s, time=time)
    # NB: 'source' is a global flag set elsewhere in the notebook ('csv' or CDS)
    if source != 'csv':
        adj = get_rharm_adj(i, h)
    else:
        adj = i
    # return dataframes ready for adjustments
    return c, s, i, adj
def extract_averages(igrainv, p, date_min, date_max, ds, bias, stat_file):
    """ Read one station file and return (station, lat, lon, raw average, adjusted average)
        of the day-night temperature differences within the given date range. """
    # read with the global y_min/y_max range; the date_min/date_max filter is applied below
    df = read_csv(stat_file, p, y_min, y_max, ds=ds)
    stat = df.station
    a = igrainv.loc[igrainv.StationId == stat]
    if a.empty:  # case of CUON numbers, to be fixed
        # try matching the IGRA id without the 6-character prefix
        igrastat = [i[6:] for i in igrainv.StationId.values]
        igrainv['igrastat'] = igrastat
        a = igrainv.loc[igrainv.igrastat == stat]
    try:
        lat, lon = a.Latitude.values[0], a.Longitude.values[0]
        data = extract_data_from_df(df, ds=ds, bias=bias)
        df = pd.DataFrame.from_dict({'date_time': pd.to_datetime(data[0]), 'raw': data[1], 'adj': data[2]})
        df = df[(df.date_time > pd.to_datetime(date_min)) & (df.date_time < pd.to_datetime(date_max))]
        raw_average = np.nanmean(df.raw)
        adj_average = np.nanmean(df.adj)
    except Exception:
        # station missing from the inventory or no valid data: flag with sentinels
        lat, lon, raw_average, adj_average = 0, 0, -999, -999
    return stat, lat, lon, raw_average, adj_average
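# Single-station sketch (hypothetical file path; igrainv is loaded further below):
# extract_averages(igrainv, 10000, '20090101', '20101231', 'SUNY', '', 'data_SUNY/ACM00078861.csv')
# -> ('ACM00078861', lat, lon, raw_average, adj_average)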
def make_maps_plotly(data):
    """ Make interactive maps using the PLOTLY library for interactivity """
    # metadata attributes attached by extract_map_data
    p, ds = data.p, data.ds
    date_min, date_max = data.date_min, data.date_max
    bias = data.bias
    data = data.dropna(subset=['raw'])
fig = make_subplots(
rows=1, cols=2,
column_widths=[0.5, 0.5],
row_heights=[0.01],
specs=[[{"type": "scattergeo", "rowspan": 1}, {"type": "scattergeo", "rowspan": 1}],
],
subplot_titles=("Raw","Adjusted")
)
    # one Scattergeo panel for the raw and one for the adjusted differences
    for column, ncol in [('raw', 1), ('adj', 2)]:
        fig.add_trace(go.Scattergeo(
            lon=data['lon'],
            lat=data['lat'],
            text=data['stat'],
            mode='markers',
            marker=dict(
                size=8,
                line=dict(width=1),
                colorscale='RdBu',
                cmin=-1,
                color=data[column],
                cmax=1,
                colorbar_title='Temp Diff [K]'
            ),
        ),
            row=1, col=ncol
        )
fig.update_traces(marker=dict(size=6,
line=dict(width=1,
color='gray'))
)
    fig.update_layout(
        title=ds + ' ' + bias + ' Dataset - 12:00GMT-00:00GMT Temperature Difference [K] , p=' + str(p) + ' [Pa] between ' + date_min + '-' + date_max,
        height=600, width=1800,
    )
    fig.update_layout(coloraxis=dict(colorscale='Bluered_r'), showlegend=False)
    fig.show()
    return fig  # returned so that callers like f = make_maps_plotly(...) keep the figure
def extract_map_data(ds='', bias='', p='', date_min='', date_max='', force_extract=False, pool=True):
""" Extract the data for a given
dataset: ds
bias type: bias
pressure: p
minimum date: date_min
maximum date: date_max
        Use pool multiprocessing or a single process.
        Note: RHARM data is only available between 20090101 and 20100131 for now.
        If data for the desired combination is already available in the directory 'data_MAPS' it is read from there;
        if not, or if force_extract=True, the data is extracted from the original CSV files.
    """
if ds in ['SUNY', 'CUON']:
directory= 'data_' + ds
else:
directory = 'new_data_emanuele_27Sept2022'
#
maps_dir = 'data_MAPS'
if not os.path.isdir(maps_dir):
os.mkdir(maps_dir)
# Here I look if data for the desired combination of dates/pressure/dataset exists and if so load it
# unless the force_extract option is selected
file = maps_dir+'/'+ ds+'_'+bias+'_plevel_'+str(p)+'_'+date_min+'_'+date_max+'.csv'
if not force_extract and os.path.isfile(file) :
print('Reading ::: ' , ds, ' ' , bias , ' plevel= ' , p , ', date range: ' , date_min , ' ' , date_max )
map_df = pd.read_csv(file, sep='\t')
else:
print('Extracting ::: ' , ds, ' ' , bias , ' plevel= ' , p , ', date range: ' , date_min , ' ' , date_max )
stations = glob.glob(directory+'/*')
        if pool:
            # use a context manager so the worker processes are always released
            with Pool(30) as pr:
                func = partial(extract_averages, igrainv, p, date_min, date_max, ds, bias)
                res = pr.map(func, stations)
else:
res = []
for s in stations:
res.append(extract_averages(igrainv, p, date_min , date_max, ds, bias, s ) )
map_df = {'stat':[], 'lat':[] , 'lon':[], 'raw':[], 'adj':[] }
for r in res:
map_df['stat'].append(r[0])
map_df['lat'].append(r[1])
map_df['lon'].append(r[2])
map_df['raw'].append(r[3])
map_df['adj'].append(r[4])
map_df = pd.DataFrame.from_dict(map_df)
map_df.to_csv(file, sep='\t')
    # attach metadata as plain attributes (read back by the plotting functions;
    # note that to_csv does not persist them in the cache file)
    map_df.date_min = date_min
    map_df.date_max = date_max
    map_df.ds = ds
    map_df.p = p
    map_df.bias = bias
return map_df
# Extracting data from slimmed CSV files
# Read the IGRA2 station inventory; the widths below assume the standard IGRA2
# fixed-column layout (one width per column name, including a 9-character latitude field)
igrainv = pd.read_fwf('igra2-station-list.txt', widths=(11, 9, 10, 7, 4, 30, 5, 5, 7),
                      names=('StationId', 'Latitude', 'Longitude', 'Elev', 'dummy', 'StationName', 'From', 'To', 'Nrec'))
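# Quick sanity check of the fixed-width parse: Latitude/Longitude should be
# numeric; a shifted column layout shows up as NaNs or text here.
# igrainv[['StationId', 'Latitude', 'Longitude']].head()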
# common parameters
date_min , date_max = '20090101' , '20101231'
plevel = 10000
POOL = True
force = False
map_df_RHARM = extract_map_data(ds='RHARM', bias='', p=plevel, date_min=date_min, date_max=date_max, pool=False)
# Reading ::: RHARM plevel= 10000 , date range: 20090101 20101231
map_df_RAOBCORE = extract_map_data(ds='CUON', bias='RAOBCORE', p=plevel, date_min=date_min, date_max='20100101', pool=POOL)
#map_df_RAOBCORE_2011 = extract_map_data(ds='CUON', bias='RAOBCORE', p=plevel, date_min=date_min, date_max='20101231', pool=POOL)
# Reading ::: CUON RAOBCORE plevel= 10000 , date range: 20090101 20100101
map_df_RASE = extract_map_data(ds='CUON', bias='RASE', p=plevel, date_min=date_min, date_max=date_max, pool=POOL)
# Reading ::: CUON RASE plevel= 10000 , date range: 20090101 20101231
map_df_RICH = extract_map_data(ds='CUON', bias='RICH', p=plevel, date_min=date_min, date_max=date_max, pool=POOL)
# Reading ::: CUON RICH plevel= 10000 , date range: 20090101 20101231
map_df_RISE = extract_map_data(ds='CUON', bias='RISE', p=plevel, date_min=date_min, date_max=date_max, pool=POOL)
# Reading ::: CUON RISE plevel= 10000 , date range: 20090101 20101231
map_df_SUNY = extract_map_data(ds='SUNY', bias='', p=plevel, date_min=date_min, date_max=date_max, pool=POOL)
# Reading ::: SUNY plevel= 10000 , date range: 20090101 20101231
f = make_maps_plotly(map_df_RHARM)
f = make_maps_plotly(map_df_SUNY)
f = make_maps_plotly(map_df_RAOBCORE)
f = make_maps_plotly(map_df_RICH)
f = make_maps_plotly(map_df_RASE)
f = make_maps_plotly(map_df_RISE)
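# The returned figures can also be exported as standalone HTML pages
# (filename hypothetical); plotly's write_html needs no extra dependencies:
# f.write_html('map_RISE.html')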
#map_df_RICH.head(10)
#map_df_RASE.head(10)
#map_df_RAOBCORE.head(10)
def make_maps_static(data_SUNY, data_RAOBCORE, data_RICH, data_RASE, data_RHARM):
    from matplotlib.gridspec import GridSpec
    import cartopy.crs as ccrs
    crs = ccrs.PlateCarree()
    cm = plt.cm.RdYlBu  # attribute access works on all matplotlib versions (cm.get_cmap was removed)
    # parameters from the dataframe
    p, min_date, max_date = data_SUNY.p, data_SUNY.date_min, data_SUNY.date_max
    # Limits for the color bar
    vmin, vmax = -1, 1
    fs = 20
    gs = GridSpec(4, 2, wspace=0.1, hspace=0.000001)
    fig = plt.figure(figsize=(33, 37))
    title = '12:00GMT-00:00GMT Temperature Difference [K] , p=' + str(p) + ' [Pa] between ' + min_date + '-' + max_date
    fig.suptitle(title, fontsize=25, y=0.9)
    def subplotter(df, title='', data='', k=''):
        """ Plot one subplot of the gridspec """
        import matplotlib.ticker as mticker
        from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
        ax = plt.subplot(gs[k], projection=crs)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.coastlines()
plt.scatter( df['lon'] , df['lat'],
c=df[data],
marker='o',
linewidths=0.4,
edgecolors='gray',
transform=ccrs.PlateCarree(),
vmin=vmin, vmax=vmax, cmap=cm)
plt.colorbar(fraction=0.026, pad=0.02)
ax.set_title(title, fontsize=fs, y=1.03)
ax.set_ylim([-90,90])
gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True,
linewidth=1.1, color='blue', alpha=0.2, linestyle='--')
        gl.top_labels = False      # current cartopy API (xlabels_top etc. were renamed to *_labels)
        gl.left_labels = True
        gl.right_labels = False
        gl.bottom_labels = True
gl.xlines = True
gl.xlocator = mticker.FixedLocator([-180, -120, -60 , 0, 60, 120, 180])
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
gl.xlabel_style = {'size': 11, 'color': 'blue'}
gl.ylabel_style = {'size': 11, 'color': 'blue'}
#gl.xlabel_style = {'color': 'gray', 'weight': 'bold'}
subplotter(data_SUNY, title='SUNY Raw', data = 'raw', k = 0)
subplotter(data_SUNY, title='SUNY Homog.', data = 'adj', k = 1)
subplotter(data_RAOBCORE, title='CUON Raw', data = 'raw', k = 2)
subplotter(data_RAOBCORE, title='CUON RAOBCORE', data = 'adj', k = 3)
subplotter(data_RICH, title='CUON RICH', data = 'adj', k = 4)
subplotter(data_RASE, title='CUON RASE', data = 'adj', k = 5)
subplotter(data_RHARM, title='IGRA', data = 'raw', k = 6)
subplotter(data_RHARM, title='RHARM', data = 'adj', k = 7)
make_maps_static(map_df_SUNY, map_df_RAOBCORE, map_df_RICH, map_df_RASE, map_df_RHARM)
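# Optionally persist the static maps (hypothetical filename):
# plt.savefig('day_night_diff_maps.png', dpi=150, bbox_inches='tight')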