App for subsetting a large intake catalog

import intake
import panel as pn
# Load the Tabulator extension needed by the pn.widgets.Tabulator table below.
pn.extension('tabulator')
# Global Panel configuration: loading-indicator style and colour, and the
# default sizing mode.
pn.extension(loading_spinner='dots', loading_color='#00aa41', sizing_mode="stretch_width")
from panel.widgets import Tqdm
import pandas as pd
# One progress-bar widget, reused by create_df() and create_filters().
tqdm=Tqdm()
# Bare expression; presumably renders the progress bar as notebook cell output.
tqdm

We load only the columns of the CMIP6 intake catalog that are needed to subset it easily.

# Catalog columns that are read from the remote CSV.
use_cols = [
    "source_id",
    "experiment_id",
    "table_id",
    "variable_id",
]
# Module-level buffer that create_df() appends the downloaded chunks to.
temp = []
def create_df(*events):
    """Download the DKRZ CMIP6 catalog in chunks and return it as one DataFrame.

    Only the columns listed in ``use_cols`` are read. The file is read in
    chunks of one million rows so that the shared ``tqdm`` widget can show
    progress while the download runs.

    Args:
        *events: Ignored. Accepted so the function can also be registered as
            an event callback.

    Returns:
        pandas.DataFrame: all chunks concatenated, with a fresh 0..n-1 index.
    """
    # Reset the module-level buffer first: without this, a second call would
    # concatenate the chunks of the previous call again and return every
    # catalog row twice.
    temp.clear()
    # total=7 is presumably the number of chunks expected for the catalog
    # size at the time of writing; it is only passed to the progress bar.
    for chunk in tqdm(pd.read_csv("https://swift.dkrz.de/v1/dkrz_a44962e3ba914c309a7421573a6949a6/intake-esm/dkrz_cmip6_disk_netcdf.csv.gz",
                      usecols=use_cols,
                      chunksize=10**6), total=7,
                      desc="Reading dkrz-cmip6-catalog", leave=True):
        temp.append(chunk)

    return pd.concat(temp, ignore_index=True)

# Read the catalog columns once; the filters and the download are based on it.
df = create_df()

# Keep one row per distinct combination of the loaded columns for display.
df_groups = df.drop_duplicates(ignore_index=True)

# Paginated table of the distinct combinations with per-column header filters.
pndf = pn.widgets.Tabulator(
    df_groups,
    header_filters=True,
    page_size=10,
    pagination='local',
)
# NOTE: row selection (selectable='checkbox') is intentionally left disabled.
def create_filters(*events):
    """Create one MultiChoice filter per column of ``df`` and attach it to ``pndf``.

    Each widget offers the sorted unique values of its column and is
    registered as a filter of the table for that column. Progress is reported
    through the shared ``tqdm`` widget.

    Args:
        *events: Ignored.

    Returns:
        list: the created MultiChoice widgets, in ``df.columns`` order.
    """
    filter_widgets = []
    column_names = list(df.columns)
    for column_name in tqdm(column_names, desc="Creating filters", leave=True):
        choices = sorted(df[column_name].unique())
        widget = pn.widgets.MultiChoice(
            name=f'{column_name} Filter',
            options=choices,
        )
        filter_widgets.append(widget)
        # Couple the widget's value to the table's rows for this column.
        pndf.add_filter(widget, column_name)
    return filter_widgets
    
# Stack the filter widgets vertically.
pn_column = pn.Column(*create_filters())

# Filters on the left, catalog table on the right. get_csv() reads the filter
# widgets back through layout_pndf[0].
layout_pndf = pn.Row(pn_column, pndf)
layout_pndf
from io import BytesIO

def get_csv():
    """Build the CSV for the current filter selection and return it as a byte stream.

    The values chosen in the filter widgets (``layout_pndf[0]``) are applied
    to ``df``. The remote catalog file is then read a second time with every
    non-matching line skipped, so the result is read without ``usecols`` and
    is not restricted to the columns held in ``df``. While this runs,
    ``loading=True`` is set on ``create_download_bn``.

    Assumes ``df`` carries the default 0..n-1 index in file order (as produced
    by ``create_df``) and that the remote file is unchanged since it was
    loaded.

    Returns:
        io.BytesIO: the encoded output of ``DataFrame.to_csv()`` for the
        selected rows (this includes the index column).
    """
    with pn.param.set_values(create_download_bn, loading=True):
        selected = pd.Series(True, index=df.index)
        # The filter widgets were created by iterating df.columns, so pair
        # them with df.columns rather than with use_cols: read_csv ignores the
        # order of ``usecols`` and the two orders need not agree.
        for column, pnfilter in zip(df.columns, layout_pndf[0]):
            if pnfilter.value:
                selected &= df[column].isin(pnfilter.value)

        # ``skiprows`` expects 0-indexed line numbers of the file, where line
        # 0 is the header. Data row i of df is therefore on file line i + 1;
        # passing the raw row labels would drop the header and shift every
        # skipped line by one.
        skip_lines = (df.index[~selected.to_numpy()] + 1).tolist()

        selection = pd.read_csv("https://swift.dkrz.de/v1/dkrz_a44962e3ba914c309a7421573a6949a6/intake-esm/dkrz_cmip6_disk_netcdf.csv.gz",
                                skiprows=skip_lines)
        return BytesIO(selection.to_csv().encode())
# Button the user presses to build a download for the current selection.
create_download_bn = pn.widgets.Button(
    button_type='primary',
    name="Create Download selection",
)
# Column holding the trigger button; create_button() appends the download
# widget below it.
btn_column = pn.Column(create_download_bn)

def create_button(event):
    """Put a FileDownload widget for the current selection into ``btn_column``.

    If ``btn_column`` already holds more than one item, its last item is
    removed first, so a download created by an earlier click is replaced.

    Args:
        event: the click event passed by the button; not used.
    """
    already_has_download = len(btn_column) > 1
    if already_has_download:
        btn_column.pop(-1)

    download_options = {
        "file": get_csv(),
        "filename": "dkrz_cmip6_disk_selection.csv",
        "button_type": "success",
        "embed": True,
    }
    btn_column.append(pn.widgets.FileDownload(**download_options))

# Build a new download widget every time the button is clicked.
create_download_bn.on_click(create_button)

btn_column
# Export filters, table and download controls together as one HTML file.
# NOTE: passing resources=INLINE (from bokeh.resources) is left disabled.
pn.Column(layout_pndf, btn_column).save(
    'test.html',
    load_path="./",
    embed=True,
)