# Source code for metrics_as_scores.cli.Download

"""
This module contains the workflow for downloading known datasets.
"""

from metrics_as_scores.__init__ import DATASETS_DIR
from metrics_as_scores.cli.Workflow import Workflow
from metrics_as_scores.cli.helpers import get_known_datasets, get_local_datasets, KNOWN_DATASETS_FILE
from shutil import unpack_archive
from wget import download
from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn, TimeRemainingColumn



class DownloadWorkflow(Workflow):
    # The class documentation is assigned to ``__doc__`` explicitly because it
    # interpolates KNOWN_DATASETS_FILE; a regular docstring literal cannot be
    # an f-string. The text is shown to the user, so it is kept verbatim.
    __doc__ = f'''
This workflow access a curated list of known datasets that can be used with
Metrics As Scores. With this workflow, a known dataset can be downloaded and
installed as a local dataset. Use the workflow for listing the known datasets
and then enter the ID here.

Known datasets are loaded from: {KNOWN_DATASETS_FILE}
'''.strip()

    def __init__(self) -> None:
        """Initialize the workflow; all setup is delegated to the base class."""
        super().__init__()

    def download(self) -> None:
        """
        Main entry point for this workflow.

        Asks the user to pick one of the known datasets, then downloads its
        archive into a new sub-directory of ``DATASETS_DIR`` (named after the
        dataset's ID), extracts it there, and finally deletes the archive.

        If the chosen ID is already among the local datasets, an error message
        is printed and nothing is downloaded.

        Raises:
            FileExistsError: If the target directory already exists even though
                the dataset is not reported as a local dataset.
            Exception: Any error raised while downloading or extracting is
                re-raised after the partially created dataset directory has
                been removed again.
        """
        # Local import so this block does not depend on changes to the
        # module-level import section.
        from shutil import rmtree

        self._print_doc()

        # Index the known datasets by ID and let the user choose one of them.
        # Each option is a (label, value) pair; the value is the dataset's ID.
        known_ds = {ds['id']: ds for ds in get_known_datasets()}
        dataset_id = self.askt(
            options=[
                (f'{ds["name"]} [{ds["id"]}]', ds['id']) for ds in known_ds.values()
            ],
            prompt='Select the dataset you wish to download:')

        local_ids = {ds['id'] for ds in get_local_datasets()}
        if dataset_id in local_ids:
            self.q.print(
                text=f'The dataset with ID "{dataset_id}" is already installed, aborting.',
                style=self.style_err)
            return

        use_ds = known_ds[dataset_id]
        dataset_dir = DATASETS_DIR.joinpath(f'./{use_ds["id"]}')
        # Deliberately fails if the directory exists: never overwrite data.
        dataset_dir.mkdir(exist_ok=False)
        zip_file = dataset_dir.joinpath('./dataset.zip')

        try:
            self.print_info(
                text_normal='Downloading archive from: ',
                text_vital=f"{use_ds['download']}\n",
                arrow='\n -> ')

            with Progress(
                TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
                BarColumn(),
                MofNCompleteColumn(),
                TextColumn("MB -"),
                TimeElapsedColumn(),
                TextColumn("-"),
                TimeRemainingColumn(),
            ) as progress:
                # The progress bar counts in megabytes (the dataset's 'size'
                # entry is divided by 1e6, i.e., it is treated as bytes).
                task = progress.add_task(
                    '[darkyellow]Downloading ...',
                    total=int(round(use_ds['size'] / 1e6)))

                def update(current_bytes: int, total_bytes: int, width: int):
                    # Progress callback handed to wget's ``download``; only the
                    # number of bytes received so far is used.
                    progress.update(task_id=task, completed=float(current_bytes) / 1e6)

                download(url=use_ds['download'], out=str(zip_file), bar=update)

            self.q.print('Download complete. Extracting ...')
            unpack_archive(filename=str(zip_file), extract_dir=str(dataset_dir))
        except BaseException:
            # A failed or interrupted (e.g., Ctrl+C) download/extraction must
            # not leave a half-installed dataset behind: the leftover directory
            # would make the next attempt fail at ``mkdir(exist_ok=False)`` or
            # be mistaken for an installed dataset. Clean up, then re-raise.
            rmtree(dataset_dir, ignore_errors=True)
            raise

        self.q.print('\nDone! You can now use this dataset!\n')
        self.q.print(10*'-' + '\n')
        # The archive is no longer needed once its contents are extracted.
        zip_file.unlink()