Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion crunch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,4 @@
import crunch.api as api
import crunch.store as store
from crunch.inline import load as load_notebook
from crunch.container import Columns as Columns
from crunch.runner import is_inside as is_inside_runner
6 changes: 0 additions & 6 deletions crunch/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,15 @@
from crunch.api.domain.competition import CompetitionMode as CompetitionMode
from crunch.api.domain.competition import CompetitionStatus as CompetitionStatus
from crunch.api.domain.crunch import Crunch as Crunch
from crunch.api.domain.data_release import ColumnNames as ColumnNames
from crunch.api.domain.data_release import DataFile as DataFile
from crunch.api.domain.data_release import DataFiles as DataFiles
from crunch.api.domain.data_release import DataFilesUnion as DataFilesUnion
from crunch.api.domain.data_release import DataRelease as DataRelease
from crunch.api.domain.data_release import DataReleaseFeature as DataReleaseFeature
from crunch.api.domain.data_release import DataReleaseSplit as DataReleaseSplit
from crunch.api.domain.data_release import DataReleaseSplitGroup as DataReleaseSplitGroup
from crunch.api.domain.data_release import DataReleaseSplitReduced as DataReleaseSplitReduced
from crunch.api.domain.data_release import DataReleaseTargetResolution as DataReleaseTargetResolution
from crunch.api.domain.data_release import KnownData as KnownData
from crunch.api.domain.data_release import OriginalFiles as OriginalFiles
from crunch.api.domain.data_release import SizeVariant as SizeVariant
from crunch.api.domain.data_release import SplitKeyPythonType as SplitKeyPythonType
from crunch.api.domain.data_release import TargetColumnNames as TargetColumnNames
from crunch.api.domain.enum_ import Language as Language
from crunch.api.domain.enum_ import SplitKeyType as SplitKeyType
from crunch.api.domain.leaderboard import Leaderboard as Leaderboard
Expand Down
168 changes: 5 additions & 163 deletions crunch/api/domain/data_release.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from dataclasses import dataclass
from enum import Enum
from types import MappingProxyType
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union

from dataclasses_json import LetterCase, Undefined, dataclass_json

Expand Down Expand Up @@ -37,53 +36,7 @@ class DataFile:
compressed: bool


@dataclass(frozen=True)
class KnownData:

X_TRAIN = "x_train"
Y_TRAIN = "y_train"
X_TEST = "x_test"
Y_TEST = "y_test"
X = "x"
Y = "y"
Y_RAW = "y_raw"
EXAMPLE_PREDICTION = "example_prediction"


@dataclass_json(
letter_case=LetterCase.CAMEL,
undefined=Undefined.EXCLUDE
)
@dataclass(frozen=True)
class DataFiles:

x_train: DataFile
x_test: DataFile
y_train: DataFile
y_test: DataFile
example_prediction: DataFile

def items(self):
return vars(self).items()


@dataclass_json(
letter_case=LetterCase.CAMEL,
undefined=Undefined.EXCLUDE
)
@dataclass(frozen=True)
class OriginalFiles:

x: DataFile
y: DataFile
y_raw: Optional[DataFile]
example_prediction: DataFile

def items(self):
return vars(self).items()


DataFilesUnion = Union[DataFiles, OriginalFiles, Dict[str, DataFile]]
DataFiles = Dict[str, DataFile]


class DataReleaseSplitGroup(Enum):
Expand Down Expand Up @@ -128,26 +81,6 @@ def from_dict_array(
]


@dataclass_json(
letter_case=LetterCase.CAMEL,
undefined=Undefined.EXCLUDE,
)
@dataclass(frozen=True)
class DataReleaseFeature:

group: str
name: str

@staticmethod
def from_dict_array(
input: List[Dict[str, Any]]
):
return [
DataReleaseFeature.from_dict(x)
for x in input
]


class SizeVariant(Enum):

DEFAULT = "DEFAULT"
Expand Down Expand Up @@ -193,10 +126,6 @@ def number(self) -> int:
def embargo(self) -> int:
return self._attrs["embargo"]

@property
def column_names(self) -> "ColumnNames":
return ColumnNames.from_dict(self._attrs["columnNames"])

@property
def number_of_features(self) -> int:
return self._attrs["numberOfFeatures"]
Expand All @@ -210,22 +139,16 @@ def target_resolution(self):
return DataReleaseTargetResolution[self._attrs["target_resolution"]]

@property
def data_files(self) -> DataFilesUnion:
def data_files(self) -> DataFiles:
files = self._attrs.get("dataFiles")
if not files:
self.reload()
files = self._attrs["dataFiles"]

if "xTrain" in files and "yTrain" in files:
return DataFiles.from_dict(files)

if "x" in files and "y" in files:
return OriginalFiles.from_dict(files)

return MappingProxyType({
return {
key: DataFile.from_dict(value)
for key, value in files.items()
})
}

@property
def splits(self) -> List[DataReleaseSplit]:
Expand All @@ -236,92 +159,15 @@ def splits(self) -> List[DataReleaseSplit]:

return list(DataReleaseSplit.from_dict_array(splits))

@property
def default_feature_group(self) -> str:
return self._attrs["defaultFeatureGroup"]

@property
def features(self) -> Tuple[DataReleaseFeature]:
features = self._attrs.get("features")
if features is None:
self.reload(include_features=True)
features = self._attrs["features"]

return tuple(DataReleaseFeature.from_dict_array(features))

def reload(
self,
include_splits: bool = True,
include_features: bool = True,
):
return super().reload(
include_splits=include_splits,
include_features=include_features,
)


@dataclass_json(
letter_case=LetterCase.CAMEL,
undefined=Undefined.EXCLUDE
)
@dataclass(frozen=True)
class TargetColumnNames:

id: int
name: str
side: Optional[str]
input: Optional[str]
output: Optional[str]
file_path: Optional[str]


@dataclass_json(
letter_case=LetterCase.CAMEL,
undefined=Undefined.EXCLUDE
)
@dataclass(frozen=True)
class ColumnNames:

id: Optional[str]
moon: Optional[str]
side: Optional[str]
input: Optional[str]
output: Optional[str]
targets: List[TargetColumnNames]

@property
def first_target(self):
return next(iter(self.targets), None)

@property
def inputs(self):
return [
target_column_names.input
for target_column_names in self.targets
]

@property
def outputs(self):
return [
target_column_names.output
for target_column_names in self.targets
]

@property
def target_names(self):
return [
target_column_names.name
for target_column_names in self.targets
]

def get_target_by_name(self, name: str):
for target in self.targets:
if target.name == name:
return target

return None


class DataReleaseCollection(Collection[DataRelease]):

model = DataRelease
Expand All @@ -342,14 +188,12 @@ def get(
self,
number: Union[int, str],
include_splits: bool = True,
include_features: bool = True,
) -> DataRelease:
return self.prepare_model(
self._client.api.get_data_release(
self.competition.id,
number,
include_splits=include_splits,
include_features=include_features,
)
)

Expand Down Expand Up @@ -387,14 +231,12 @@ def get_data_release(
competition_identifier: "CompetitionIdentifierType",
number: int,
include_splits: bool = False,
include_features: bool = False,
):
return self._result(
self.get(
f"/v1/competitions/{competition_identifier}/data-releases/{number}",
params={
"includeSplits": include_splits,
"includeFeatures": include_features,
}
),
json=True
Expand Down
11 changes: 6 additions & 5 deletions crunch/api/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from crunch.api.domain.phase import PhaseType
from crunch.api.domain.project import ProjectTokenType
from crunch.external.inflection import camelize, underscore
from crunch.utils import format_bytes, smart_call, try_get_competition_name
from crunch.external.humanfriendly import format_size
from crunch.utils import smart_call, try_get_competition_name


def _print_contact(
Expand Down Expand Up @@ -343,8 +344,8 @@ def print_helper(
self,
**kwargs: Any,
) -> None:
size = format_bytes(self.size) if self.size is not None else None
maximum_size = format_bytes(self.maximum_size)
size = format_size(self.size) if self.size is not None else None
maximum_size = format_size(self.maximum_size)

sizes = f"{size}/{maximum_size}" if size is not None else f"max. {maximum_size}"
print(f"The resources directory is too big. ({sizes})")
Expand Down Expand Up @@ -406,7 +407,7 @@ def print_helper(
self,
**kwargs: Any,
) -> None:
print(f"Prediction is too big. ({format_bytes(self.size)}/{format_bytes(self.maximum_size)})")
print(f"Prediction is too big. ({format_size(self.size)}/{format_size(self.maximum_size)})")


class ProjectNotFoundException(ApiException):
Expand Down Expand Up @@ -578,7 +579,7 @@ def print_helper(
self,
**kwargs: Any,
) -> None:
print(f"Submission is too big. ({format_bytes(self.size)}/{format_bytes(self.maximum_size)})")
print(f"Submission is too big. ({format_size(self.size)}/{format_size(self.maximum_size)})")


class CannotParticipateException(ApiException):
Expand Down
2 changes: 1 addition & 1 deletion crunch/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import os
import sys
from typing import Any, Callable, List, Optional, Tuple, Union
from typing import Any, Callable, List, Optional, Union

import click

Expand Down
12 changes: 1 addition & 11 deletions crunch/command/download.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import typing

from .. import api, constants, container, downloader, utils
from .. import api, constants, downloader, utils


def _get_data_urls(
Expand All @@ -12,18 +12,14 @@ def _get_data_urls(
int,
int,
typing.List[api.SplitKeyPythonType],
container.Features,
api.ColumnNames,
typing.Dict[str, downloader.PreparedDataFile]
]:
data_release = round.phases.get_submission().get_data_release(size_variant=size_variant)

embargo = data_release.embargo
number_of_features = data_release.number_of_features
column_names = data_release.column_names
data_files = data_release.data_files
splits = data_release.splits
features = container.Features.from_data_release(data_release)

split_keys = [
split.key
Expand All @@ -38,8 +34,6 @@ def _get_data_urls(
embargo,
number_of_features,
split_keys,
features,
column_names,
downloader.prepare_all(data_directory_path, data_files),
)

Expand Down Expand Up @@ -87,8 +81,6 @@ def download(
embargo,
number_of_features,
split_keys,
features,
column_names,
prepared_data_files,
) = _get_data_urls(
round,
Expand All @@ -111,8 +103,6 @@ def download(
embargo,
number_of_features,
split_keys,
features,
column_names,
data_directory_path,
file_paths,
)
Expand Down
Loading
Loading