In general, only columns that the model can use as input should be included here (numeric data only). Args: columns (:obj:`List` or :obj:`str`): Dataset column(s) to load in the tf.data.Dataset. This tf.data.Dataset will load and collate batches from the Dataset, and is suitable for passing to methods like model.fit() or model.predict(). dtype ) return signatures def to_tf_dataset ( self, columns : Union ], batch_size : int, shuffle : bool, drop_remainder : bool = None, collate_fn : Callable = None, collate_fn_args : Dict = None, label_cols : Union ] = None, dummy_labels : bool = False, prefetch : bool = True, ): """Create a tf.data.Dataset from the underlying Dataset. TensorSpec ( shape = shape, dtype = tensor. shape else : # If this doesn't look like LM labels that got added by the collate_fn, let's not say anything # about the dimensions we're unsure of shape = + ] else : # If this doesn't look like LM labels that got added by the collate_fn, let's not say anything # about the dimensions we're unsure of shape = + ] signatures = tf. startswith ( "label" ): if "input_ids" in signatures and test_batch. items (): if column in signatures : continue if column. TensorSpec ( shape = shape, dtype = dtype ) # Catching columns added by the collate_fn, such as MLM labels for column, tensor in test_batch. " "If you're getting this error with one of our datasets, and you're " "sure the column should be convertable to tf.Tensor, please " "file an issue at /huggingface/datasets and tag " ) shape = + shape shape = signatures = tf. features = ( ! " "This may indicate a column was included with an unusual datatype " "that we were unable to process correctly. major = 0 : PYARROW_V0 = True else : PYARROW_V0 = False class LazyDict ( UserDict ): def _init_ ( self, data, features = None, decoding = True ): self. dataset_dict import DatasetDict logger = logging. utils.typing import PathLike if TYPE_CHECKING : from. utils.file_utils import estimate_dataset_size from. table import ( ConcatenationTable, InMemoryTable, MemoryMappedTable, Table, cast_with_sliced_list_support, concat_tables, list_table_cache_files, ) from. formatting import format_table, get_format_type_from_alias, get_formatter, query_table from. fingerprint import ( fingerprint_transform, generate_fingerprint, generate_random_fingerprint, get_temporary_cache_files_directory, is_caching_enabled, maybe_register_dataset_for_temp_dir_deletion, update_fingerprint, ) from. filesystems import extract_path_from_uri, is_remote_filesystem from. features import ClassLabel, Features, Sequence, Value, _ArrayXD from. arrow_writer import ArrowWriter, OptimizedTypedSequence from. # Lint as: python3 """ Simple Dataset wrapping an Arrow Table.""" import contextlib import copy import json import os import shutil import tempfile import weakref from collections import Counter, UserDict from collections.abc import Iterable, Mapping from copy import deepcopy from dataclasses import asdict from functools import partial, wraps from math import ceil, floor from pathlib import Path from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Dict, Iterator, List, Optional, Tuple, Union import fsspec import numpy as np import pandas as pd import pyarrow as pa import pute as pc from multiprocess import Pool, RLock from to import tqdm from _classification import TextClassification from. # See the License for the specific language governing permissions and # limitations under the License. # You may obtain a copy of the License at # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # Licensed under the Apache License, Version 2.0 (the "License") # you may not use this file except in compliance with the License. # coding=utf-8 # Copyright 2020 The HuggingFace Authors.
0 Comments
Leave a Reply. |
AuthorWrite something about yourself. No need to be fancy, just an overview. ArchivesCategories |