from __future__ import annotations

import pickle
import re
import sys
import warnings
from copy import copy, deepcopy
from io import StringIO
from textwrap import dedent
from typing import Any, Hashable

import numpy as np
import pandas as pd
import pytest
from pandas.core.indexes.datetimes import DatetimeIndex

import xarray as xr
from xarray import (
    DataArray,
    Dataset,
    IndexVariable,
    MergeError,
    Variable,
    align,
    backends,
    broadcast,
    open_dataset,
    set_options,
)
from xarray.coding.cftimeindex import CFTimeIndex
from xarray.core import dtypes, indexing, utils
from xarray.core.common import duck_array_ops, full_like
from xarray.core.coordinates import DatasetCoordinates
from xarray.core.indexes import Index, PandasIndex
from xarray.core.pycompat import array_type, integer_types
from xarray.core.utils import is_scalar
from xarray.tests import (
    InaccessibleArray,
    UnexpectedDataAccess,
    assert_allclose,
    assert_array_equal,
    assert_equal,
    assert_identical,
    assert_no_warnings,
    create_test_data,
    has_cftime,
    has_dask,
    requires_bottleneck,
    requires_cftime,
    requires_cupy,
    requires_dask,
    requires_numexpr,
    requires_pint,
    requires_scipy,
    requires_sparse,
    source_ndarray,
)

try:
    from pandas.errors import UndefinedVariableError
except ImportError:
    # TODO: remove once we stop supporting pandas<1.4.3
    from pandas.core.computation.ops import UndefinedVariableError


try:
    import dask.array as da
except ImportError:
    pass

sparse_array_type = array_type("sparse")

pytestmark = [
    pytest.mark.filterwarnings("error:Mean of empty slice"),
    pytest.mark.filterwarnings("error:All-NaN (slice|axis) encountered"),
]


def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]:
    rs = np.random.RandomState(seed)

    lat = [2, 1, 0]
    lon = [0, 1, 2]
    nt1 = 3
    nt2 = 2
    time1 = pd.date_range("2000-01-01", periods=nt1)
    time2 = pd.date_range("2000-02-01", periods=nt2)
    string_var = np.array(["ae", "bc", "df"], dtype=object)
    string_var_to_append = np.array(["asdf", "asdfg"], dtype=object)
    string_var_fixed_length = np.array(["aa", "bb", "cc"], dtype="|S2")
    string_var_fixed_length_to_append = np.array(["dd", "ee"], dtype="|S2")
    unicode_var = ["áó", "áó", "áó"]
    datetime_var = np.array(
        ["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[s]"
    )
    datetime_var_to_append = np.array(
        ["2019-01-04", "2019-01-05"], dtype="datetime64[s]"
    )
    bool_var = np.array([True, False, True], dtype=bool)
    bool_var_to_append = np.array([False, True], dtype=bool)

    ds = xr.Dataset(
        data_vars={
            "da": xr.DataArray(
                rs.rand(3, 3, nt1),
                coords=[lat, lon, time1],
                dims=["lat", "lon", "time"],
            ),
            "string_var": xr.DataArray(string_var, coords=[time1], dims=["time"]),
            "string_var_fixed_length": xr.DataArray(
                string_var_fixed_length, coords=[time1], dims=["time"]
            ),
            "unicode_var": xr.DataArray(
                unicode_var, coords=[time1], dims=["time"]
            ).astype(np.unicode_),
            "datetime_var": xr.DataArray(datetime_var, coords=[time1], dims=["time"]),
            "bool_var": xr.DataArray(bool_var, coords=[time1], dims=["time"]),
        }
    )

    ds_to_append = xr.Dataset(
        data_vars={
            "da": xr.DataArray(
                rs.rand(3, 3, nt2),
                coords=[lat, lon, time2],
                dims=["lat", "lon", "time"],
            ),
            "string_var": xr.DataArray(
                string_var_to_append, coords=[time2], dims=["time"]
            ),
            "string_var_fixed_length": xr.DataArray(
                string_var_fixed_length_to_append, coords=[time2], dims=["time"]
            ),
            "unicode_var": xr.DataArray(
                unicode_var[:nt2], coords=[time2], dims=["time"]
            ).astype(np.unicode_),
            "datetime_var": xr.DataArray(
                datetime_var_to_append, coords=[time2], dims=["time"]
            ),
            "bool_var": xr.DataArray(bool_var_to_append, coords=[time2], dims=["time"]),
        }
    )

    ds_with_new_var = xr.Dataset(
        data_vars={
            "new_var": xr.DataArray(
                rs.rand(3, 3, nt1 + nt2),
                coords=[lat, lon, time1.append(time2)],
                dims=["lat", "lon", "time"],
            )
        }
    )

    assert all(objp.data.flags.writeable for objp in ds.variables.values())
    assert all(objp.data.flags.writeable for objp in ds_to_append.variables.values())
    return ds, ds_to_append, ds_with_new_var


def create_append_string_length_mismatch_test_data(dtype) -> tuple[Dataset, Dataset]:
    def make_datasets(data, data_to_append) -> tuple[Dataset, Dataset]:
        ds = xr.Dataset(
            {"temperature": (["time"], data)},
            coords={"time": [0, 1, 2]},
        )
        ds_to_append = xr.Dataset(
            {"temperature": (["time"], data_to_append)}, coords={"time": [0, 1, 2]}
        )
        assert all(objp.data.flags.writeable for objp in ds.variables.values())
        assert all(
            objp.data.flags.writeable for objp in ds_to_append.variables.values()
        )
        return ds, ds_to_append

    u2_strings = ["ab", "cd", "ef"]
    u5_strings = ["abc", "def", "ghijk"]

    s2_strings = np.array(["aa", "bb", "cc"], dtype="|S2")
    s3_strings = np.array(["aaa", "bbb", "ccc"], dtype="|S3")

    if dtype == "U":
        return make_datasets(u2_strings, u5_strings)
    elif dtype == "S":
        return make_datasets(s2_strings, s3_strings)
    else:
        raise ValueError(f"unsupported dtype {dtype}.")


def create_test_multiindex() -> Dataset:
    mindex = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2]], names=("level_1", "level_2")
    )
    return Dataset({}, {"x": mindex})


def create_test_stacked_array() -> tuple[DataArray, DataArray]:
    x = DataArray(pd.Index(np.r_[:10], name="x"))
    y = DataArray(pd.Index(np.r_[:20], name="y"))
    a = x * y
    b = x * y * y
    return a, b


class InaccessibleVariableDataStore(backends.InMemoryDataStore):
    def __init__(self):
        super().__init__()
        self._indexvars = set()

    def store(self, variables, *args, **kwargs) -> None:
        super().store(variables, *args, **kwargs)
        for k, v in variables.items():
            if isinstance(v, IndexVariable):
                self._indexvars.add(k)

    def get_variables(self):
        def lazy_inaccessible(k, v):
            if k in self._indexvars:
                return v
            data = indexing.LazilyIndexedArray(InaccessibleArray(v.values))
            return Variable(v.dims, data, v.attrs)

        return {k: lazy_inaccessible(k, v) for k, v in self._variables.items()}


class TestDataset:
    def test_repr(self) -> None:
        data = create_test_data(seed=123)
        data.attrs["foo"] = "bar"
        # need to insert str dtype at runtime to handle different endianness
        expected = dedent(
            """\
            <xarray.Dataset>
            Dimensions:  (dim2: 9, dim3: 10, time: 20, dim1: 8)
            Coordinates:
              * dim2     (dim2) float64 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0
              * dim3     (dim3) %s 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j'
              * time     (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-20
                numbers  (dim3) int64 0 1 2 0 0 1 1 2 2 3
            Dimensions without coordinates: dim1
            Data variables:
                var1     (dim1, dim2) float64 -1.086 0.9973 0.283 ... 0.1995 0.4684 -0.8312
                var2     (dim1, dim2) float64 1.162 -1.097 -2.123 ... 0.1302 1.267 0.3328
                var3     (dim3, dim1) float64 0.5565 -0.2121 0.4563 ... -0.2452 -0.3616
            Attributes:
                foo:      bar"""
            % data["dim3"].dtype
        )
        actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
        print(actual)
        assert expected == actual

        with set_options(display_width=100):
            max_len = max(map(len, repr(data).split("\n")))
            assert 90 < max_len < 100

        expected = dedent(
            """\
            <xarray.Dataset>
            Dimensions:  ()
            Data variables:
                *empty*"""
        )
        actual = "\n".join(x.rstrip() for x in repr(Dataset()).split("\n"))
        print(actual)
        assert expected == actual

        # verify that ... doesn't appear for scalar coordinates
        data = Dataset({"foo": ("x", np.ones(10))}).mean()
        expected = dedent(
            """\
            <xarray.Dataset>
            Dimensions:  ()
            Data variables:
                foo      float64 1.0"""
        )
        actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
        print(actual)
        assert expected == actual

        # verify long attributes are truncated
        data = Dataset(attrs={"foo": "bar" * 1000})
        assert len(repr(data)) < 1000

    def test_repr_multiindex(self) -> None:
        data = create_test_multiindex()
        expected = dedent(
            """\
            <xarray.Dataset>
            Dimensions:  (x: 4)
            Coordinates:
              * x        (x) object MultiIndex
              * level_1  (x) object 'a' 'a' 'b' 'b'
              * level_2  (x) int64 1 2 1 2
            Data variables:
                *empty*"""
        )
        actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
        print(actual)
        assert expected == actual

        # verify that long level names are not truncated
        mindex = pd.MultiIndex.from_product(
            [["a", "b"], [1, 2]], names=("a_quite_long_level_name", "level_2")
        )
        data = Dataset({}, {"x": mindex})
        expected = dedent(
            """\
            <xarray.Dataset>
            Dimensions:                  (x: 4)
            Coordinates:
              * x                        (x) object MultiIndex
              * a_quite_long_level_name  (x) object 'a' 'a' 'b' 'b'
              * level_2                  (x) int64 1 2 1 2
            Data variables:
                *empty*"""
        )
        actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
        print(actual)
        assert expected == actual

    def test_repr_period_index(self) -> None:
        data = create_test_data(seed=456)
        data.coords["time"] = pd.period_range("2000-01-01", periods=20, freq="B")

        # check that creating the repr doesn't raise an error #GH645
        repr(data)

    def test_unicode_data(self) -> None:
        # regression test for GH834
        data = Dataset({"foø": ["ba®"]}, attrs={"å": "∑"})
        repr(data)  # should not raise

        byteorder = "<" if sys.byteorder == "little" else ">"
        expected = dedent(
            """\
            <xarray.Dataset>
            Dimensions:  (foø: 1)
            Coordinates:
              * foø      (foø) %cU3 %r
            Data variables:
                *empty*
            Attributes:
                å:        ∑"""
            % (byteorder, "ba®")
        )
        actual = str(data)
        assert expected == actual

    def test_repr_nep18(self) -> None:
        class Array:
            def __init__(self):
                self.shape = (2,)
                self.dtype = np.dtype(np.float64)

            def __array_function__(self, *args, **kwargs):
                pass

            def __repr__(self):
                return "Custom\nArray"

        dataset = Dataset({"foo": ("x", Array())})
        expected = dedent(
            """\
            <xarray.Dataset>
            Dimensions:  (x: 2)
            Dimensions without coordinates: x
            Data variables:
                foo      (x) float64 Custom Array"""
        )
        assert expected == repr(dataset)

    def test_info(self) -> None:
        ds = create_test_data(seed=123)
        ds = ds.drop_vars("dim3")  # string type prints differently in PY2 vs PY3
        ds.attrs["unicode_attr"] = "ba®"
        ds.attrs["string_attr"] = "bar"

        buf = StringIO()
        ds.info(buf=buf)

        expected = dedent(
            """\
        xarray.Dataset {
        dimensions:
        \tdim2 = 9 ;
        \ttime = 20 ;
        \tdim1 = 8 ;
        \tdim3 = 10 ;

        variables:
        \tfloat64 dim2(dim2) ;
        \tdatetime64[ns] time(time) ;
        \tfloat64 var1(dim1, dim2) ;
        \t\tvar1:foo = variable ;
        \tfloat64 var2(dim1, dim2) ;
        \t\tvar2:foo = variable ;
        \tfloat64 var3(dim3, dim1) ;
        \t\tvar3:foo = variable ;
        \tint64 numbers(dim3) ;

        // global attributes:
        \t:unicode_attr = ba® ;
        \t:string_attr = bar ;
        }"""
        )
        actual = buf.getvalue()
        assert expected == actual
        buf.close()

    def test_constructor(self) -> None:
        x1 = ("x", 2 * np.arange(100))
        x2 = ("x", np.arange(1000))
        z = (["x", "y"], np.arange(1000).reshape(100, 10))

        with pytest.raises(ValueError, match=r"conflicting sizes"):
            Dataset({"a": x1, "b": x2})
        with pytest.raises(ValueError, match=r"disallows such variables"):
            Dataset({"a": x1, "x": z})
        with pytest.raises(TypeError, match=r"tuple of form"):
            Dataset({"x": (1, 2, 3, 4, 5, 6, 7)})
        with pytest.raises(ValueError, match=r"already exists as a scalar"):
            Dataset({"x": 0, "y": ("x", [1, 2, 3])})

        # verify handling of DataArrays
        expected = Dataset({"x": x1, "z": z})
        actual = Dataset({"z": expected["z"]})
        assert_identical(expected, actual)

    def test_constructor_invalid_dims(self) -> None:
        # regression for GH1120
        with pytest.raises(MergeError):
            Dataset(
                data_vars=dict(v=("y", [1, 2, 3, 4])),
                coords=dict(y=DataArray([0.1, 0.2, 0.3, 0.4], dims="x")),
            )

    def test_constructor_1d(self) -> None:
        expected = Dataset({"x": (["x"], 5.0 + np.arange(5))})
        actual = Dataset({"x": 5.0 + np.arange(5)})
        assert_identical(expected, actual)

        actual = Dataset({"x": [5, 6, 7, 8, 9]})
        assert_identical(expected, actual)

    def test_constructor_0d(self) -> None:
        expected = Dataset({"x": ([], 1)})
        for arg in [1, np.array(1), expected["x"]]:
            actual = Dataset({"x": arg})
            assert_identical(expected, actual)

        class Arbitrary:
            pass

        d = pd.Timestamp("2000-01-01T12")
        args = [
            True,
            None,
            3.4,
            np.nan,
            "hello",
            b"raw",
            np.datetime64("2000-01-01"),
            d,
            d.to_pydatetime(),
            Arbitrary(),
        ]
        for arg in args:
            print(arg)
            expected = Dataset({"x": ([], arg)})
            actual = Dataset({"x": arg})
            assert_identical(expected, actual)

    def test_constructor_auto_align(self) -> None:
        a = DataArray([1, 2], [("x", [0, 1])])
        b = DataArray([3, 4], [("x", [1, 2])])

        # verify align uses outer join
        expected = Dataset(
            {"a": ("x", [1, 2, np.nan]), "b": ("x", [np.nan, 3, 4])}, {"x": [0, 1, 2]}
        )
        actual = Dataset({"a": a, "b": b})
        assert_identical(expected, actual)

        # regression test for GH346
        assert isinstance(actual.variables["x"], IndexVariable)

        # variable with different dimensions
        c = ("y", [3, 4])
        expected2 = expected.merge({"c": c})
        actual = Dataset({"a": a, "b": b, "c": c})
        assert_identical(expected2, actual)

        # variable that is only aligned against the aligned variables
        d = ("x", [3, 2, 1])
        expected3 = expected.merge({"d": d})
        actual = Dataset({"a": a, "b": b, "d": d})
        assert_identical(expected3, actual)

        e = ("x", [0, 0])
        with pytest.raises(ValueError, match=r"conflicting sizes"):
            Dataset({"a": a, "b": b, "e": e})

    def test_constructor_pandas_sequence(self) -> None:

        ds = self.make_example_math_dataset()
        pandas_objs = {
            var_name: ds[var_name].to_pandas() for var_name in ["foo", "bar"]
        }
        ds_based_on_pandas = Dataset(pandas_objs, ds.coords, attrs=ds.attrs)
        del ds_based_on_pandas["x"]
        assert_equal(ds, ds_based_on_pandas)

        # reindex pandas obj, check align works
        rearranged_index = reversed(pandas_objs["foo"].index)
        pandas_objs["foo"] = pandas_objs["foo"].reindex(rearranged_index)
        ds_based_on_pandas = Dataset(pandas_objs, ds.coords, attrs=ds.attrs)
        del ds_based_on_pandas["x"]
        assert_equal(ds, ds_based_on_pandas)

    def test_constructor_pandas_single(self) -> None:

        das = [
            DataArray(np.random.rand(4), dims=["a"]),  # series
            DataArray(np.random.rand(4, 3), dims=["a", "b"]),  # df
        ]

        for a in das:
            pandas_obj = a.to_pandas()
            ds_based_on_pandas = Dataset(pandas_obj)  # type: ignore  # TODO: improve typing of __init__
            for dim in ds_based_on_pandas.data_vars:
                assert_array_equal(ds_based_on_pandas[dim], pandas_obj[dim])

    def test_constructor_compat(self) -> None:
        data = {"x": DataArray(0, coords={"y": 1}), "y": ("z", [1, 1, 1])}
        expected = Dataset({"x": 0}, {"y": ("z", [1, 1, 1])})
        actual = Dataset(data)
        assert_identical(expected, actual)

        data = {"y": ("z", [1, 1, 1]), "x": DataArray(0, coords={"y": 1})}
        actual = Dataset(data)
        assert_identical(expected, actual)

        original = Dataset(
            {"a": (("x", "y"), np.ones((2, 3)))},
            {"c": (("x", "y"), np.zeros((2, 3))), "x": [0, 1]},
        )
        expected = Dataset(
            {"a": ("x", np.ones(2)), "b": ("y", np.ones(3))},
            {"c": (("x", "y"), np.zeros((2, 3))), "x": [0, 1]},
        )

        actual = Dataset(
            {"a": original["a"][:, 0], "b": original["a"][0].drop_vars("x")}
        )
        assert_identical(expected, actual)

        data = {"x": DataArray(0, coords={"y": 3}), "y": ("z", [1, 1, 1])}
        with pytest.raises(MergeError):
            Dataset(data)

        data = {"x": DataArray(0, coords={"y": 1}), "y": [1, 1]}
        actual = Dataset(data)
        expected = Dataset({"x": 0}, {"y": [1, 1]})
        assert_identical(expected, actual)

    def test_constructor_with_coords(self) -> None:
        with pytest.raises(ValueError, match=r"found in both data_vars and"):
            Dataset({"a": ("x", [1])}, {"a": ("x", [1])})

        ds = Dataset({}, {"a": ("x", [1])})
        assert not ds.data_vars
        assert list(ds.coords.keys()) == ["a"]

        mindex = pd.MultiIndex.from_product(
            [["a", "b"], [1, 2]], names=("level_1", "level_2")
        )
        with pytest.raises(ValueError, match=r"conflicting MultiIndex"):
            Dataset({}, {"x": mindex, "y": mindex})
            Dataset({}, {"x": mindex, "level_1": range(4)})

    def test_properties(self) -> None:
        ds = create_test_data()

        # dims / sizes
        # These exact types aren't public API, but this makes sure we don't
        # change them inadvertently:
        assert isinstance(ds.dims, utils.Frozen)
        assert isinstance(ds.dims.mapping, dict)
        assert type(ds.dims.mapping) is dict
        assert ds.dims == {"dim1": 8, "dim2": 9, "dim3": 10, "time": 20}
        assert ds.sizes == ds.dims

        # dtypes
        assert isinstance(ds.dtypes, utils.Frozen)
        assert isinstance(ds.dtypes.mapping, dict)
        assert ds.dtypes == {
            "var1": np.dtype("float64"),
            "var2": np.dtype("float64"),
            "var3": np.dtype("float64"),
        }

        # data_vars
        assert list(ds) == list(ds.data_vars)
        assert list(ds.keys()) == list(ds.data_vars)
        assert "aasldfjalskdfj" not in ds.variables
        assert "dim1" in repr(ds.variables)
        assert len(ds) == 3
        assert bool(ds)

        assert list(ds.data_vars) == ["var1", "var2", "var3"]
        assert list(ds.data_vars.keys()) == ["var1", "var2", "var3"]
        assert "var1" in ds.data_vars
        assert "dim1" not in ds.data_vars
        assert "numbers" not in ds.data_vars
        assert len(ds.data_vars) == 3

        # xindexes
        assert set(ds.xindexes) == {"dim2", "dim3", "time"}
        assert len(ds.xindexes) == 3
        assert "dim2" in repr(ds.xindexes)
        assert all([isinstance(idx, Index) for idx in ds.xindexes.values()])

        # indexes
        assert set(ds.indexes) == {"dim2", "dim3", "time"}
        assert len(ds.indexes) == 3
        assert "dim2" in repr(ds.indexes)
        assert all([isinstance(idx, pd.Index) for idx in ds.indexes.values()])

        # coords
        assert list(ds.coords) == ["dim2", "dim3", "time", "numbers"]
        assert "dim2" in ds.coords
        assert "numbers" in ds.coords
        assert "var1" not in ds.coords
        assert "dim1" not in ds.coords
        assert len(ds.coords) == 4

        # nbytes
        assert (
            Dataset({"x": np.int64(1), "y": np.array([1, 2], dtype=np.float32)}).nbytes
            == 16
        )

    def test_asarray(self) -> None:
        ds = Dataset({"x": 0})
        with pytest.raises(TypeError, match=r"cannot directly convert"):
            np.asarray(ds)

    def test_get_index(self) -> None:
        ds = Dataset({"foo": (("x", "y"), np.zeros((2, 3)))}, coords={"x": ["a", "b"]})
        assert ds.get_index("x").equals(pd.Index(["a", "b"]))
        assert ds.get_index("y").equals(pd.Index([0, 1, 2]))
        with pytest.raises(KeyError):
            ds.get_index("z")

    def test_attr_access(self) -> None:
        ds = Dataset(
            {"tmin": ("x", [42], {"units": "Celsius"})}, attrs={"title": "My test data"}
        )
        assert_identical(ds.tmin, ds["tmin"])
        assert_identical(ds.tmin.x, ds.x)

        assert ds.title == ds.attrs["title"]
        assert ds.tmin.units == ds["tmin"].attrs["units"]

        assert {"tmin", "title"} <= set(dir(ds))
        assert "units" in set(dir(ds.tmin))

        # should defer to variable of same name
        ds.attrs["tmin"] = -999
        assert ds.attrs["tmin"] == -999
        assert_identical(ds.tmin, ds["tmin"])

    def test_variable(self) -> None:
        a = Dataset()
        d = np.random.random((10, 3))
        a["foo"] = (("time", "x"), d)
        assert "foo" in a.variables
        assert "foo" in a
        a["bar"] = (("time", "x"), d)
        # order of creation is preserved
        assert list(a.variables) == ["foo", "bar"]
        assert_array_equal(a["foo"].values, d)
        # try to add variable with dim (10,3) with data that's (3,10)
        with pytest.raises(ValueError):
            a["qux"] = (("time", "x"), d.T)

    def test_modify_inplace(self) -> None:
        a = Dataset()
        vec = np.random.random((10,))
        attributes = {"foo": "bar"}
        a["x"] = ("x", vec, attributes)
        assert "x" in a.coords
        assert isinstance(a.coords["x"].to_index(), pd.Index)
        assert_identical(a.coords["x"].variable, a.variables["x"])
        b = Dataset()
        b["x"] = ("x", vec, attributes)
        assert_identical(a["x"], b["x"])
        assert a.dims == b.dims
        # this should work
        a["x"] = ("x", vec[:5])
        a["z"] = ("x", np.arange(5))
        with pytest.raises(ValueError):
            # now it shouldn't, since there is a conflicting length
            a["x"] = ("x", vec[:4])
        arr = np.random.random((10, 1))
        scal = np.array(0)
        with pytest.raises(ValueError):
            a["y"] = ("y", arr)
        with pytest.raises(ValueError):
            a["y"] = ("y", scal)
        assert "y" not in a.dims

    def test_coords_properties(self) -> None:
        # use int64 for repr consistency on windows
        data = Dataset(
            {
                "x": ("x", np.array([-1, -2], "int64")),
                "y": ("y", np.array([0, 1, 2], "int64")),
                "foo": (["x", "y"], np.random.randn(2, 3)),
            },
            {"a": ("x", np.array([4, 5], "int64")), "b": np.int64(-10)},
        )

        coords = data.coords
        assert isinstance(coords, DatasetCoordinates)

        # len
        assert len(coords) == 4

        # iter
        assert list(coords) == ["x", "y", "a", "b"]

        assert_identical(coords["x"].variable, data["x"].variable)
        assert_identical(coords["y"].variable, data["y"].variable)

        assert "x" in coords
        assert "a" in coords
        assert 0 not in coords
        assert "foo" not in coords

        with pytest.raises(KeyError):
            coords["foo"]
        with pytest.raises(KeyError):
            coords[0]

        # repr
        expected = dedent(
            """\
        Coordinates:
          * x        (x) int64 -1 -2
          * y        (y) int64 0 1 2
            a        (x) int64 4 5
            b        int64 -10"""
        )
        actual = repr(coords)
        assert expected == actual

        # dims
        assert coords.dims == {"x": 2, "y": 3}

        # dtypes
        assert coords.dtypes == {
            "x": np.dtype("int64"),
            "y": np.dtype("int64"),
            "a": np.dtype("int64"),
            "b": np.dtype("int64"),
        }

    def test_coords_modify(self) -> None:
        data = Dataset(
            {
                "x": ("x", [-1, -2]),
                "y": ("y", [0, 1, 2]),
                "foo": (["x", "y"], np.random.randn(2, 3)),
            },
            {"a": ("x", [4, 5]), "b": -10},
        )

        actual = data.copy(deep=True)
        actual.coords["x"] = ("x", ["a", "b"])
        assert_array_equal(actual["x"], ["a", "b"])

        actual = data.copy(deep=True)
        actual.coords["z"] = ("z", ["a", "b"])
        assert_array_equal(actual["z"], ["a", "b"])

        actual = data.copy(deep=True)
        with pytest.raises(ValueError, match=r"conflicting sizes"):
            actual.coords["x"] = ("x", [-1])
        assert_identical(actual, data)  # should not be modified

        actual = data.copy()
        del actual.coords["b"]
        expected = data.reset_coords("b", drop=True)
        assert_identical(expected, actual)

        with pytest.raises(KeyError):
            del data.coords["not_found"]

        with pytest.raises(KeyError):
            del data.coords["foo"]

        actual = data.copy(deep=True)
        actual.coords.update({"c": 11})
        expected = data.merge({"c": 11}).set_coords("c")
        assert_identical(expected, actual)

        # regression test for GH3746
        del actual.coords["x"]
        assert "x" not in actual.xindexes

    def test_update_index(self) -> None:
        actual = Dataset(coords={"x": [1, 2, 3]})
        actual["x"] = ["a", "b", "c"]
        assert actual.xindexes["x"].to_pandas_index().equals(pd.Index(["a", "b", "c"]))

    def test_coords_setitem_with_new_dimension(self) -> None:
        actual = Dataset()
        actual.coords["foo"] = ("x", [1, 2, 3])
        expected = Dataset(coords={"foo": ("x", [1, 2, 3])})
        assert_identical(expected, actual)

    def test_coords_setitem_multiindex(self) -> None:
        data = create_test_multiindex()
        with pytest.raises(
            ValueError, match=r"cannot set or update variable.*corrupt.*index "
        ):
            data.coords["level_1"] = range(4)

    def test_coords_set(self) -> None:
        one_coord = Dataset({"x": ("x", [0]), "yy": ("x", [1]), "zzz": ("x", [2])})
        two_coords = Dataset({"zzz": ("x", [2])}, {"x": ("x", [0]), "yy": ("x", [1])})
        all_coords = Dataset(
            coords={"x": ("x", [0]), "yy": ("x", [1]), "zzz": ("x", [2])}
        )

        actual = one_coord.set_coords("x")
        assert_identical(one_coord, actual)
        actual = one_coord.set_coords(["x"])
        assert_identical(one_coord, actual)

        actual = one_coord.set_coords("yy")
        assert_identical(two_coords, actual)

        actual = one_coord.set_coords(["yy", "zzz"])
        assert_identical(all_coords, actual)

        actual = one_coord.reset_coords()
        assert_identical(one_coord, actual)
        actual = two_coords.reset_coords()
        assert_identical(one_coord, actual)
        actual = all_coords.reset_coords()
        assert_identical(one_coord, actual)

        actual = all_coords.reset_coords(["yy", "zzz"])
        assert_identical(one_coord, actual)
        actual = all_coords.reset_coords("zzz")
        assert_identical(two_coords, actual)

        with pytest.raises(ValueError, match=r"cannot remove index"):
            one_coord.reset_coords("x")

        actual = all_coords.reset_coords("zzz", drop=True)
        expected = all_coords.drop_vars("zzz")
        assert_identical(expected, actual)
        expected = two_coords.drop_vars("zzz")
        assert_identical(expected, actual)

    def test_coords_to_dataset(self) -> None:
        orig = Dataset({"foo": ("y", [-1, 0, 1])}, {"x": 10, "y": [2, 3, 4]})
        expected = Dataset(coords={"x": 10, "y": [2, 3, 4]})
        actual = orig.coords.to_dataset()
        assert_identical(expected, actual)

    def test_coords_merge(self) -> None:
        orig_coords = Dataset(coords={"a": ("x", [1, 2]), "x": [0, 1]}).coords
        other_coords = Dataset(coords={"b": ("x", ["a", "b"]), "x": [0, 1]}).coords
        expected = Dataset(
            coords={"a": ("x", [1, 2]), "b": ("x", ["a", "b"]), "x": [0, 1]}
        )
        actual = orig_coords.merge(other_coords)
        assert_identical(expected, actual)
        actual = other_coords.merge(orig_coords)
        assert_identical(expected, actual)

        other_coords = Dataset(coords={"x": ("x", ["a"])}).coords
        with pytest.raises(MergeError):
            orig_coords.merge(other_coords)
        other_coords = Dataset(coords={"x": ("x", ["a", "b"])}).coords
        with pytest.raises(MergeError):
            orig_coords.merge(other_coords)
        other_coords = Dataset(coords={"x": ("x", ["a", "b", "c"])}).coords
        with pytest.raises(MergeError):
            orig_coords.merge(other_coords)

        other_coords = Dataset(coords={"a": ("x", [8, 9])}).coords
        expected = Dataset(coords={"x": range(2)})
        actual = orig_coords.merge(other_coords)
        assert_identical(expected, actual)
        actual = other_coords.merge(orig_coords)
        assert_identical(expected, actual)

        other_coords = Dataset(coords={"x": np.nan}).coords
        actual = orig_coords.merge(other_coords)
        assert_identical(orig_coords.to_dataset(), actual)
        actual = other_coords.merge(orig_coords)
        assert_identical(orig_coords.to_dataset(), actual)

    def test_coords_merge_mismatched_shape(self) -> None:
        orig_coords = Dataset(coords={"a": ("x", [1, 1])}).coords
        other_coords = Dataset(coords={"a": 1}).coords
        expected = orig_coords.to_dataset()
        actual = orig_coords.merge(other_coords)
        assert_identical(expected, actual)

        other_coords = Dataset(coords={"a": ("y", [1])}).coords
        expected = Dataset(coords={"a": (["x", "y"], [[1], [1]])})
        actual = orig_coords.merge(other_coords)
        assert_identical(expected, actual)

        actual = other_coords.merge(orig_coords)
        assert_identical(expected.transpose(), actual)

        orig_coords = Dataset(coords={"a": ("x", [np.nan])}).coords
        other_coords = Dataset(coords={"a": np.nan}).coords
        expected = orig_coords.to_dataset()
        actual = orig_coords.merge(other_coords)
        assert_identical(expected, actual)

    def test_data_vars_properties(self) -> None:
        ds = Dataset()
        ds["foo"] = (("x",), [1.0])
        ds["bar"] = 2.0

        # iter
        assert set(ds.data_vars) == {"foo", "bar"}
        assert "foo" in ds.data_vars
        assert "x" not in ds.data_vars
        assert_identical(ds["foo"], ds.data_vars["foo"])

        # repr
        expected = dedent(
            """\
        Data variables:
            foo      (x) float64 1.0
            bar      float64 2.0"""
        )
        actual = repr(ds.data_vars)
        assert expected == actual

        # dtypes
        assert ds.data_vars.dtypes == {
            "foo": np.dtype("float64"),
            "bar": np.dtype("float64"),
        }

    def test_equals_and_identical(self) -> None:
        data = create_test_data(seed=42)
        assert data.equals(data)
        assert data.identical(data)

        data2 = create_test_data(seed=42)
        data2.attrs["foobar"] = "baz"
        assert data.equals(data2)
        assert not data.identical(data2)

        del data2["time"]
        assert not data.equals(data2)

        data = create_test_data(seed=42).rename({"var1": None})
        assert data.equals(data)
        assert data.identical(data)

        data2 = data.reset_coords()
        assert not data2.equals(data)
        assert not data2.identical(data)

    def test_equals_failures(self) -> None:
        data = create_test_data()
        assert not data.equals("foo")  # type: ignore[arg-type]
        assert not data.identical(123)  # type: ignore[arg-type]
        assert not data.broadcast_equals({1: 2})  # type: ignore[arg-type]

    def test_broadcast_equals(self) -> None:
        data1 = Dataset(coords={"x": 0})
        data2 = Dataset(coords={"x": [0]})
        assert data1.broadcast_equals(data2)
        assert not data1.equals(data2)
        assert not data1.identical(data2)

    def test_attrs(self) -> None:
        data = create_test_data(seed=42)
        data.attrs = {"foobar": "baz"}
        assert data.attrs["foobar"], "baz"
        assert isinstance(data.attrs, dict)

    def test_chunks_does_not_load_data(self) -> None:
        # regression test for GH6538
        store = InaccessibleVariableDataStore()
        create_test_data().dump_to_store(store)
        ds = open_dataset(store)
        assert ds.chunks == {}

    @requires_dask
    def test_chunk(self) -> None:
        data = create_test_data()
        for v in data.variables.values():
            assert isinstance(v.data, np.ndarray)
        assert data.chunks == {}

        reblocked = data.chunk()
        for k, v in reblocked.variables.items():
            if k in reblocked.dims:
                assert isinstance(v.data, np.ndarray)
            else:
                assert isinstance(v.data, da.Array)

        expected_chunks: dict[Hashable, tuple[int, ...]] = {
            "dim1": (8,),
            "dim2": (9,),
            "dim3": (10,),
        }
        assert reblocked.chunks == expected_chunks

        # test kwargs form of chunks
        assert data.chunk(expected_chunks).chunks == expected_chunks

        def get_dask_names(ds):
            return {k: v.data.name for k, v in ds.items()}

        orig_dask_names = get_dask_names(reblocked)

        reblocked = data.chunk({"time": 5, "dim1": 5, "dim2": 5, "dim3": 5})
        # time is not a dim in any of the data_vars, so it
        # doesn't get chunked
        expected_chunks = {"dim1": (5, 3), "dim2": (5, 4), "dim3": (5, 5)}
        assert reblocked.chunks == expected_chunks

        # make sure dask names change when rechunking by different amounts
        # regression test for GH3350
        new_dask_names = get_dask_names(reblocked)
        for k, v in new_dask_names.items():
            assert v != orig_dask_names[k]

        reblocked = data.chunk(expected_chunks)
        assert reblocked.chunks == expected_chunks

        # reblock on already blocked data
        orig_dask_names = get_dask_names(reblocked)
        reblocked = reblocked.chunk(expected_chunks)
        new_dask_names = get_dask_names(reblocked)
        assert reblocked.chunks == expected_chunks
        assert_identical(reblocked, data)
        # rechunking with same chunk sizes should not change names
        for k, v in new_dask_names.items():
            assert v == orig_dask_names[k]

        with pytest.raises(ValueError, match=r"some chunks"):
            data.chunk({"foo": 10})

    @requires_dask
    def test_dask_is_lazy(self) -> None:
        store = InaccessibleVariableDataStore()
        create_test_data().dump_to_store(store)
        ds = open_dataset(store).chunk()

        with pytest.raises(UnexpectedDataAccess):
            ds.load()
        with pytest.raises(UnexpectedDataAccess):
            ds["var1"].values

        # these should not raise UnexpectedDataAccess:
        ds.var1.data
        ds.isel(time=10)
        ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1)
        ds.transpose()
        ds.mean()
        ds.fillna(0)
        ds.rename({"dim1": "foobar"})
        ds.set_coords("var1")
        ds.drop_vars("var1")

    def test_isel(self) -> None:
        data = create_test_data()
        slicers: dict[Hashable, slice] = {
            "dim1": slice(None, None, 2),
            "dim2": slice(0, 2),
        }
        ret = data.isel(slicers)

        # Verify that only the specified dimension was altered
        assert list(data.dims) == list(ret.dims)
        for d in data.dims:
            if d in slicers:
                assert ret.dims[d] == np.arange(data.dims[d])[slicers[d]].size
            else:
                assert data.dims[d] == ret.dims[d]
        # Verify that the data is what we expect
        for v in data.variables:
            assert data[v].dims == ret[v].dims
            assert data[v].attrs == ret[v].attrs
            slice_list = [slice(None)] * data[v].values.ndim
            for d, s in slicers.items():
                if d in data[v].dims:
                    inds = np.nonzero(np.array(data[v].dims) == d)[0]
                    for ind in inds:
                        slice_list[ind] = s
            expected = data[v].values[tuple(slice_list)]
            actual = ret[v].values
            np.testing.assert_array_equal(expected, actual)

        with pytest.raises(ValueError):
            data.isel(not_a_dim=slice(0, 2))
        with pytest.raises(
            ValueError,
            match=r"Dimensions {'not_a_dim'} do not exist. Expected "
            r"one or more of "
            r"[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*",
        ):
            data.isel(not_a_dim=slice(0, 2))
        with pytest.warns(
            UserWarning,
            match=r"Dimensions {'not_a_dim'} do not exist. "
            r"Expected one or more of "
            r"[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*",
        ):
            data.isel(not_a_dim=slice(0, 2), missing_dims="warn")
        assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore"))

        ret = data.isel(dim1=0)
        assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims
        assert set(data.data_vars) == set(ret.data_vars)
        assert set(data.coords) == set(ret.coords)
        assert set(data.xindexes) == set(ret.xindexes)

        ret = data.isel(time=slice(2), dim1=0, dim2=slice(5))
        assert {"time": 2, "dim2": 5, "dim3": 10} == ret.dims
        assert set(data.data_vars) == set(ret.data_vars)
        assert set(data.coords) == set(ret.coords)
        assert set(data.xindexes) == set(ret.xindexes)

        ret = data.isel(time=0, dim1=0, dim2=slice(5))
        assert {"dim2": 5, "dim3": 10} == ret.dims
        assert set(data.data_vars) == set(ret.data_vars)
        assert set(data.coords) == set(ret.coords)
        assert set(data.xindexes) == set(list(ret.xindexes) + ["time"])

    def test_isel_fancy(self) -> None:
        # isel with fancy indexing.
        data = create_test_data()

        pdim1 = [1, 2, 3]
        pdim2 = [4, 5, 1]
        pdim3 = [1, 2, 3]
        actual = data.isel(
            dim1=(("test_coord",), pdim1),
            dim2=(("test_coord",), pdim2),
            dim3=(("test_coord",), pdim3),
        )
        assert "test_coord" in actual.dims
        assert actual.coords["test_coord"].shape == (len(pdim1),)

        # Should work with DataArray
        actual = data.isel(
            dim1=DataArray(pdim1, dims="test_coord"),
            dim2=(("test_coord",), pdim2),
            dim3=(("test_coord",), pdim3),
        )
        assert "test_coord" in actual.dims
        assert actual.coords["test_coord"].shape == (len(pdim1),)
        expected = data.isel(
            dim1=(("test_coord",), pdim1),
            dim2=(("test_coord",), pdim2),
            dim3=(("test_coord",), pdim3),
        )
        assert_identical(actual, expected)

        # DataArray with coordinate
        idx1 = DataArray(pdim1, dims=["a"], coords={"a": np.random.randn(3)})
        idx2 = DataArray(pdim2, dims=["b"], coords={"b": np.random.randn(3)})
        idx3 = DataArray(pdim3, dims=["c"], coords={"c": np.random.randn(3)})
        # Should work with DataArray
        actual = data.isel(dim1=idx1, dim2=idx2, dim3=idx3)
        assert "a" in actual.dims
        assert "b" in actual.dims
        assert "c" in actual.dims
        assert "time" in actual.coords
        assert "dim2" in actual.coords
        assert "dim3" in actual.coords
        expected = data.isel(
            dim1=(("a",), pdim1), dim2=(("b",), pdim2), dim3=(("c",), pdim3)
        )
        expected = expected.assign_coords(a=idx1["a"], b=idx2["b"], c=idx3["c"])
        assert_identical(actual, expected)

        idx1 = DataArray(pdim1, dims=["a"], coords={"a": np.random.randn(3)})
        idx2 = DataArray(pdim2, dims=["a"])
        idx3 = DataArray(pdim3, dims=["a"])
        # Should work with DataArray
        actual = data.isel(dim1=idx1, dim2=idx2, dim3=idx3)
        assert "a" in actual.dims
        assert "time" in actual.coords
        assert "dim2" in actual.coords
        assert "dim3" in actual.coords
        expected = data.isel(
            dim1=(("a",), pdim1), dim2=(("a",), pdim2), dim3=(("a",), pdim3)
        )
        expected = expected.assign_coords(a=idx1["a"])
        assert_identical(actual, expected)

        actual = data.isel(dim1=(("points",), pdim1), dim2=(("points",), pdim2))
        assert "points" in actual.dims
        assert "dim3" in actual.dims
        assert "dim3" not in actual.data_vars
        np.testing.assert_array_equal(data["dim2"][pdim2], actual["dim2"])

        # test that the order of the indexers doesn't matter
        assert_identical(
            data.isel(dim1=(("points",), pdim1), dim2=(("points",), pdim2)),
            data.isel(dim2=(("points",), pdim2), dim1=(("points",), pdim1)),
        )
        # make sure we're raising errors in the right places
        with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"):
            data.isel(dim1=(("points",), [1, 2]), dim2=(("points",), [1, 2, 3]))
        with pytest.raises(TypeError, match=r"cannot use a Dataset"):
            data.isel(dim1=Dataset({"points": [1, 2]}))

        # test to be sure we keep around variables that were not indexed
        ds = Dataset({"x": [1, 2, 3, 4], "y": 0})
        actual = ds.isel(x=(("points",), [0, 1, 2]))
        assert_identical(ds["y"], actual["y"])

        # tests using index or DataArray as indexers
        stations = Dataset()
        stations["station"] = (("station",), ["A", "B", "C"])
        stations["dim1s"] = (("station",), [1, 2, 3])
        stations["dim2s"] = (("station",), [4, 5, 1])

        actual = data.isel(dim1=stations["dim1s"], dim2=stations["dim2s"])
        assert "station" in actual.coords
        assert "station" in actual.dims
        assert_identical(actual["station"].drop_vars(["dim2"]), stations["station"])

        with pytest.raises(ValueError, match=r"conflicting values/indexes on "):
            data.isel(
                dim1=DataArray(
                    [0, 1, 2], dims="station", coords={"station": [0, 1, 2]}
                ),
                dim2=DataArray(
                    [0, 1, 2], dims="station", coords={"station": [0, 1, 3]}
                ),
            )

        # multi-dimensional selection
        stations = Dataset()
        stations["a"] = (("a",), ["A", "B", "C"])
        stations["b"] = (("b",), [0, 1])
        stations["dim1s"] = (("a", "b"), [[1, 2], [2, 3], [3, 4]])
        stations["dim2s"] = (("a",), [4, 5, 1])
        actual = data.isel(dim1=stations["dim1s"], dim2=stations["dim2s"])
        assert "a" in actual.coords
        assert "a" in actual.dims
        assert "b" in actual.coords
        assert "b" in actual.dims
        assert "dim2" in actual.coords
        assert "a" in actual["dim2"].dims

        assert_identical(actual["a"].drop_vars(["dim2"]), stations["a"])
        assert_identical(actual["b"], stations["b"])
        expected_var1 = data["var1"].variable[
            stations["dim1s"].variable, stations["dim2s"].variable
        ]
        expected_var2 = data["var2"].variable[
            stations["dim1s"].variable, stations["dim2s"].variable
        ]
        expected_var3 = data["var3"].variable[slice(None), stations["dim1s"].variable]
        assert_equal(actual["a"].drop_vars("dim2"), stations["a"])
        assert_array_equal(actual["var1"], expected_var1)
        assert_array_equal(actual["var2"], expected_var2)
        assert_array_equal(actual["var3"], expected_var3)

        # test that drop works
        ds = xr.Dataset({"a": (("x",), [1, 2, 3])}, coords={"b": (("x",), [5, 6, 7])})

        actual = ds.isel({"x": 1}, drop=False)
        expected = xr.Dataset({"a": 2}, coords={"b": 6})
        assert_identical(actual, expected)

        actual = ds.isel({"x": 1}, drop=True)
        expected = xr.Dataset({"a": 2})
        assert_identical(actual, expected)

        actual = ds.isel({"x": DataArray(1)}, drop=False)
        expected = xr.Dataset({"a": 2}, coords={"b": 6})
        assert_identical(actual, expected)

        actual = ds.isel({"x": DataArray(1)}, drop=True)
        expected = xr.Dataset({"a": 2})
        assert_identical(actual, expected)

    def test_isel_dataarray(self) -> None:
        """Test for indexing by DataArray"""
        data = create_test_data()
        # indexing with DataArray with same-name coordinates.
        indexing_da = DataArray(
            np.arange(1, 4), dims=["dim1"], coords={"dim1": np.random.randn(3)}
        )
        actual = data.isel(dim1=indexing_da)
        assert_identical(indexing_da["dim1"], actual["dim1"])
        assert_identical(data["dim2"], actual["dim2"])

        # Conflict in the dimension coordinate
        indexing_da = DataArray(
            np.arange(1, 4), dims=["dim2"], coords={"dim2": np.random.randn(3)}
        )
        with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"):
            data.isel(dim2=indexing_da)
        # Also the case for DataArray
        with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"):
            data["var2"].isel(dim2=indexing_da)
        with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"):
            data["dim2"].isel(dim2=indexing_da)

        # same name coordinate which does not conflict
        indexing_da = DataArray(
            np.arange(1, 4), dims=["dim2"], coords={"dim2": data["dim2"].values[1:4]}
        )
        actual = data.isel(dim2=indexing_da)
        assert_identical(actual["dim2"], indexing_da["dim2"])

        # Silently drop conflicted (non-dimensional) coordinate of indexer
        indexing_da = DataArray(
            np.arange(1, 4),
            dims=["dim2"],
            coords={
                "dim2": data["dim2"].values[1:4],
                "numbers": ("dim2", np.arange(2, 5)),
            },
        )
        actual = data.isel(dim2=indexing_da)
        assert_identical(actual["numbers"], data["numbers"])

        # boolean data array with coordinate with the same name
        indexing_da = DataArray(
            np.arange(1, 10), dims=["dim2"], coords={"dim2": data["dim2"].values}
        )
        indexing_da = indexing_da < 3
        actual = data.isel(dim2=indexing_da)
        assert_identical(actual["dim2"], data["dim2"][:2])

        # boolean data array with non-dimensioncoordinate
        indexing_da = DataArray(
            np.arange(1, 10),
            dims=["dim2"],
            coords={
                "dim2": data["dim2"].values,
                "non_dim": (("dim2",), np.random.randn(9)),
                "non_dim2": 0,
            },
        )
        indexing_da = indexing_da < 3
        actual = data.isel(dim2=indexing_da)
        assert_identical(
            actual["dim2"].drop_vars("non_dim").drop_vars("non_dim2"), data["dim2"][:2]
        )
        assert_identical(actual["non_dim"], indexing_da["non_dim"][:2])
        assert_identical(actual["non_dim2"], indexing_da["non_dim2"])

        # non-dimension coordinate will be also attached
        indexing_da = DataArray(
            np.arange(1, 4),
            dims=["dim2"],
            coords={"non_dim": (("dim2",), np.random.randn(3))},
        )
        actual = data.isel(dim2=indexing_da)
        assert "non_dim" in actual
        assert "non_dim" in actual.coords

        # Index by a scalar DataArray
        indexing_da = DataArray(3, dims=[], coords={"station": 2})
        actual = data.isel(dim2=indexing_da)
        assert "station" in actual
        actual = data.isel(dim2=indexing_da["station"])
        assert "station" in actual

        # indexer generated from coordinates
        indexing_ds = Dataset({}, coords={"dim2": [0, 1, 2]})
        with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"):
            actual = data.isel(dim2=indexing_ds["dim2"])

    def test_isel_fancy_convert_index_variable(self) -> None:
        # select index variable "x" with a DataArray of dim "z"
        # -> drop index and convert index variable to base variable
        ds = xr.Dataset({"foo": ("x", [1, 2, 3])}, coords={"x": [0, 1, 2]})
        idxr = xr.DataArray([1], dims="z", name="x")
        actual = ds.isel(x=idxr)
        assert "x" not in actual.xindexes
        assert not isinstance(actual.x.variable, IndexVariable)

    def test_sel(self) -> None:
        data = create_test_data()
        int_slicers = {"dim1": slice(None, None, 2), "dim2": slice(2), "dim3": slice(3)}
        loc_slicers = {
            "dim1": slice(None, None, 2),
            "dim2": slice(0, 0.5),
            "dim3": slice("a", "c"),
        }
        assert_equal(data.isel(int_slicers), data.sel(loc_slicers))
        data["time"] = ("time", pd.date_range("2000-01-01", periods=20))
        assert_equal(data.isel(time=0), data.sel(time="2000-01-01"))
        assert_equal(
            data.isel(time=slice(10)), data.sel(time=slice("2000-01-01", "2000-01-10"))
        )
        assert_equal(data, data.sel(time=slice("1999", "2005")))
        times = pd.date_range("2000-01-01", periods=3)
        assert_equal(data.isel(time=slice(3)), data.sel(time=times))
        assert_equal(
            data.isel(time=slice(3)), data.sel(time=(data["time.dayofyear"] <= 3))
        )

        td = pd.to_timedelta(np.arange(3), unit="days")
        data = Dataset({"x": ("td", np.arange(3)), "td": td})
        assert_equal(data, data.sel(td=td))
        assert_equal(data, data.sel(td=slice("3 days")))
        assert_equal(data.isel(td=0), data.sel(td=pd.Timedelta("0 days")))
        assert_equal(data.isel(td=0), data.sel(td=pd.Timedelta("0h")))
        assert_equal(data.isel(td=slice(1, 3)), data.sel(td=slice("1 days", "2 days")))

    def test_sel_dataarray(self) -> None:
        data = create_test_data()

        ind = DataArray([0.0, 0.5, 1.0], dims=["dim2"])
        actual = data.sel(dim2=ind)
        assert_equal(actual, data.isel(dim2=[0, 1, 2]))

        # with different dimension
        ind = DataArray([0.0, 0.5, 1.0], dims=["new_dim"])
        actual = data.sel(dim2=ind)
        expected = data.isel(dim2=Variable("new_dim", [0, 1, 2]))
        assert "new_dim" in actual.dims
        assert_equal(actual, expected)

        # Multi-dimensional
        ind = DataArray([[0.0], [0.5], [1.0]], dims=["new_dim", "new_dim2"])
        actual = data.sel(dim2=ind)
        expected = data.isel(dim2=Variable(("new_dim", "new_dim2"), [[0], [1], [2]]))
        assert "new_dim" in actual.dims
        assert "new_dim2" in actual.dims
        assert_equal(actual, expected)

        # with coordinate
        ind = DataArray(
            [0.0, 0.5, 1.0], dims=["new_dim"], coords={"new_dim": ["a", "b", "c"]}
        )
        actual = data.sel(dim2=ind)
        expected = data.isel(dim2=[0, 1, 2]).rename({"dim2": "new_dim"})
        assert "new_dim" in actual.dims
        assert "new_dim" in actual.coords
        assert_equal(
            actual.drop_vars("new_dim").drop_vars("dim2"), expected.drop_vars("new_dim")
        )
        assert_equal(actual["new_dim"].drop_vars("dim2"), ind["new_dim"])

        # with conflicted coordinate (silently ignored)
        ind = DataArray(
            [0.0, 0.5, 1.0], dims=["dim2"], coords={"dim2": ["a", "b", "c"]}
        )
        actual = data.sel(dim2=ind)
        expected = data.isel(dim2=[0, 1, 2])
        assert_equal(actual, expected)

        # with conflicted coordinate (silently ignored)
        ind = DataArray(
            [0.0, 0.5, 1.0],
            dims=["new_dim"],
            coords={"new_dim": ["a", "b", "c"], "dim2": 3},
        )
        actual = data.sel(dim2=ind)
        assert_equal(
            actual["new_dim"].drop_vars("dim2"), ind["new_dim"].drop_vars("dim2")
        )
        expected = data.isel(dim2=[0, 1, 2])
        expected["dim2"] = (("new_dim"), expected["dim2"].values)
        assert_equal(actual["dim2"].drop_vars("new_dim"), expected["dim2"])
        assert actual["var1"].dims == ("dim1", "new_dim")

        # with non-dimensional coordinate
        ind = DataArray(
            [0.0, 0.5, 1.0],
            dims=["dim2"],
            coords={
                "dim2": ["a", "b", "c"],
                "numbers": ("dim2", [0, 1, 2]),
                "new_dim": ("dim2", [1.1, 1.2, 1.3]),
            },
        )
        actual = data.sel(dim2=ind)
        expected = data.isel(dim2=[0, 1, 2])
        assert_equal(actual.drop_vars("new_dim"), expected)
        assert np.allclose(actual["new_dim"].values, ind["new_dim"].values)

    def test_sel_dataarray_mindex(self) -> None:
        midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two"))
        mds = xr.Dataset(
            {"var": (("x", "y"), np.random.rand(6, 3))},
            coords={"x": midx, "y": range(3)},
        )

        actual_isel = mds.isel(x=xr.DataArray(np.arange(3), dims="x"))
        actual_sel = mds.sel(x=DataArray(midx[:3], dims="x"))
        assert actual_isel["x"].dims == ("x",)
        assert actual_sel["x"].dims == ("x",)
        assert_identical(actual_isel, actual_sel)

        actual_isel = mds.isel(x=xr.DataArray(np.arange(3), dims="z"))
        actual_sel = mds.sel(x=Variable("z", midx[:3]))
        assert actual_isel["x"].dims == ("z",)
        assert actual_sel["x"].dims == ("z",)
        assert_identical(actual_isel, actual_sel)

        # with coordinate
        actual_isel = mds.isel(
            x=xr.DataArray(np.arange(3), dims="z", coords={"z": [0, 1, 2]})
        )
        actual_sel = mds.sel(
            x=xr.DataArray(midx[:3], dims="z", coords={"z": [0, 1, 2]})
        )
        assert actual_isel["x"].dims == ("z",)
        assert actual_sel["x"].dims == ("z",)
        assert_identical(actual_isel, actual_sel)

        # Vectorized indexing with level-variables raises an error
        with pytest.raises(ValueError, match=r"Vectorized selection is "):
            mds.sel(one=["a", "b"])

        with pytest.raises(
            ValueError,
            match=r"Vectorized selection is not available along coordinate 'x' with a multi-index",
        ):
            mds.sel(
                x=xr.DataArray(
                    [np.array(midx[:2]), np.array(midx[-2:])], dims=["a", "b"]
                )
            )

    def test_sel_categorical(self) -> None:
        ind = pd.Series(["foo", "bar"], dtype="category")
        df = pd.DataFrame({"ind": ind, "values": [1, 2]})
        ds = df.set_index("ind").to_xarray()
        actual = ds.sel(ind="bar")
        expected = ds.isel(ind=1)
        assert_identical(expected, actual)

    def test_sel_categorical_error(self) -> None:
        ind = pd.Series(["foo", "bar"], dtype="category")
        df = pd.DataFrame({"ind": ind, "values": [1, 2]})
        ds = df.set_index("ind").to_xarray()
        with pytest.raises(ValueError):
            ds.sel(ind="bar", method="nearest")
        with pytest.raises(ValueError):
            ds.sel(ind="bar", tolerance="nearest")

    def test_categorical_index(self) -> None:
        cat = pd.CategoricalIndex(
            ["foo", "bar", "foo"],
            categories=["foo", "bar", "baz", "qux", "quux", "corge"],
        )
        ds = xr.Dataset(
            {"var": ("cat", np.arange(3))},
            coords={"cat": ("cat", cat), "c": ("cat", [0, 1, 1])},
        )
        # test slice
        actual1 = ds.sel(cat="foo")
        expected1 = ds.isel(cat=[0, 2])
        assert_identical(expected1, actual1)
        # make sure the conversion to the array works
        actual2 = ds.sel(cat="foo")["cat"].values
        assert (actual2 == np.array(["foo", "foo"])).all()

        ds = ds.set_index(index=["cat", "c"])
        actual3 = ds.unstack("index")
        assert actual3["var"].shape == (2, 2)

    def test_categorical_reindex(self) -> None:
        cat = pd.CategoricalIndex(
            ["foo", "bar", "baz"],
            categories=["foo", "bar", "baz", "qux", "quux", "corge"],
        )
        ds = xr.Dataset(
            {"var": ("cat", np.arange(3))},
            coords={"cat": ("cat", cat), "c": ("cat", [0, 1, 2])},
        )
        actual = ds.reindex(cat=["foo"])["cat"].values
        assert (actual == np.array(["foo"])).all()

    def test_categorical_multiindex(self) -> None:
        i1 = pd.Series([0, 0])
        cat = pd.CategoricalDtype(categories=["foo", "baz", "bar"])
        i2 = pd.Series(["baz", "bar"], dtype=cat)

        df = pd.DataFrame({"i1": i1, "i2": i2, "values": [1, 2]}).set_index(
            ["i1", "i2"]
        )
        actual = df.to_xarray()
        assert actual["values"].shape == (1, 2)

    def test_sel_drop(self) -> None:
        data = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]})
        expected = Dataset({"foo": 1})
        selected = data.sel(x=0, drop=True)
        assert_identical(expected, selected)

        expected = Dataset({"foo": 1}, {"x": 0})
        selected = data.sel(x=0, drop=False)
        assert_identical(expected, selected)

        data = Dataset({"foo": ("x", [1, 2, 3])})
        expected = Dataset({"foo": 1})
        selected = data.sel(x=0, drop=True)
        assert_identical(expected, selected)

    def test_sel_drop_mindex(self) -> None:
        midx = pd.MultiIndex.from_arrays([["a", "a"], [1, 2]], names=("foo", "bar"))
        data = Dataset(coords={"x": midx})

        actual = data.sel(foo="a", drop=True)
        assert "foo" not in actual.coords

        actual = data.sel(foo="a", drop=False)
        assert_equal(actual.foo, DataArray("a", coords={"foo": "a"}))

    def test_isel_drop(self) -> None:
        data = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]})
        expected = Dataset({"foo": 1})
        selected = data.isel(x=0, drop=True)
        assert_identical(expected, selected)

        expected = Dataset({"foo": 1}, {"x": 0})
        selected = data.isel(x=0, drop=False)
        assert_identical(expected, selected)

    def test_head(self) -> None:
        data = create_test_data()

        expected = data.isel(time=slice(5), dim2=slice(6))
        actual = data.head(time=5, dim2=6)
        assert_equal(expected, actual)

        expected = data.isel(time=slice(0))
        actual = data.head(time=0)
        assert_equal(expected, actual)

        expected = data.isel({dim: slice(6) for dim in data.dims})
        actual = data.head(6)
        assert_equal(expected, actual)

        expected = data.isel({dim: slice(5) for dim in data.dims})
        actual = data.head()
        assert_equal(expected, actual)

        with pytest.raises(TypeError, match=r"either dict-like or a single int"):
            data.head([3])  # type: ignore[arg-type]
        with pytest.raises(TypeError, match=r"expected integer type"):
            data.head(dim2=3.1)
        with pytest.raises(ValueError, match=r"expected positive int"):
            data.head(time=-3)

    def test_tail(self) -> None:
        data = create_test_data()

        expected = data.isel(time=slice(-5, None), dim2=slice(-6, None))
        actual = data.tail(time=5, dim2=6)
        assert_equal(expected, actual)

        expected = data.isel(dim1=slice(0))
        actual = data.tail(dim1=0)
        assert_equal(expected, actual)

        expected = data.isel({dim: slice(-6, None) for dim in data.dims})
        actual = data.tail(6)
        assert_equal(expected, actual)

        expected = data.isel({dim: slice(-5, None) for dim in data.dims})
        actual = data.tail()
        assert_equal(expected, actual)

        with pytest.raises(TypeError, match=r"either dict-like or a single int"):
            data.tail([3])  # type: ignore[arg-type]
        with pytest.raises(TypeError, match=r"expected integer type"):
            data.tail(dim2=3.1)
        with pytest.raises(ValueError, match=r"expected positive int"):
            data.tail(time=-3)

    def test_thin(self) -> None:
        data = create_test_data()

        expected = data.isel(time=slice(None, None, 5), dim2=slice(None, None, 6))
        actual = data.thin(time=5, dim2=6)
        assert_equal(expected, actual)

        expected = data.isel({dim: slice(None, None, 6) for dim in data.dims})
        actual = data.thin(6)
        assert_equal(expected, actual)

        with pytest.raises(TypeError, match=r"either dict-like or a single int"):
            data.thin([3])  # type: ignore[arg-type]
        with pytest.raises(TypeError, match=r"expected integer type"):
            data.thin(dim2=3.1)
        with pytest.raises(ValueError, match=r"cannot be zero"):
            data.thin(time=0)
        with pytest.raises(ValueError, match=r"expected positive int"):
            data.thin(time=-3)

    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
    def test_sel_fancy(self) -> None:
        data = create_test_data()

        # add in a range() index
        data["dim1"] = data.dim1

        pdim1 = [1, 2, 3]
        pdim2 = [4, 5, 1]
        pdim3 = [1, 2, 3]
        expected = data.isel(
            dim1=Variable(("test_coord",), pdim1),
            dim2=Variable(("test_coord",), pdim2),
            dim3=Variable(("test_coord"), pdim3),
        )
        actual = data.sel(
            dim1=Variable(("test_coord",), data.dim1[pdim1]),
            dim2=Variable(("test_coord",), data.dim2[pdim2]),
            dim3=Variable(("test_coord",), data.dim3[pdim3]),
        )
        assert_identical(expected, actual)

        # DataArray Indexer
        idx_t = DataArray(
            data["time"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]}
        )
        idx_2 = DataArray(
            data["dim2"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]}
        )
        idx_3 = DataArray(
            data["dim3"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]}
        )
        actual = data.sel(time=idx_t, dim2=idx_2, dim3=idx_3)
        expected = data.isel(
            time=Variable(("a",), [3, 2, 1]),
            dim2=Variable(("a",), [3, 2, 1]),
            dim3=Variable(("a",), [3, 2, 1]),
        )
        expected = expected.assign_coords(a=idx_t["a"])
        assert_identical(expected, actual)

        idx_t = DataArray(
            data["time"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]}
        )
        idx_2 = DataArray(
            data["dim2"][[2, 1, 3]].values, dims=["b"], coords={"b": [0, 1, 2]}
        )
        idx_3 = DataArray(
            data["dim3"][[1, 2, 1]].values, dims=["c"], coords={"c": [0.0, 1.1, 2.2]}
        )
        actual = data.sel(time=idx_t, dim2=idx_2, dim3=idx_3)
        expected = data.isel(
            time=Variable(("a",), [3, 2, 1]),
            dim2=Variable(("b",), [2, 1, 3]),
            dim3=Variable(("c",), [1, 2, 1]),
        )
        expected = expected.assign_coords(a=idx_t["a"], b=idx_2["b"], c=idx_3["c"])
        assert_identical(expected, actual)

        # test from sel_points
        data = Dataset({"foo": (("x", "y"), np.arange(9).reshape(3, 3))})
        data.coords.update({"x": [0, 1, 2], "y": [0, 1, 2]})

        expected = Dataset(
            {"foo": ("points", [0, 4, 8])},
            coords={
                "x": Variable(("points",), [0, 1, 2]),
                "y": Variable(("points",), [0, 1, 2]),
            },
        )
        actual = data.sel(
            x=Variable(("points",), [0, 1, 2]), y=Variable(("points",), [0, 1, 2])
        )
        assert_identical(expected, actual)

        expected.coords.update({"x": ("points", [0, 1, 2]), "y": ("points", [0, 1, 2])})
        actual = data.sel(
            x=Variable(("points",), [0.1, 1.1, 2.5]),
            y=Variable(("points",), [0, 1.2, 2.0]),
            method="pad",
        )
        assert_identical(expected, actual)

        idx_x = DataArray([0, 1, 2], dims=["a"], coords={"a": ["a", "b", "c"]})
        idx_y = DataArray([0, 2, 1], dims=["b"], coords={"b": [0, 3, 6]})
        expected_ary = data["foo"][[0, 1, 2], [0, 2, 1]]
        actual = data.sel(x=idx_x, y=idx_y)
        assert_array_equal(expected_ary, actual["foo"])
        assert_identical(actual["a"].drop_vars("x"), idx_x["a"])
        assert_identical(actual["b"].drop_vars("y"), idx_y["b"])

        with pytest.raises(KeyError):
            data.sel(x=[2.5], y=[2.0], method="pad", tolerance=1e-3)

    def test_sel_method(self) -> None:
        data = create_test_data()

        expected = data.sel(dim2=1)
        actual = data.sel(dim2=0.95, method="nearest")
        assert_identical(expected, actual)

        actual = data.sel(dim2=0.95, method="nearest", tolerance=1)
        assert_identical(expected, actual)

        with pytest.raises(KeyError):
            actual = data.sel(dim2=np.pi, method="nearest", tolerance=0)

        expected = data.sel(dim2=[1.5])
        actual = data.sel(dim2=[1.45], method="backfill")
        assert_identical(expected, actual)

        with pytest.raises(NotImplementedError, match=r"slice objects"):
            data.sel(dim2=slice(1, 3), method="ffill")

        with pytest.raises(TypeError, match=r"``method``"):
            # this should not pass silently
            data.sel(dim2=1, method=data)  # type: ignore[arg-type]

        # cannot pass method if there is no associated coordinate
        with pytest.raises(ValueError, match=r"cannot supply"):
            data.sel(dim1=0, method="nearest")

    def test_loc(self) -> None:
        data = create_test_data()
        expected = data.sel(dim3="a")
        actual = data.loc[dict(dim3="a")]
        assert_identical(expected, actual)
        with pytest.raises(TypeError, match=r"can only lookup dict"):
            data.loc["a"]  # type: ignore[index]

    def test_selection_multiindex(self) -> None:
        mindex = pd.MultiIndex.from_product(
            [["a", "b"], [1, 2], [-1, -2]], names=("one", "two", "three")
        )
        mdata = Dataset(data_vars={"var": ("x", range(8))}, coords={"x": mindex})

        def test_sel(
            lab_indexer, pos_indexer, replaced_idx=False, renamed_dim=None
        ) -> None:
            ds = mdata.sel(x=lab_indexer)
            expected_ds = mdata.isel(x=pos_indexer)
            if not replaced_idx:
                assert_identical(ds, expected_ds)
            else:
                if renamed_dim:
                    assert ds["var"].dims[0] == renamed_dim
                    ds = ds.rename({renamed_dim: "x"})
                assert_identical(ds["var"].variable, expected_ds["var"].variable)
                assert not ds["x"].equals(expected_ds["x"])

        test_sel(("a", 1, -1), 0)
        test_sel(("b", 2, -2), -1)
        test_sel(("a", 1), [0, 1], replaced_idx=True, renamed_dim="three")
        test_sel(("a",), range(4), replaced_idx=True)
        test_sel("a", range(4), replaced_idx=True)
        test_sel([("a", 1, -1), ("b", 2, -2)], [0, 7])
        test_sel(slice("a", "b"), range(8))
        test_sel(slice(("a", 1), ("b", 1)), range(6))
        test_sel({"one": "a", "two": 1, "three": -1}, 0)
        test_sel({"one": "a", "two": 1}, [0, 1], replaced_idx=True, renamed_dim="three")
        test_sel({"one": "a"}, range(4), replaced_idx=True)

        assert_identical(mdata.loc[{"x": {"one": "a"}}], mdata.sel(x={"one": "a"}))
        assert_identical(mdata.loc[{"x": "a"}], mdata.sel(x="a"))
        assert_identical(mdata.loc[{"x": ("a", 1)}], mdata.sel(x=("a", 1)))
        assert_identical(mdata.loc[{"x": ("a", 1, -1)}], mdata.sel(x=("a", 1, -1)))

        assert_identical(mdata.sel(x={"one": "a", "two": 1}), mdata.sel(one="a", two=1))

    def test_broadcast_like(self) -> None:
        original1 = DataArray(
            np.random.randn(5), [("x", range(5))], name="a"
        ).to_dataset()

        original2 = DataArray(np.random.randn(6), [("y", range(6))], name="b")

        expected1, expected2 = broadcast(original1, original2)

        assert_identical(
            original1.broadcast_like(original2), expected1.transpose("y", "x")
        )

        assert_identical(original2.broadcast_like(original1), expected2)

    def test_to_pandas(self) -> None:
        # 0D -> series
        actual = Dataset({"a": 1, "b": 2}).to_pandas()
        expected = pd.Series([1, 2], ["a", "b"])
        assert_array_equal(actual, expected)

        # 1D -> dataframe
        x = np.random.randn(10)
        y = np.random.randn(10)
        t = list("abcdefghij")
        ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t)})
        actual = ds.to_pandas()
        expected = ds.to_dataframe()
        assert expected.equals(actual), (expected, actual)

        # 2D -> error
        x2d = np.random.randn(10, 10)
        y2d = np.random.randn(10, 10)
        with pytest.raises(ValueError, match=r"cannot convert Datasets"):
            Dataset({"a": (["t", "r"], x2d), "b": (["t", "r"], y2d)}).to_pandas()

    def test_reindex_like(self) -> None:
        data = create_test_data()
        data["letters"] = ("dim3", 10 * ["a"])

        expected = data.isel(dim1=slice(10), time=slice(13))
        actual = data.reindex_like(expected)
        assert_identical(actual, expected)

        expected = data.copy(deep=True)
        expected["dim3"] = ("dim3", list("cdefghijkl"))
        expected["var3"][:-2] = expected["var3"][2:].values
        expected["var3"][-2:] = np.nan
        expected["letters"] = expected["letters"].astype(object)
        expected["letters"][-2:] = np.nan
        expected["numbers"] = expected["numbers"].astype(float)
        expected["numbers"][:-2] = expected["numbers"][2:].values
        expected["numbers"][-2:] = np.nan
        actual = data.reindex_like(expected)
        assert_identical(actual, expected)

    def test_reindex(self) -> None:
        data = create_test_data()
        assert_identical(data, data.reindex())

        expected = data.assign_coords(dim1=data["dim1"])
        actual = data.reindex(dim1=data["dim1"])
        assert_identical(actual, expected)

        actual = data.reindex(dim1=data["dim1"].values)
        assert_identical(actual, expected)

        actual = data.reindex(dim1=data["dim1"].to_index())
        assert_identical(actual, expected)

        with pytest.raises(
            ValueError, match=r"cannot reindex or align along dimension"
        ):
            data.reindex(dim1=data["dim1"][:5])

        expected = data.isel(dim2=slice(5))
        actual = data.reindex(dim2=data["dim2"][:5])
        assert_identical(actual, expected)

        # test dict-like argument
        actual = data.reindex({"dim2": data["dim2"]})
        expected = data
        assert_identical(actual, expected)
        with pytest.raises(ValueError, match=r"cannot specify both"):
            data.reindex({"x": 0}, x=0)
        with pytest.raises(ValueError, match=r"dictionary"):
            data.reindex("foo")  # type: ignore[arg-type]

        # invalid dimension
        # TODO: (benbovy - explicit indexes): uncomment?
        # --> from reindex docstrings: "any mis-matched dimension is simply ignored"
        # with pytest.raises(ValueError, match=r"indexer keys.*not correspond.*"):
        #     data.reindex(invalid=0)

        # out of order
        expected = data.sel(dim2=data["dim2"][:5:-1])
        actual = data.reindex(dim2=data["dim2"][:5:-1])
        assert_identical(actual, expected)

        # multiple fill values
        expected = data.reindex(dim2=[0.1, 2.1, 3.1, 4.1]).assign(
            var1=lambda ds: ds.var1.copy(data=[[-10, -10, -10, -10]] * len(ds.dim1)),
            var2=lambda ds: ds.var2.copy(data=[[-20, -20, -20, -20]] * len(ds.dim1)),
        )
        actual = data.reindex(
            dim2=[0.1, 2.1, 3.1, 4.1], fill_value={"var1": -10, "var2": -20}
        )
        assert_identical(actual, expected)
        # use the default value
        expected = data.reindex(dim2=[0.1, 2.1, 3.1, 4.1]).assign(
            var1=lambda ds: ds.var1.copy(data=[[-10, -10, -10, -10]] * len(ds.dim1)),
            var2=lambda ds: ds.var2.copy(
                data=[[np.nan, np.nan, np.nan, np.nan]] * len(ds.dim1)
            ),
        )
        actual = data.reindex(dim2=[0.1, 2.1, 3.1, 4.1], fill_value={"var1": -10})
        assert_identical(actual, expected)

        # regression test for #279
        expected = Dataset({"x": ("time", np.random.randn(5))}, {"time": range(5)})
        time2 = DataArray(np.arange(5), dims="time2")
        with pytest.raises(ValueError):
            actual = expected.reindex(time=time2)

        # another regression test
        ds = Dataset(
            {"foo": (["x", "y"], np.zeros((3, 4)))}, {"x": range(3), "y": range(4)}
        )
        expected = Dataset(
            {"foo": (["x", "y"], np.zeros((3, 2)))}, {"x": [0, 1, 3], "y": [0, 1]}
        )
        expected["foo"][-1] = np.nan
        actual = ds.reindex(x=[0, 1, 3], y=[0, 1])
        assert_identical(expected, actual)

    def test_reindex_attrs_encoding(self) -> None:
        ds = Dataset(
            {"data": ("x", [1, 2, 3])},
            {"x": ("x", [0, 1, 2], {"foo": "bar"}, {"bar": "baz"})},
        )
        actual = ds.reindex(x=[0, 1])
        expected = Dataset(
            {"data": ("x", [1, 2])},
            {"x": ("x", [0, 1], {"foo": "bar"}, {"bar": "baz"})},
        )
        assert_identical(actual, expected)
        assert actual.x.encoding == expected.x.encoding

    def test_reindex_warning(self) -> None:
        data = create_test_data()

        with pytest.raises(ValueError):
            # DataArray with different dimension raises Future warning
            ind = xr.DataArray([0.0, 1.0], dims=["new_dim"], name="ind")
            data.reindex(dim2=ind)

        # Should not warn
        ind = xr.DataArray([0.0, 1.0], dims=["dim2"], name="ind")
        with warnings.catch_warnings(record=True) as ws:
            data.reindex(dim2=ind)
            assert len(ws) == 0

    def test_reindex_variables_copied(self) -> None:
        data = create_test_data()
        reindexed_data = data.reindex(copy=False)
        for k in data.variables:
            assert reindexed_data.variables[k] is not data.variables[k]

    def test_reindex_method(self) -> None:
        ds = Dataset({"x": ("y", [10, 20]), "y": [0, 1]})
        y = [-0.5, 0.5, 1.5]
        actual = ds.reindex(y=y, method="backfill")
        expected = Dataset({"x": ("y", [10, 20, np.nan]), "y": y})
        assert_identical(expected, actual)

        actual = ds.reindex(y=y, method="backfill", tolerance=0.1)
        expected = Dataset({"x": ("y", 3 * [np.nan]), "y": y})
        assert_identical(expected, actual)

        actual = ds.reindex(y=y, method="backfill", tolerance=[0.1, 0.5, 0.1])
        expected = Dataset({"x": ("y", [np.nan, 20, np.nan]), "y": y})
        assert_identical(expected, actual)

        actual = ds.reindex(y=[0.1, 0.1, 1], tolerance=[0, 0.1, 0], method="nearest")
        expected = Dataset({"x": ("y", [np.nan, 10, 20]), "y": [0.1, 0.1, 1]})
        assert_identical(expected, actual)

        actual = ds.reindex(y=y, method="pad")
        expected = Dataset({"x": ("y", [np.nan, 10, 20]), "y": y})
        assert_identical(expected, actual)

        alt = Dataset({"y": y})
        actual = ds.reindex_like(alt, method="pad")
        assert_identical(expected, actual)

    @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"x": 2, "z": 1}])
    def test_reindex_fill_value(self, fill_value) -> None:
        ds = Dataset({"x": ("y", [10, 20]), "z": ("y", [-20, -10]), "y": [0, 1]})
        y = [0, 1, 2]
        actual = ds.reindex(y=y, fill_value=fill_value)
        if fill_value == dtypes.NA:
            # if we supply the default, we expect the missing value for a
            # float array
            fill_value_x = fill_value_z = np.nan
        elif isinstance(fill_value, dict):
            fill_value_x = fill_value["x"]
            fill_value_z = fill_value["z"]
        else:
            fill_value_x = fill_value_z = fill_value
        expected = Dataset(
            {
                "x": ("y", [10, 20, fill_value_x]),
                "z": ("y", [-20, -10, fill_value_z]),
                "y": y,
            }
        )
        assert_identical(expected, actual)

    @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"x": 2, "z": 1}])
    def test_reindex_like_fill_value(self, fill_value) -> None:
        ds = Dataset({"x": ("y", [10, 20]), "z": ("y", [-20, -10]), "y": [0, 1]})
        y = [0, 1, 2]
        alt = Dataset({"y": y})
        actual = ds.reindex_like(alt, fill_value=fill_value)
        if fill_value == dtypes.NA:
            # if we supply the default, we expect the missing value for a
            # float array
            fill_value_x = fill_value_z = np.nan
        elif isinstance(fill_value, dict):
            fill_value_x = fill_value["x"]
            fill_value_z = fill_value["z"]
        else:
            fill_value_x = fill_value_z = fill_value
        expected = Dataset(
            {
                "x": ("y", [10, 20, fill_value_x]),
                "z": ("y", [-20, -10, fill_value_z]),
                "y": y,
            }
        )
        assert_identical(expected, actual)

    @pytest.mark.parametrize("dtype", [str, bytes])
    def test_reindex_str_dtype(self, dtype) -> None:
        data = Dataset({"data": ("x", [1, 2]), "x": np.array(["a", "b"], dtype=dtype)})

        actual = data.reindex(x=data.x)
        expected = data

        assert_identical(expected, actual)
        assert actual.x.dtype == expected.x.dtype

    @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"foo": 2, "bar": 1}])
    def test_align_fill_value(self, fill_value) -> None:
        x = Dataset({"foo": DataArray([1, 2], dims=["x"], coords={"x": [1, 2]})})
        y = Dataset({"bar": DataArray([1, 2], dims=["x"], coords={"x": [1, 3]})})
        x2, y2 = align(x, y, join="outer", fill_value=fill_value)
        if fill_value == dtypes.NA:
            # if we supply the default, we expect the missing value for a
            # float array
            fill_value_foo = fill_value_bar = np.nan
        elif isinstance(fill_value, dict):
            fill_value_foo = fill_value["foo"]
            fill_value_bar = fill_value["bar"]
        else:
            fill_value_foo = fill_value_bar = fill_value

        expected_x2 = Dataset(
            {
                "foo": DataArray(
                    [1, 2, fill_value_foo], dims=["x"], coords={"x": [1, 2, 3]}
                )
            }
        )
        expected_y2 = Dataset(
            {
                "bar": DataArray(
                    [1, fill_value_bar, 2], dims=["x"], coords={"x": [1, 2, 3]}
                )
            }
        )
        assert_identical(expected_x2, x2)
        assert_identical(expected_y2, y2)

    def test_align(self) -> None:
        left = create_test_data()
        right = left.copy(deep=True)
        right["dim3"] = ("dim3", list("cdefghijkl"))
        right["var3"][:-2] = right["var3"][2:].values
        right["var3"][-2:] = np.random.randn(*right["var3"][-2:].shape)
        right["numbers"][:-2] = right["numbers"][2:].values
        right["numbers"][-2:] = -10

        intersection = list("cdefghij")
        union = list("abcdefghijkl")

        left2, right2 = align(left, right, join="inner")
        assert_array_equal(left2["dim3"], intersection)
        assert_identical(left2, right2)

        left2, right2 = align(left, right, join="outer")

        assert_array_equal(left2["dim3"], union)
        assert_equal(left2["dim3"].variable, right2["dim3"].variable)

        assert_identical(left2.sel(dim3=intersection), right2.sel(dim3=intersection))
        assert np.isnan(left2["var3"][-2:]).all()
        assert np.isnan(right2["var3"][:2]).all()

        left2, right2 = align(left, right, join="left")
        assert_equal(left2["dim3"].variable, right2["dim3"].variable)
        assert_equal(left2["dim3"].variable, left["dim3"].variable)

        assert_identical(left2.sel(dim3=intersection), right2.sel(dim3=intersection))
        assert np.isnan(right2["var3"][:2]).all()

        left2, right2 = align(left, right, join="right")
        assert_equal(left2["dim3"].variable, right2["dim3"].variable)
        assert_equal(left2["dim3"].variable, right["dim3"].variable)

        assert_identical(left2.sel(dim3=intersection), right2.sel(dim3=intersection))

        assert np.isnan(left2["var3"][-2:]).all()

        with pytest.raises(ValueError, match=r"invalid value for join"):
            align(left, right, join="foobar")  # type: ignore[arg-type]
        with pytest.raises(TypeError):
            align(left, right, foo="bar")  # type: ignore[call-arg]

    def test_align_exact(self) -> None:
        left = xr.Dataset(coords={"x": [0, 1]})
        right = xr.Dataset(coords={"x": [1, 2]})

        left1, left2 = xr.align(left, left, join="exact")
        assert_identical(left1, left)
        assert_identical(left2, left)

        with pytest.raises(ValueError, match=r"cannot align.*join.*exact.*not equal.*"):
            xr.align(left, right, join="exact")

    def test_align_override(self) -> None:
        left = xr.Dataset(coords={"x": [0, 1, 2]})
        right = xr.Dataset(coords={"x": [0.1, 1.1, 2.1], "y": [1, 2, 3]})
        expected_right = xr.Dataset(coords={"x": [0, 1, 2], "y": [1, 2, 3]})

        new_left, new_right = xr.align(left, right, join="override")
        assert_identical(left, new_left)
        assert_identical(new_right, expected_right)

        new_left, new_right = xr.align(left, right, exclude="x", join="override")
        assert_identical(left, new_left)
        assert_identical(right, new_right)

        new_left, new_right = xr.align(
            left.isel(x=0, drop=True), right, exclude="x", join="override"
        )
        assert_identical(left.isel(x=0, drop=True), new_left)
        assert_identical(right, new_right)

        with pytest.raises(
            ValueError, match=r"cannot align.*join.*override.*same size"
        ):
            xr.align(left.isel(x=0).expand_dims("x"), right, join="override")

    def test_align_exclude(self) -> None:
        x = Dataset(
            {
                "foo": DataArray(
                    [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [1, 2], "y": [3, 4]}
                )
            }
        )
        y = Dataset(
            {
                "bar": DataArray(
                    [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [1, 3], "y": [5, 6]}
                )
            }
        )
        x2, y2 = align(x, y, exclude=["y"], join="outer")

        expected_x2 = Dataset(
            {
                "foo": DataArray(
                    [[1, 2], [3, 4], [np.nan, np.nan]],
                    dims=["x", "y"],
                    coords={"x": [1, 2, 3], "y": [3, 4]},
                )
            }
        )
        expected_y2 = Dataset(
            {
                "bar": DataArray(
                    [[1, 2], [np.nan, np.nan], [3, 4]],
                    dims=["x", "y"],
                    coords={"x": [1, 2, 3], "y": [5, 6]},
                )
            }
        )
        assert_identical(expected_x2, x2)
        assert_identical(expected_y2, y2)

    def test_align_nocopy(self) -> None:
        x = Dataset({"foo": DataArray([1, 2, 3], coords=[("x", [1, 2, 3])])})
        y = Dataset({"foo": DataArray([1, 2], coords=[("x", [1, 2])])})
        expected_x2 = x
        expected_y2 = Dataset(
            {"foo": DataArray([1, 2, np.nan], coords=[("x", [1, 2, 3])])}
        )

        x2, y2 = align(x, y, copy=False, join="outer")
        assert_identical(expected_x2, x2)
        assert_identical(expected_y2, y2)
        assert source_ndarray(x["foo"].data) is source_ndarray(x2["foo"].data)

        x2, y2 = align(x, y, copy=True, join="outer")
        assert source_ndarray(x["foo"].data) is not source_ndarray(x2["foo"].data)
        assert_identical(expected_x2, x2)
        assert_identical(expected_y2, y2)

    def test_align_indexes(self) -> None:
        x = Dataset({"foo": DataArray([1, 2, 3], dims="x", coords=[("x", [1, 2, 3])])})
        (x2,) = align(x, indexes={"x": [2, 3, 1]})
        expected_x2 = Dataset(
            {"foo": DataArray([2, 3, 1], dims="x", coords={"x": [2, 3, 1]})}
        )

        assert_identical(expected_x2, x2)

    def test_align_non_unique(self) -> None:
        x = Dataset({"foo": ("x", [3, 4, 5]), "x": [0, 0, 1]})
        x1, x2 = align(x, x)
        assert_identical(x1, x)
        assert_identical(x2, x)

        y = Dataset({"bar": ("x", [6, 7]), "x": [0, 1]})
        with pytest.raises(ValueError, match=r"cannot reindex or align"):
            align(x, y)

    def test_align_str_dtype(self) -> None:

        a = Dataset({"foo": ("x", [0, 1])}, coords={"x": ["a", "b"]})
        b = Dataset({"foo": ("x", [1, 2])}, coords={"x": ["b", "c"]})

        expected_a = Dataset(
            {"foo": ("x", [0, 1, np.NaN])}, coords={"x": ["a", "b", "c"]}
        )
        expected_b = Dataset(
            {"foo": ("x", [np.NaN, 1, 2])}, coords={"x": ["a", "b", "c"]}
        )

        actual_a, actual_b = xr.align(a, b, join="outer")

        assert_identical(expected_a, actual_a)
        assert expected_a.x.dtype == actual_a.x.dtype

        assert_identical(expected_b, actual_b)
        assert expected_b.x.dtype == actual_b.x.dtype

    @pytest.mark.parametrize("join", ["left", "override"])
    def test_align_index_var_attrs(self, join) -> None:
        # regression test https://github.com/pydata/xarray/issues/6852
        # aligning two objects should have no side effect on their index variable
        # metadata.

        ds = Dataset(coords={"x": ("x", [1, 2, 3], {"units": "m"})})
        ds_noattr = Dataset(coords={"x": ("x", [1, 2, 3])})

        xr.align(ds_noattr, ds, join=join)

        assert ds.x.attrs == {"units": "m"}
        assert ds_noattr.x.attrs == {}

    def test_broadcast(self) -> None:
        ds = Dataset(
            {"foo": 0, "bar": ("x", [1]), "baz": ("y", [2, 3])}, {"c": ("x", [4])}
        )
        expected = Dataset(
            {
                "foo": (("x", "y"), [[0, 0]]),
                "bar": (("x", "y"), [[1, 1]]),
                "baz": (("x", "y"), [[2, 3]]),
            },
            {"c": ("x", [4])},
        )
        (actual,) = broadcast(ds)
        assert_identical(expected, actual)

        ds_x = Dataset({"foo": ("x", [1])})
        ds_y = Dataset({"bar": ("y", [2, 3])})
        expected_x = Dataset({"foo": (("x", "y"), [[1, 1]])})
        expected_y = Dataset({"bar": (("x", "y"), [[2, 3]])})
        actual_x, actual_y = broadcast(ds_x, ds_y)
        assert_identical(expected_x, actual_x)
        assert_identical(expected_y, actual_y)

        array_y = ds_y["bar"]
        expected_y2 = expected_y["bar"]
        actual_x2, actual_y2 = broadcast(ds_x, array_y)
        assert_identical(expected_x, actual_x2)
        assert_identical(expected_y2, actual_y2)

    def test_broadcast_nocopy(self) -> None:
        # Test that data is not copied if not needed
        x = Dataset({"foo": (("x", "y"), [[1, 1]])})
        y = Dataset({"bar": ("y", [2, 3])})

        (actual_x,) = broadcast(x)
        assert_identical(x, actual_x)
        assert source_ndarray(actual_x["foo"].data) is source_ndarray(x["foo"].data)

        actual_x, actual_y = broadcast(x, y)
        assert_identical(x, actual_x)
        assert source_ndarray(actual_x["foo"].data) is source_ndarray(x["foo"].data)

    def test_broadcast_exclude(self) -> None:
        x = Dataset(
            {
                "foo": DataArray(
                    [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [1, 2], "y": [3, 4]}
                ),
                "bar": DataArray(5),
            }
        )
        y = Dataset(
            {
                "foo": DataArray(
                    [[1, 2]], dims=["z", "y"], coords={"z": [1], "y": [5, 6]}
                )
            }
        )
        x2, y2 = broadcast(x, y, exclude=["y"])

        expected_x2 = Dataset(
            {
                "foo": DataArray(
                    [[[1, 2]], [[3, 4]]],
                    dims=["x", "z", "y"],
                    coords={"z": [1], "x": [1, 2], "y": [3, 4]},
                ),
                "bar": DataArray(
                    [[5], [5]], dims=["x", "z"], coords={"x": [1, 2], "z": [1]}
                ),
            }
        )
        expected_y2 = Dataset(
            {
                "foo": DataArray(
                    [[[1, 2]], [[1, 2]]],
                    dims=["x", "z", "y"],
                    coords={"z": [1], "x": [1, 2], "y": [5, 6]},
                )
            }
        )
        assert_identical(expected_x2, x2)
        assert_identical(expected_y2, y2)

    def test_broadcast_misaligned(self) -> None:
        x = Dataset({"foo": DataArray([1, 2, 3], coords=[("x", [-1, -2, -3])])})
        y = Dataset(
            {
                "bar": DataArray(
                    [[1, 2], [3, 4]],
                    dims=["y", "x"],
                    coords={"y": [1, 2], "x": [10, -3]},
                )
            }
        )
        x2, y2 = broadcast(x, y)
        expected_x2 = Dataset(
            {
                "foo": DataArray(
                    [[3, 3], [2, 2], [1, 1], [np.nan, np.nan]],
                    dims=["x", "y"],
                    coords={"y": [1, 2], "x": [-3, -2, -1, 10]},
                )
            }
        )
        expected_y2 = Dataset(
            {
                "bar": DataArray(
                    [[2, 4], [np.nan, np.nan], [np.nan, np.nan], [1, 3]],
                    dims=["x", "y"],
                    coords={"y": [1, 2], "x": [-3, -2, -1, 10]},
                )
            }
        )
        assert_identical(expected_x2, x2)
        assert_identical(expected_y2, y2)

    def test_broadcast_multi_index(self) -> None:
        # GH6430
        ds = Dataset(
            {"foo": (("x", "y", "z"), np.ones((3, 4, 2)))},
            {"x": ["a", "b", "c"], "y": [1, 2, 3, 4]},
        )
        stacked = ds.stack(space=["x", "y"])
        broadcasted, _ = broadcast(stacked, stacked.space)

        assert broadcasted.xindexes["x"] is broadcasted.xindexes["space"]
        assert broadcasted.xindexes["y"] is broadcasted.xindexes["space"]

    def test_variable_indexing(self) -> None:
        data = create_test_data()
        v = data["var1"]
        d1 = data["dim1"]
        d2 = data["dim2"]
        assert_equal(v, v[d1.values])
        assert_equal(v, v[d1])
        assert_equal(v[:3], v[d1 < 3])
        assert_equal(v[:, 3:], v[:, d2 >= 1.5])
        assert_equal(v[:3, 3:], v[d1 < 3, d2 >= 1.5])
        assert_equal(v[:3, :2], v[range(3), range(2)])
        assert_equal(v[:3, :2], v.loc[d1[:3], d2[:2]])

    def test_drop_variables(self) -> None:
        data = create_test_data()

        assert_identical(data, data.drop_vars([]))

        expected = Dataset({k: data[k] for k in data.variables if k != "time"})
        actual = data.drop_vars("time")
        assert_identical(expected, actual)
        actual = data.drop_vars(["time"])
        assert_identical(expected, actual)

        with pytest.raises(ValueError, match=r"cannot be found"):
            data.drop_vars("not_found_here")

        actual = data.drop_vars("not_found_here", errors="ignore")
        assert_identical(data, actual)

        actual = data.drop_vars(["not_found_here"], errors="ignore")
        assert_identical(data, actual)

        actual = data.drop_vars(["time", "not_found_here"], errors="ignore")
        assert_identical(expected, actual)

        # deprecated approach with `drop` works (straight copy paste from above)

        with pytest.warns(PendingDeprecationWarning):
            actual = data.drop("not_found_here", errors="ignore")
        assert_identical(data, actual)

        with pytest.warns(PendingDeprecationWarning):
            actual = data.drop(["not_found_here"], errors="ignore")
        assert_identical(data, actual)

        with pytest.warns(PendingDeprecationWarning):
            actual = data.drop(["time", "not_found_here"], errors="ignore")
        assert_identical(expected, actual)

        with pytest.warns(PendingDeprecationWarning):
            actual = data.drop({"time", "not_found_here"}, errors="ignore")
        assert_identical(expected, actual)

    def test_drop_multiindex_level(self) -> None:
        data = create_test_multiindex()
        expected = data.drop_vars(["x", "level_1", "level_2"])
        with pytest.warns(DeprecationWarning):
            actual = data.drop_vars("level_1")
        assert_identical(expected, actual)

    def test_drop_index_labels(self) -> None:
        data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]})

        with pytest.warns(DeprecationWarning):
            actual = data.drop(["a"], dim="x")
        expected = data.isel(x=[1])
        assert_identical(expected, actual)

        with pytest.warns(DeprecationWarning):
            actual = data.drop(["a", "b"], dim="x")
        expected = data.isel(x=slice(0, 0))
        assert_identical(expected, actual)

        with pytest.raises(KeyError):
            # not contained in axis
            with pytest.warns(DeprecationWarning):
                data.drop(["c"], dim="x")

        with pytest.warns(DeprecationWarning):
            actual = data.drop(["c"], dim="x", errors="ignore")
        assert_identical(data, actual)

        with pytest.raises(ValueError):
            with pytest.warns(DeprecationWarning):
                data.drop(["c"], dim="x", errors="wrong_value")  # type: ignore[arg-type]

        with pytest.warns(DeprecationWarning):
            actual = data.drop(["a", "b", "c"], "x", errors="ignore")
        expected = data.isel(x=slice(0, 0))
        assert_identical(expected, actual)

        # DataArrays as labels are a nasty corner case as they are not
        # Iterable[Hashable] - DataArray.__iter__ yields scalar DataArrays.
        actual = data.drop_sel(x=DataArray(["a", "b", "c"]), errors="ignore")
        expected = data.isel(x=slice(0, 0))
        assert_identical(expected, actual)
        with pytest.warns(DeprecationWarning):
            data.drop(DataArray(["a", "b", "c"]), dim="x", errors="ignore")
        assert_identical(expected, actual)

        actual = data.drop_sel(y=[1])
        expected = data.isel(y=[0, 2])
        assert_identical(expected, actual)

        with pytest.raises(KeyError, match=r"not found in axis"):
            data.drop_sel(x=0)

    def test_drop_labels_by_keyword(self) -> None:
        data = Dataset(
            {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)}
        )
        # Basic functionality.
        assert len(data.coords["x"]) == 2

        with pytest.warns(DeprecationWarning):
            ds1 = data.drop(["a"], dim="x")
        ds2 = data.drop_sel(x="a")
        ds3 = data.drop_sel(x=["a"])
        ds4 = data.drop_sel(x=["a", "b"])
        ds5 = data.drop_sel(x=["a", "b"], y=range(0, 6, 2))

        arr = DataArray(range(3), dims=["c"])
        with pytest.warns(FutureWarning):
            data.drop(arr.coords)
        with pytest.warns(FutureWarning):
            data.drop(arr.xindexes)

        assert_array_equal(ds1.coords["x"], ["b"])
        assert_array_equal(ds2.coords["x"], ["b"])
        assert_array_equal(ds3.coords["x"], ["b"])
        assert ds4.coords["x"].size == 0
        assert ds5.coords["x"].size == 0
        assert_array_equal(ds5.coords["y"], [1, 3, 5])

        # Error handling if user tries both approaches.
        with pytest.raises(ValueError):
            data.drop(labels=["a"], x="a")
        with pytest.raises(ValueError):
            data.drop(labels=["a"], dim="x", x="a")
        warnings.filterwarnings("ignore", r"\W*drop")
        with pytest.raises(ValueError):
            data.drop(dim="x", x="a")

    def test_drop_labels_by_position(self) -> None:
        data = Dataset(
            {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)}
        )
        # Basic functionality.
        assert len(data.coords["x"]) == 2

        actual = data.drop_isel(x=0)
        expected = data.drop_sel(x="a")
        assert_identical(expected, actual)

        actual = data.drop_isel(x=[0])
        expected = data.drop_sel(x=["a"])
        assert_identical(expected, actual)

        actual = data.drop_isel(x=[0, 1])
        expected = data.drop_sel(x=["a", "b"])
        assert_identical(expected, actual)
        assert actual.coords["x"].size == 0

        actual = data.drop_isel(x=[0, 1], y=range(0, 6, 2))
        expected = data.drop_sel(x=["a", "b"], y=range(0, 6, 2))
        assert_identical(expected, actual)
        assert actual.coords["x"].size == 0

        with pytest.raises(KeyError):
            data.drop_isel(z=1)

    def test_drop_indexes(self) -> None:
        ds = Dataset(
            coords={
                "x": ("x", [0, 1, 2]),
                "y": ("y", [3, 4, 5]),
                "foo": ("x", ["a", "a", "b"]),
            }
        )

        actual = ds.drop_indexes("x")
        assert "x" not in actual.xindexes
        assert type(actual.x.variable) is Variable

        actual = ds.drop_indexes(["x", "y"])
        assert "x" not in actual.xindexes
        assert "y" not in actual.xindexes
        assert type(actual.x.variable) is Variable
        assert type(actual.y.variable) is Variable

        with pytest.raises(ValueError, match="those coordinates don't exist"):
            ds.drop_indexes("not_a_coord")

        with pytest.raises(ValueError, match="those coordinates do not have an index"):
            ds.drop_indexes("foo")

        actual = ds.drop_indexes(["foo", "not_a_coord"], errors="ignore")
        assert_identical(actual, ds)

        # test index corrupted
        mindex = pd.MultiIndex.from_tuples([([1, 2]), ([3, 4])], names=["a", "b"])
        ds = Dataset(coords={"x": mindex})

        with pytest.raises(ValueError, match=".*would corrupt the following index.*"):
            ds.drop_indexes("a")

    def test_drop_dims(self) -> None:
        data = xr.Dataset(
            {
                "A": (["x", "y"], np.random.randn(2, 3)),
                "B": ("x", np.random.randn(2)),
                "x": ["a", "b"],
                "z": np.pi,
            }
        )

        actual = data.drop_dims("x")
        expected = data.drop_vars(["A", "B", "x"])
        assert_identical(expected, actual)

        actual = data.drop_dims("y")
        expected = data.drop_vars("A")
        assert_identical(expected, actual)

        actual = data.drop_dims(["x", "y"])
        expected = data.drop_vars(["A", "B", "x"])
        assert_identical(expected, actual)

        with pytest.raises((ValueError, KeyError)):
            data.drop_dims("z")  # not a dimension

        with pytest.raises((ValueError, KeyError)):
            data.drop_dims(None)  # type:ignore[arg-type]

        actual = data.drop_dims("z", errors="ignore")
        assert_identical(data, actual)

        # should this be allowed?
        actual = data.drop_dims(None, errors="ignore")  # type:ignore[arg-type]
        assert_identical(data, actual)

        with pytest.raises(ValueError):
            actual = data.drop_dims("z", errors="wrong_value")  # type: ignore[arg-type]

        actual = data.drop_dims(["x", "y", "z"], errors="ignore")
        expected = data.drop_vars(["A", "B", "x"])
        assert_identical(expected, actual)

    def test_copy(self) -> None:
        data = create_test_data()
        data.attrs["Test"] = [1, 2, 3]

        for copied in [data.copy(deep=False), copy(data)]:
            assert_identical(data, copied)
            assert data.encoding == copied.encoding
            # Note: IndexVariable objects with string dtype are always
            # copied because of xarray.core.indexes.safe_cast_to_index.
            # Limiting the test to data variables.
            for k in data.data_vars:
                v0 = data.variables[k]
                v1 = copied.variables[k]
                assert source_ndarray(v0.data) is source_ndarray(v1.data)
            copied["foo"] = ("z", np.arange(5))
            assert "foo" not in data

            copied.attrs["foo"] = "bar"
            assert "foo" not in data.attrs
            assert data.attrs["Test"] is copied.attrs["Test"]

        for copied in [data.copy(deep=True), deepcopy(data)]:
            assert_identical(data, copied)
            for k, v0 in data.variables.items():
                v1 = copied.variables[k]
                assert v0 is not v1

            assert data.attrs["Test"] is not copied.attrs["Test"]

    def test_copy_with_data(self) -> None:
        orig = create_test_data()
        new_data = {k: np.random.randn(*v.shape) for k, v in orig.data_vars.items()}
        actual = orig.copy(data=new_data)

        expected = orig.copy()
        for k, v in new_data.items():
            expected[k].data = v
        assert_identical(expected, actual)

    @pytest.mark.xfail(raises=AssertionError)
    @pytest.mark.parametrize(
        "deep, expected_orig",
        [
            [
                True,
                xr.DataArray(
                    xr.IndexVariable("a", np.array([1, 2])),
                    coords={"a": [1, 2]},
                    dims=["a"],
                ),
            ],
            [
                False,
                xr.DataArray(
                    xr.IndexVariable("a", np.array([999, 2])),
                    coords={"a": [999, 2]},
                    dims=["a"],
                ),
            ],
        ],
    )
    def test_copy_coords(self, deep, expected_orig) -> None:
        """The test fails for the shallow copy, and apparently only on Windows
        for some reason. In windows coords seem to be immutable unless it's one
        dataset deep copied from another."""
        ds = xr.DataArray(
            np.ones([2, 2, 2]),
            coords={"a": [1, 2], "b": ["x", "y"], "c": [0, 1]},
            dims=["a", "b", "c"],
            name="value",
        ).to_dataset()
        ds_cp = ds.copy(deep=deep)
        ds_cp.coords["a"].data[0] = 999

        expected_cp = xr.DataArray(
            xr.IndexVariable("a", np.array([999, 2])),
            coords={"a": [999, 2]},
            dims=["a"],
        )
        assert_identical(ds_cp.coords["a"], expected_cp)

        assert_identical(ds.coords["a"], expected_orig)

    def test_copy_with_data_errors(self) -> None:
        orig = create_test_data()
        new_var1 = np.arange(orig["var1"].size).reshape(orig["var1"].shape)
        with pytest.raises(ValueError, match=r"Data must be dict-like"):
            orig.copy(data=new_var1)  # type: ignore[arg-type]
        with pytest.raises(ValueError, match=r"only contain variables in original"):
            orig.copy(data={"not_in_original": new_var1})
        with pytest.raises(ValueError, match=r"contain all variables in original"):
            orig.copy(data={"var1": new_var1})

    def test_rename(self) -> None:
        data = create_test_data()
        newnames = {
            "var1": "renamed_var1",
            "dim2": "renamed_dim2",
        }
        renamed = data.rename(newnames)

        variables = dict(data.variables)
        for nk, nv in newnames.items():
            variables[nv] = variables.pop(nk)

        for k, v in variables.items():
            dims = list(v.dims)
            for name, newname in newnames.items():
                if name in dims:
                    dims[dims.index(name)] = newname

            assert_equal(
                Variable(dims, v.values, v.attrs),
                renamed[k].variable.to_base_variable(),
            )
            assert v.encoding == renamed[k].encoding
            assert type(v) is type(renamed.variables[k])  # noqa: E721

        assert "var1" not in renamed
        assert "dim2" not in renamed

        with pytest.raises(ValueError, match=r"cannot rename 'not_a_var'"):
            data.rename({"not_a_var": "nada"})

        with pytest.raises(ValueError, match=r"'var1' conflicts"):
            data.rename({"var2": "var1"})

        # verify that we can rename a variable without accessing the data
        var1 = data["var1"]
        data["var1"] = (var1.dims, InaccessibleArray(var1.values))
        renamed = data.rename(newnames)
        with pytest.raises(UnexpectedDataAccess):
            renamed["renamed_var1"].values

        # https://github.com/python/mypy/issues/10008
        renamed_kwargs = data.rename(**newnames)  # type: ignore[arg-type]
        assert_identical(renamed, renamed_kwargs)

    def test_rename_old_name(self) -> None:
        # regtest for GH1477
        data = create_test_data()

        with pytest.raises(ValueError, match=r"'samecol' conflicts"):
            data.rename({"var1": "samecol", "var2": "samecol"})

        # This shouldn't cause any problems.
        data.rename({"var1": "var2", "var2": "var1"})

    def test_rename_same_name(self) -> None:
        data = create_test_data()
        newnames = {"var1": "var1", "dim2": "dim2"}
        with pytest.warns(UserWarning, match="does not create an index anymore"):
            renamed = data.rename(newnames)
        assert_identical(renamed, data)

    def test_rename_dims(self) -> None:
        original = Dataset({"x": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42})
        expected = Dataset(
            {"x": ("x_new", [0, 1, 2]), "y": ("x_new", [10, 11, 12]), "z": 42}
        )
        # TODO: (benbovy - explicit indexes) update when set_index supports
        # setting index for non-dimension variables
        expected = expected.set_coords("x")
        actual = original.rename_dims({"x": "x_new"})
        assert_identical(expected, actual, check_default_indexes=False)
        actual_2 = original.rename_dims(x="x_new")
        assert_identical(expected, actual_2, check_default_indexes=False)

        # Test to raise ValueError
        dims_dict_bad = {"x_bad": "x_new"}
        with pytest.raises(ValueError):
            original.rename_dims(dims_dict_bad)

        with pytest.raises(ValueError):
            original.rename_dims({"x": "z"})

    def test_rename_vars(self) -> None:
        original = Dataset({"x": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42})
        expected = Dataset(
            {"x_new": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42}
        )
        # TODO: (benbovy - explicit indexes) update when set_index supports
        # setting index for non-dimension variables
        expected = expected.set_coords("x_new")
        actual = original.rename_vars({"x": "x_new"})
        assert_identical(expected, actual, check_default_indexes=False)
        actual_2 = original.rename_vars(x="x_new")
        assert_identical(expected, actual_2, check_default_indexes=False)

        # Test to raise ValueError
        names_dict_bad = {"x_bad": "x_new"}
        with pytest.raises(ValueError):
            original.rename_vars(names_dict_bad)

    def test_rename_dimension_coord(self) -> None:
        # rename a dimension corodinate to a non-dimension coordinate
        # should preserve index
        original = Dataset(coords={"x": ("x", [0, 1, 2])})

        actual = original.rename_vars({"x": "x_new"})
        assert "x_new" in actual.xindexes

        actual_2 = original.rename_dims({"x": "x_new"})
        assert "x" in actual_2.xindexes

    def test_rename_dimension_coord_warnings(self) -> None:
        # create a dimension coordinate by renaming a dimension or coordinate
        # should raise a warning (no index created)
        ds = Dataset(coords={"x": ("y", [0, 1])})

        with pytest.warns(
            UserWarning, match="rename 'x' to 'y' does not create an index.*"
        ):
            ds.rename(x="y")

        ds = Dataset(coords={"y": ("x", [0, 1])})

        with pytest.warns(
            UserWarning, match="rename 'x' to 'y' does not create an index.*"
        ):
            ds.rename(x="y")

    def test_rename_multiindex(self) -> None:
        mindex = pd.MultiIndex.from_tuples([([1, 2]), ([3, 4])], names=["a", "b"])
        original = Dataset({}, {"x": mindex})
        expected = Dataset({}, {"x": mindex.rename(["a", "c"])})

        actual = original.rename({"b": "c"})
        assert_identical(expected, actual)

        with pytest.raises(ValueError, match=r"'a' conflicts"):
            with pytest.warns(UserWarning, match="does not create an index anymore"):
                original.rename({"x": "a"})

        with pytest.raises(ValueError, match=r"'x' conflicts"):
            with pytest.warns(UserWarning, match="does not create an index anymore"):
                original.rename({"a": "x"})

        with pytest.raises(ValueError, match=r"'b' conflicts"):
            with pytest.warns(UserWarning, match="does not create an index anymore"):
                original.rename({"a": "b"})

    def test_rename_perserve_attrs_encoding(self) -> None:
        # test propagate attrs/encoding to new variable(s) created from Index object
        original = Dataset(coords={"x": ("x", [0, 1, 2])})
        expected = Dataset(coords={"y": ("y", [0, 1, 2])})
        for ds, dim in zip([original, expected], ["x", "y"]):
            ds[dim].attrs = {"foo": "bar"}
            ds[dim].encoding = {"foo": "bar"}

        actual = original.rename({"x": "y"})
        assert_identical(actual, expected)

    @requires_cftime
    def test_rename_does_not_change_CFTimeIndex_type(self) -> None:
        # make sure CFTimeIndex is not converted to DatetimeIndex #3522

        time = xr.cftime_range(start="2000", periods=6, freq="2MS", calendar="noleap")
        orig = Dataset(coords={"time": time})

        renamed = orig.rename(time="time_new")
        assert "time_new" in renamed.xindexes
        # TODO: benbovy - flexible indexes: update when CFTimeIndex
        # inherits from xarray.Index
        assert isinstance(renamed.xindexes["time_new"].to_pandas_index(), CFTimeIndex)
        assert renamed.xindexes["time_new"].to_pandas_index().name == "time_new"

        # check original has not changed
        assert "time" in orig.xindexes
        assert isinstance(orig.xindexes["time"].to_pandas_index(), CFTimeIndex)
        assert orig.xindexes["time"].to_pandas_index().name == "time"

        # note: rename_dims(time="time_new") drops "ds.indexes"
        renamed = orig.rename_dims()
        assert isinstance(renamed.xindexes["time"].to_pandas_index(), CFTimeIndex)

        renamed = orig.rename_vars()
        assert isinstance(renamed.xindexes["time"].to_pandas_index(), CFTimeIndex)

    def test_rename_does_not_change_DatetimeIndex_type(self) -> None:
        # make sure DatetimeIndex is conderved on rename

        time = pd.date_range(start="2000", periods=6, freq="2MS")
        orig = Dataset(coords={"time": time})

        renamed = orig.rename(time="time_new")
        assert "time_new" in renamed.xindexes
        # TODO: benbovy - flexible indexes: update when DatetimeIndex
        # inherits from xarray.Index?
        assert isinstance(renamed.xindexes["time_new"].to_pandas_index(), DatetimeIndex)
        assert renamed.xindexes["time_new"].to_pandas_index().name == "time_new"

        # check original has not changed
        assert "time" in orig.xindexes
        assert isinstance(orig.xindexes["time"].to_pandas_index(), DatetimeIndex)
        assert orig.xindexes["time"].to_pandas_index().name == "time"

        # note: rename_dims(time="time_new") drops "ds.indexes"
        renamed = orig.rename_dims()
        assert isinstance(renamed.xindexes["time"].to_pandas_index(), DatetimeIndex)

        renamed = orig.rename_vars()
        assert isinstance(renamed.xindexes["time"].to_pandas_index(), DatetimeIndex)

    def test_swap_dims(self) -> None:
        original = Dataset({"x": [1, 2, 3], "y": ("x", list("abc")), "z": 42})
        expected = Dataset({"z": 42}, {"x": ("y", [1, 2, 3]), "y": list("abc")})
        actual = original.swap_dims({"x": "y"})
        assert_identical(expected, actual)
        assert isinstance(actual.variables["y"], IndexVariable)
        assert isinstance(actual.variables["x"], Variable)
        assert actual.xindexes["y"].equals(expected.xindexes["y"])

        roundtripped = actual.swap_dims({"y": "x"})
        assert_identical(original.set_coords("y"), roundtripped)

        with pytest.raises(ValueError, match=r"cannot swap"):
            original.swap_dims({"y": "x"})
        with pytest.raises(ValueError, match=r"replacement dimension"):
            original.swap_dims({"x": "z"})

        expected = Dataset(
            {"y": ("u", list("abc")), "z": 42}, coords={"x": ("u", [1, 2, 3])}
        )
        actual = original.swap_dims({"x": "u"})
        assert_identical(expected, actual)

        # as kwargs
        expected = Dataset(
            {"y": ("u", list("abc")), "z": 42}, coords={"x": ("u", [1, 2, 3])}
        )
        actual = original.swap_dims(x="u")
        assert_identical(expected, actual)

        # handle multiindex case
        idx = pd.MultiIndex.from_arrays([list("aab"), list("yzz")], names=["y1", "y2"])
        original = Dataset({"x": [1, 2, 3], "y": ("x", idx), "z": 42})
        expected = Dataset({"z": 42}, {"x": ("y", [1, 2, 3]), "y": idx})
        actual = original.swap_dims({"x": "y"})
        assert_identical(expected, actual)
        assert isinstance(actual.variables["y"], IndexVariable)
        assert isinstance(actual.variables["x"], Variable)
        assert actual.xindexes["y"].equals(expected.xindexes["y"])

    def test_expand_dims_error(self) -> None:
        original = Dataset(
            {
                "x": ("a", np.random.randn(3)),
                "y": (["b", "a"], np.random.randn(4, 3)),
                "z": ("a", np.random.randn(3)),
            },
            coords={
                "a": np.linspace(0, 1, 3),
                "b": np.linspace(0, 1, 4),
                "c": np.linspace(0, 1, 5),
            },
            attrs={"key": "entry"},
        )

        with pytest.raises(ValueError, match=r"already exists"):
            original.expand_dims(dim=["x"])

        # Make sure it raises true error also for non-dimensional coordinates
        # which has dimension.
        original = original.set_coords("z")
        with pytest.raises(ValueError, match=r"already exists"):
            original.expand_dims(dim=["z"])

        original = Dataset(
            {
                "x": ("a", np.random.randn(3)),
                "y": (["b", "a"], np.random.randn(4, 3)),
                "z": ("a", np.random.randn(3)),
            },
            coords={
                "a": np.linspace(0, 1, 3),
                "b": np.linspace(0, 1, 4),
                "c": np.linspace(0, 1, 5),
            },
            attrs={"key": "entry"},
        )
        with pytest.raises(TypeError, match=r"value of new dimension"):
            original.expand_dims({"d": 3.2})
        with pytest.raises(ValueError, match=r"both keyword and positional"):
            original.expand_dims({"d": 4}, e=4)

    def test_expand_dims_int(self) -> None:
        original = Dataset(
            {"x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3))},
            coords={
                "a": np.linspace(0, 1, 3),
                "b": np.linspace(0, 1, 4),
                "c": np.linspace(0, 1, 5),
            },
            attrs={"key": "entry"},
        )

        actual = original.expand_dims(["z"], [1])
        expected = Dataset(
            {
                "x": original["x"].expand_dims("z", 1),
                "y": original["y"].expand_dims("z", 1),
            },
            coords={
                "a": np.linspace(0, 1, 3),
                "b": np.linspace(0, 1, 4),
                "c": np.linspace(0, 1, 5),
            },
            attrs={"key": "entry"},
        )
        assert_identical(expected, actual)
        # make sure squeeze restores the original data set.
        roundtripped = actual.squeeze("z")
        assert_identical(original, roundtripped)

        # another test with a negative axis
        actual = original.expand_dims(["z"], [-1])
        expected = Dataset(
            {
                "x": original["x"].expand_dims("z", -1),
                "y": original["y"].expand_dims("z", -1),
            },
            coords={
                "a": np.linspace(0, 1, 3),
                "b": np.linspace(0, 1, 4),
                "c": np.linspace(0, 1, 5),
            },
            attrs={"key": "entry"},
        )
        assert_identical(expected, actual)
        # make sure squeeze restores the original data set.
        roundtripped = actual.squeeze("z")
        assert_identical(original, roundtripped)

    def test_expand_dims_coords(self) -> None:
        original = Dataset({"x": ("a", np.array([1, 2, 3]))})
        expected = Dataset(
            {"x": (("b", "a"), np.array([[1, 2, 3], [1, 2, 3]]))}, coords={"b": [1, 2]}
        )
        actual = original.expand_dims(dict(b=[1, 2]))
        assert_identical(expected, actual)
        assert "b" not in original._coord_names

    def test_expand_dims_existing_scalar_coord(self) -> None:
        original = Dataset({"x": 1}, {"a": 2})
        expected = Dataset({"x": (("a",), [1])}, {"a": [2]})
        actual = original.expand_dims("a")
        assert_identical(expected, actual)

    def test_isel_expand_dims_roundtrip(self) -> None:
        original = Dataset({"x": (("a",), [1])}, {"a": [2]})
        actual = original.isel(a=0).expand_dims("a")
        assert_identical(actual, original)

    def test_expand_dims_mixed_int_and_coords(self) -> None:
        # Test expanding one dimension to have size > 1 that doesn't have
        # coordinates, and also expanding another dimension to have size > 1
        # that DOES have coordinates.
        original = Dataset(
            {"x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3))},
            coords={
                "a": np.linspace(0, 1, 3),
                "b": np.linspace(0, 1, 4),
                "c": np.linspace(0, 1, 5),
            },
        )

        actual = original.expand_dims({"d": 4, "e": ["l", "m", "n"]})

        expected = Dataset(
            {
                "x": xr.DataArray(
                    original["x"].values * np.ones([4, 3, 3]),
                    coords=dict(d=range(4), e=["l", "m", "n"], a=np.linspace(0, 1, 3)),
                    dims=["d", "e", "a"],
                ).drop_vars("d"),
                "y": xr.DataArray(
                    original["y"].values * np.ones([4, 3, 4, 3]),
                    coords=dict(
                        d=range(4),
                        e=["l", "m", "n"],
                        b=np.linspace(0, 1, 4),
                        a=np.linspace(0, 1, 3),
                    ),
                    dims=["d", "e", "b", "a"],
                ).drop_vars("d"),
            },
            coords={"c": np.linspace(0, 1, 5)},
        )
        assert_identical(actual, expected)

    def test_expand_dims_kwargs_python36plus(self) -> None:
        original = Dataset(
            {"x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3))},
            coords={
                "a": np.linspace(0, 1, 3),
                "b": np.linspace(0, 1, 4),
                "c": np.linspace(0, 1, 5),
            },
            attrs={"key": "entry"},
        )
        other_way = original.expand_dims(e=["l", "m", "n"])
        other_way_expected = Dataset(
            {
                "x": xr.DataArray(
                    original["x"].values * np.ones([3, 3]),
                    coords=dict(e=["l", "m", "n"], a=np.linspace(0, 1, 3)),
                    dims=["e", "a"],
                ),
                "y": xr.DataArray(
                    original["y"].values * np.ones([3, 4, 3]),
                    coords=dict(
                        e=["l", "m", "n"],
                        b=np.linspace(0, 1, 4),
                        a=np.linspace(0, 1, 3),
                    ),
                    dims=["e", "b", "a"],
                ),
            },
            coords={"c": np.linspace(0, 1, 5)},
            attrs={"key": "entry"},
        )
        assert_identical(other_way_expected, other_way)

    def test_set_index(self) -> None:
        expected = create_test_multiindex()
        mindex = expected["x"].to_index()
        indexes = [mindex.get_level_values(n) for n in mindex.names]
        coords = {idx.name: ("x", idx) for idx in indexes}
        ds = Dataset({}, coords=coords)

        obj = ds.set_index(x=mindex.names)
        assert_identical(obj, expected)

        # ensure pre-existing indexes involved are removed
        # (level_2 should be a coordinate with no index)
        ds = create_test_multiindex()
        coords = {"x": coords["level_1"], "level_2": coords["level_2"]}
        expected = Dataset({}, coords=coords)

        obj = ds.set_index(x="level_1")
        assert_identical(obj, expected)

        # ensure set_index with no existing index and a single data var given
        # doesn't return multi-index
        ds = Dataset(data_vars={"x_var": ("x", [0, 1, 2])})
        expected = Dataset(coords={"x": [0, 1, 2]})
        assert_identical(ds.set_index(x="x_var"), expected)

        with pytest.raises(ValueError, match=r"bar variable\(s\) do not exist"):
            ds.set_index(foo="bar")

        with pytest.raises(ValueError, match=r"dimension mismatch.*"):
            ds.set_index(y="x_var")

    def test_set_index_deindexed_coords(self) -> None:
        # test de-indexed coordinates are converted to base variable
        # https://github.com/pydata/xarray/issues/6969
        one = ["a", "a", "b", "b"]
        two = [1, 2, 1, 2]
        three = ["c", "c", "d", "d"]
        four = [3, 4, 3, 4]

        mindex_12 = pd.MultiIndex.from_arrays([one, two], names=["one", "two"])
        mindex_34 = pd.MultiIndex.from_arrays([three, four], names=["three", "four"])

        ds = xr.Dataset(
            coords={"x": mindex_12, "three": ("x", three), "four": ("x", four)}
        )
        actual = ds.set_index(x=["three", "four"])
        expected = xr.Dataset(
            coords={"x": mindex_34, "one": ("x", one), "two": ("x", two)}
        )
        assert_identical(actual, expected)

    def test_reset_index(self) -> None:
        ds = create_test_multiindex()
        mindex = ds["x"].to_index()
        indexes = [mindex.get_level_values(n) for n in mindex.names]
        coords = {idx.name: ("x", idx) for idx in indexes}
        expected = Dataset({}, coords=coords)

        obj = ds.reset_index("x")
        assert_identical(obj, expected, check_default_indexes=False)
        assert len(obj.xindexes) == 0

        ds = Dataset(coords={"y": ("x", [1, 2, 3])})
        with pytest.raises(ValueError, match=r".*not coordinates with an index"):
            ds.reset_index("y")

    def test_reset_index_keep_attrs(self) -> None:
        coord_1 = DataArray([1, 2], dims=["coord_1"], attrs={"attrs": True})
        ds = Dataset({}, {"coord_1": coord_1})
        obj = ds.reset_index("coord_1")
        assert ds.coord_1.attrs == obj.coord_1.attrs
        assert len(obj.xindexes) == 0

    def test_reset_index_drop_dims(self) -> None:
        ds = Dataset(coords={"x": [1, 2]})
        reset = ds.reset_index("x", drop=True)
        assert len(reset.dims) == 0

    @pytest.mark.parametrize(
        "arg,drop,dropped,converted,renamed",
        [
            ("foo", False, [], [], {"bar": "x"}),
            ("foo", True, ["foo"], [], {"bar": "x"}),
            ("x", False, ["x"], ["foo", "bar"], {}),
            ("x", True, ["x", "foo", "bar"], [], {}),
            (["foo", "bar"], False, ["x"], ["foo", "bar"], {}),
            (["foo", "bar"], True, ["x", "foo", "bar"], [], {}),
            (["x", "foo"], False, ["x"], ["foo", "bar"], {}),
            (["foo", "x"], True, ["x", "foo", "bar"], [], {}),
        ],
    )
    def test_reset_index_drop_convert(
        self, arg, drop, dropped, converted, renamed
    ) -> None:
        # regressions https://github.com/pydata/xarray/issues/6946 and
        # https://github.com/pydata/xarray/issues/6989
        # check that multi-index dimension or level coordinates are dropped, converted
        # from IndexVariable to Variable or renamed to dimension as expected
        midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("foo", "bar"))
        ds = xr.Dataset(coords={"x": midx})
        reset = ds.reset_index(arg, drop=drop)

        for name in dropped:
            assert name not in reset.variables
        for name in converted:
            assert_identical(reset[name].variable, ds[name].variable.to_base_variable())
        for old_name, new_name in renamed.items():
            assert_identical(ds[old_name].variable, reset[new_name].variable)

    def test_reorder_levels(self) -> None:
        ds = create_test_multiindex()
        mindex = ds["x"].to_index()
        midx = mindex.reorder_levels(["level_2", "level_1"])
        expected = Dataset({}, coords={"x": midx})

        # check attrs propagated
        ds["level_1"].attrs["foo"] = "bar"
        expected["level_1"].attrs["foo"] = "bar"

        reindexed = ds.reorder_levels(x=["level_2", "level_1"])
        assert_identical(reindexed, expected)

        ds = Dataset({}, coords={"x": [1, 2]})
        with pytest.raises(ValueError, match=r"has no MultiIndex"):
            ds.reorder_levels(x=["level_1", "level_2"])

    def test_set_xindex(self) -> None:
        ds = Dataset(
            coords={"foo": ("x", ["a", "a", "b", "b"]), "bar": ("x", [0, 1, 2, 3])}
        )

        actual = ds.set_xindex("foo")
        expected = ds.set_index(x="foo").rename_vars(x="foo")
        assert_identical(actual, expected, check_default_indexes=False)

        actual_mindex = ds.set_xindex(["foo", "bar"])
        expected_mindex = ds.set_index(x=["foo", "bar"])
        assert_identical(actual_mindex, expected_mindex)

        class NotAnIndex:
            ...

        with pytest.raises(TypeError, match=".*not a subclass of xarray.Index"):
            ds.set_xindex("foo", NotAnIndex)  # type: ignore

        with pytest.raises(ValueError, match="those variables don't exist"):
            ds.set_xindex("not_a_coordinate", PandasIndex)

        ds["data_var"] = ("x", [1, 2, 3, 4])

        with pytest.raises(ValueError, match="those variables are data variables"):
            ds.set_xindex("data_var", PandasIndex)

        ds2 = Dataset(coords={"x": ("x", [0, 1, 2, 3])})

        with pytest.raises(ValueError, match="those coordinates already have an index"):
            ds2.set_xindex("x", PandasIndex)

    def test_set_xindex_options(self) -> None:
        ds = Dataset(coords={"foo": ("x", ["a", "a", "b", "b"])})

        class IndexWithOptions(Index):
            def __init__(self, opt):
                self.opt = opt

            @classmethod
            def from_variables(cls, variables, options):
                return cls(options["opt"])

        indexed = ds.set_xindex("foo", IndexWithOptions, opt=1)
        assert getattr(indexed.xindexes["foo"], "opt") == 1

    def test_stack(self) -> None:
        ds = Dataset(
            data_vars={"b": (("x", "y"), [[0, 1], [2, 3]])},
            coords={"x": ("x", [0, 1]), "y": ["a", "b"]},
        )

        exp_index = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["x", "y"])
        expected = Dataset(
            data_vars={"b": ("z", [0, 1, 2, 3])},
            coords={"z": exp_index},
        )
        # check attrs propagated
        ds["x"].attrs["foo"] = "bar"
        expected["x"].attrs["foo"] = "bar"

        actual = ds.stack(z=["x", "y"])
        assert_identical(expected, actual)
        assert list(actual.xindexes) == ["z", "x", "y"]

        actual = ds.stack(z=[...])
        assert_identical(expected, actual)

        # non list dims with ellipsis
        actual = ds.stack(z=(...,))
        assert_identical(expected, actual)

        # ellipsis with given dim
        actual = ds.stack(z=[..., "y"])
        assert_identical(expected, actual)

        exp_index = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["y", "x"])
        expected = Dataset(
            data_vars={"b": ("z", [0, 2, 1, 3])},
            coords={"z": exp_index},
        )
        expected["x"].attrs["foo"] = "bar"

        actual = ds.stack(z=["y", "x"])
        assert_identical(expected, actual)
        assert list(actual.xindexes) == ["z", "y", "x"]

    @pytest.mark.parametrize(
        "create_index,expected_keys",
        [
            (True, ["z", "x", "y"]),
            (False, []),
            (None, ["z", "x", "y"]),
        ],
    )
    def test_stack_create_index(self, create_index, expected_keys) -> None:
        ds = Dataset(
            data_vars={"b": (("x", "y"), [[0, 1], [2, 3]])},
            coords={"x": ("x", [0, 1]), "y": ["a", "b"]},
        )

        actual = ds.stack(z=["x", "y"], create_index=create_index)
        assert list(actual.xindexes) == expected_keys

        # TODO: benbovy (flexible indexes) - test error multiple indexes found
        # along dimension + create_index=True

    def test_stack_multi_index(self) -> None:
        # multi-index on a dimension to stack is discarded too
        midx = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=("lvl1", "lvl2"))
        ds = xr.Dataset(
            data_vars={"b": (("x", "y"), [[0, 1], [2, 3], [4, 5], [6, 7]])},
            coords={"x": midx, "y": [0, 1]},
        )
        expected = Dataset(
            data_vars={"b": ("z", [0, 1, 2, 3, 4, 5, 6, 7])},
            coords={
                "x": ("z", np.repeat(midx.values, 2)),
                "lvl1": ("z", np.repeat(midx.get_level_values("lvl1"), 2)),
                "lvl2": ("z", np.repeat(midx.get_level_values("lvl2"), 2)),
                "y": ("z", [0, 1, 0, 1] * 2),
            },
        )
        actual = ds.stack(z=["x", "y"], create_index=False)
        assert_identical(expected, actual)
        assert len(actual.xindexes) == 0

        with pytest.raises(ValueError, match=r"cannot create.*wraps a multi-index"):
            ds.stack(z=["x", "y"], create_index=True)

    def test_stack_non_dim_coords(self) -> None:
        ds = Dataset(
            data_vars={"b": (("x", "y"), [[0, 1], [2, 3]])},
            coords={"x": ("x", [0, 1]), "y": ["a", "b"]},
        ).rename_vars(x="xx")

        exp_index = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["xx", "y"])
        expected = Dataset(
            data_vars={"b": ("z", [0, 1, 2, 3])},
            coords={"z": exp_index},
        )

        actual = ds.stack(z=["x", "y"])
        assert_identical(expected, actual)
        assert list(actual.xindexes) == ["z", "xx", "y"]

    def test_unstack(self) -> None:
        index = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["x", "y"])
        ds = Dataset(data_vars={"b": ("z", [0, 1, 2, 3])}, coords={"z": index})
        expected = Dataset(
            {"b": (("x", "y"), [[0, 1], [2, 3]]), "x": [0, 1], "y": ["a", "b"]}
        )

        # check attrs propagated
        ds["x"].attrs["foo"] = "bar"
        expected["x"].attrs["foo"] = "bar"

        for dim in ["z", ["z"], None]:
            actual = ds.unstack(dim)
            assert_identical(actual, expected)

    def test_unstack_errors(self) -> None:
        ds = Dataset({"x": [1, 2, 3]})
        with pytest.raises(ValueError, match=r"does not contain the dimensions"):
            ds.unstack("foo")
        with pytest.raises(ValueError, match=r".*do not have exactly one multi-index"):
            ds.unstack("x")

    def test_unstack_fill_value(self) -> None:
        ds = xr.Dataset(
            {"var": (("x",), np.arange(6)), "other_var": (("x",), np.arange(3, 9))},
            coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)},
        )
        # make ds incomplete
        ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"])
        # test fill_value
        actual1 = ds.unstack("index", fill_value=-1)
        expected1 = ds.unstack("index").fillna(-1).astype(int)
        assert actual1["var"].dtype == int
        assert_equal(actual1, expected1)

        actual2 = ds["var"].unstack("index", fill_value=-1)
        expected2 = ds["var"].unstack("index").fillna(-1).astype(int)
        assert_equal(actual2, expected2)

        actual3 = ds.unstack("index", fill_value={"var": -1, "other_var": 1})
        expected3 = ds.unstack("index").fillna({"var": -1, "other_var": 1}).astype(int)
        assert_equal(actual3, expected3)

    @requires_sparse
    def test_unstack_sparse(self) -> None:
        ds = xr.Dataset(
            {"var": (("x",), np.arange(6))},
            coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)},
        )
        # make ds incomplete
        ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"])
        # test fill_value
        actual1 = ds.unstack("index", sparse=True)
        expected1 = ds.unstack("index")
        assert isinstance(actual1["var"].data, sparse_array_type)
        assert actual1["var"].variable._to_dense().equals(expected1["var"].variable)
        assert actual1["var"].data.density < 1.0

        actual2 = ds["var"].unstack("index", sparse=True)
        expected2 = ds["var"].unstack("index")
        assert isinstance(actual2.data, sparse_array_type)
        assert actual2.variable._to_dense().equals(expected2.variable)
        assert actual2.data.density < 1.0

        mindex = pd.MultiIndex.from_arrays(
            [np.arange(3), np.arange(3)], names=["a", "b"]
        )
        ds_eye = Dataset(
            {"var": (("z", "foo", "bar"), np.ones((3, 4, 5)))},
            coords={"z": mindex, "foo": np.arange(4), "bar": np.arange(5)},
        )
        actual3 = ds_eye.unstack(sparse=True, fill_value=0)
        assert isinstance(actual3["var"].data, sparse_array_type)
        expected3 = xr.Dataset(
            {
                "var": (
                    ("foo", "bar", "a", "b"),
                    np.broadcast_to(np.eye(3, 3), (4, 5, 3, 3)),
                )
            },
            coords={
                "foo": np.arange(4),
                "bar": np.arange(5),
                "a": np.arange(3),
                "b": np.arange(3),
            },
        )
        actual3["var"].data = actual3["var"].data.todense()
        assert_equal(expected3, actual3)

    def test_stack_unstack_fast(self) -> None:
        ds = Dataset(
            {
                "a": ("x", [0, 1]),
                "b": (("x", "y"), [[0, 1], [2, 3]]),
                "x": [0, 1],
                "y": ["a", "b"],
            }
        )
        actual = ds.stack(z=["x", "y"]).unstack("z")
        assert actual.broadcast_equals(ds)

        actual = ds[["b"]].stack(z=["x", "y"]).unstack("z")
        assert actual.identical(ds[["b"]])

    def test_stack_unstack_slow(self) -> None:
        ds = Dataset(
            data_vars={
                "a": ("x", [0, 1]),
                "b": (("x", "y"), [[0, 1], [2, 3]]),
            },
            coords={"x": [0, 1], "y": ["a", "b"]},
        )
        stacked = ds.stack(z=["x", "y"])
        actual = stacked.isel(z=slice(None, None, -1)).unstack("z")
        assert actual.broadcast_equals(ds)

        stacked = ds[["b"]].stack(z=["x", "y"])
        actual = stacked.isel(z=slice(None, None, -1)).unstack("z")
        assert actual.identical(ds[["b"]])

    def test_to_stacked_array_invalid_sample_dims(self) -> None:
        data = xr.Dataset(
            data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])},
            coords={"y": ["u", "v", "w"]},
        )
        with pytest.raises(ValueError):
            data.to_stacked_array("features", sample_dims=["y"])

    def test_to_stacked_array_name(self) -> None:
        name = "adf9d"

        # make a two dimensional dataset
        a, b = create_test_stacked_array()
        D = xr.Dataset({"a": a, "b": b})
        sample_dims = ["x"]

        y = D.to_stacked_array("features", sample_dims, name=name)
        assert y.name == name

    def test_to_stacked_array_dtype_dims(self) -> None:
        # make a two dimensional dataset
        a, b = create_test_stacked_array()
        D = xr.Dataset({"a": a, "b": b})
        sample_dims = ["x"]
        y = D.to_stacked_array("features", sample_dims)
        assert y.xindexes["features"].to_pandas_index().levels[1].dtype == D.y.dtype
        assert y.dims == ("x", "features")

    def test_to_stacked_array_to_unstacked_dataset(self) -> None:

        # single dimension: regression test for GH4049
        arr = xr.DataArray(np.arange(3), coords=[("x", [0, 1, 2])])
        data = xr.Dataset({"a": arr, "b": arr})
        stacked = data.to_stacked_array("y", sample_dims=["x"])
        unstacked = stacked.to_unstacked_dataset("y")
        assert_identical(unstacked, data)

        # make a two dimensional dataset
        a, b = create_test_stacked_array()
        D = xr.Dataset({"a": a, "b": b})
        sample_dims = ["x"]
        y = D.to_stacked_array("features", sample_dims).transpose("x", "features")

        x = y.to_unstacked_dataset("features")
        assert_identical(D, x)

        # test on just one sample
        x0 = y[0].to_unstacked_dataset("features")
        d0 = D.isel(x=0)
        assert_identical(d0, x0)

    def test_to_stacked_array_to_unstacked_dataset_different_dimension(self) -> None:
        # test when variables have different dimensionality
        a, b = create_test_stacked_array()
        sample_dims = ["x"]
        D = xr.Dataset({"a": a, "b": b.isel(y=0)})

        y = D.to_stacked_array("features", sample_dims)
        x = y.to_unstacked_dataset("features")
        assert_identical(D, x)

    def test_update(self) -> None:
        data = create_test_data(seed=0)
        expected = data.copy()
        var2 = Variable("dim1", np.arange(8))
        actual = data
        actual.update({"var2": var2})
        expected["var2"] = var2
        assert_identical(expected, actual)

        actual = data.copy()
        actual.update(data)
        assert_identical(expected, actual)

        other = Dataset(attrs={"new": "attr"})
        actual = data.copy()
        actual.update(other)
        assert_identical(expected, actual)

    def test_update_overwrite_coords(self) -> None:
        data = Dataset({"a": ("x", [1, 2])}, {"b": 3})
        data.update(Dataset(coords={"b": 4}))
        expected = Dataset({"a": ("x", [1, 2])}, {"b": 4})
        assert_identical(data, expected)

        data = Dataset({"a": ("x", [1, 2])}, {"b": 3})
        data.update(Dataset({"c": 5}, coords={"b": 4}))
        expected = Dataset({"a": ("x", [1, 2]), "c": 5}, {"b": 4})
        assert_identical(data, expected)

        data = Dataset({"a": ("x", [1, 2])}, {"b": 3})
        data.update({"c": DataArray(5, coords={"b": 4})})
        expected = Dataset({"a": ("x", [1, 2]), "c": 5}, {"b": 3})
        assert_identical(data, expected)

    def test_update_multiindex_level(self) -> None:
        data = create_test_multiindex()

        with pytest.raises(
            ValueError, match=r"cannot set or update variable.*corrupt.*index "
        ):
            data.update({"level_1": range(4)})

    def test_update_auto_align(self) -> None:
        ds = Dataset({"x": ("t", [3, 4])}, {"t": [0, 1]})

        expected1 = Dataset(
            {"x": ("t", [3, 4]), "y": ("t", [np.nan, 5])}, {"t": [0, 1]}
        )
        actual1 = ds.copy()
        other1 = {"y": ("t", [5]), "t": [1]}
        with pytest.raises(ValueError, match=r"conflicting sizes"):
            actual1.update(other1)
        actual1.update(Dataset(other1))
        assert_identical(expected1, actual1)

        actual2 = ds.copy()
        other2 = Dataset({"y": ("t", [5]), "t": [100]})
        actual2.update(other2)
        expected2 = Dataset(
            {"x": ("t", [3, 4]), "y": ("t", [np.nan] * 2)}, {"t": [0, 1]}
        )
        assert_identical(expected2, actual2)

    def test_getitem(self) -> None:
        data = create_test_data()
        assert isinstance(data["var1"], DataArray)
        assert_equal(data["var1"].variable, data.variables["var1"])
        with pytest.raises(KeyError):
            data["notfound"]
        with pytest.raises(KeyError):
            data[["var1", "notfound"]]

        actual1 = data[["var1", "var2"]]
        expected1 = Dataset({"var1": data["var1"], "var2": data["var2"]})
        assert_equal(expected1, actual1)

        actual2 = data["numbers"]
        expected2 = DataArray(
            data["numbers"].variable,
            {"dim3": data["dim3"], "numbers": data["numbers"]},
            dims="dim3",
            name="numbers",
        )
        assert_identical(expected2, actual2)

        actual3 = data[dict(dim1=0)]
        expected3 = data.isel(dim1=0)
        assert_identical(expected3, actual3)

    def test_getitem_hashable(self) -> None:
        data = create_test_data()
        data[(3, 4)] = data["var1"] + 1
        expected = data["var1"] + 1
        expected.name = (3, 4)
        assert_identical(expected, data[(3, 4)])
        with pytest.raises(KeyError, match=r"('var1', 'var2')"):
            data[("var1", "var2")]

    def test_getitem_multiple_dtype(self) -> None:
        keys = ["foo", 1]
        dataset = Dataset({key: ("dim0", range(1)) for key in keys})
        assert_identical(dataset, dataset[keys])

    def test_virtual_variables_default_coords(self) -> None:
        dataset = Dataset({"foo": ("x", range(10))})
        expected1 = DataArray(range(10), dims="x", name="x")
        actual1 = dataset["x"]
        assert_identical(expected1, actual1)
        assert isinstance(actual1.variable, IndexVariable)

        actual2 = dataset[["x", "foo"]]
        expected2 = dataset.assign_coords(x=range(10))
        assert_identical(expected2, actual2)

    def test_virtual_variables_time(self) -> None:
        # access virtual variables
        data = create_test_data()
        assert_array_equal(
            data["time.month"].values, data.variables["time"].to_index().month
        )
        assert_array_equal(data["time.season"].values, "DJF")
        # test virtual variable math
        assert_array_equal(data["time.dayofyear"] + 1, 2 + np.arange(20))
        assert_array_equal(np.sin(data["time.dayofyear"]), np.sin(1 + np.arange(20)))
        # ensure they become coordinates
        expected = Dataset({}, {"dayofyear": data["time.dayofyear"]})
        actual = data[["time.dayofyear"]]
        assert_equal(expected, actual)
        # non-coordinate variables
        ds = Dataset({"t": ("x", pd.date_range("2000-01-01", periods=3))})
        assert (ds["t.year"] == 2000).all()

    def test_virtual_variable_same_name(self) -> None:
        # regression test for GH367
        times = pd.date_range("2000-01-01", freq="H", periods=5)
        data = Dataset({"time": times})
        actual = data["time.time"]
        expected = DataArray(times.time, [("time", times)], name="time")
        assert_identical(actual, expected)

    def test_time_season(self) -> None:
        ds = Dataset({"t": pd.date_range("2000-01-01", periods=12, freq="M")})
        seas = ["DJF"] * 2 + ["MAM"] * 3 + ["JJA"] * 3 + ["SON"] * 3 + ["DJF"]
        assert_array_equal(seas, ds["t.season"])

    def test_slice_virtual_variable(self) -> None:
        data = create_test_data()
        assert_equal(
            data["time.dayofyear"][:10].variable, Variable(["time"], 1 + np.arange(10))
        )
        assert_equal(data["time.dayofyear"][0].variable, Variable([], 1))

    def test_setitem(self) -> None:
        # assign a variable
        var = Variable(["dim1"], np.random.randn(8))
        data1 = create_test_data()
        data1["A"] = var
        data2 = data1.copy()
        data2["A"] = var
        assert_identical(data1, data2)
        # assign a dataset array
        dv = 2 * data2["A"]
        data1["B"] = dv.variable
        data2["B"] = dv
        assert_identical(data1, data2)
        # can't assign an ND array without dimensions
        with pytest.raises(ValueError, match=r"without explicit dimension names"):
            data2["C"] = var.values.reshape(2, 4)
        # but can assign a 1D array
        data1["C"] = var.values
        data2["C"] = ("C", var.values)
        assert_identical(data1, data2)
        # can assign a scalar
        data1["scalar"] = 0
        data2["scalar"] = ([], 0)
        assert_identical(data1, data2)
        # can't use the same dimension name as a scalar var
        with pytest.raises(ValueError, match=r"already exists as a scalar"):
            data1["newvar"] = ("scalar", [3, 4, 5])
        # can't resize a used dimension
        with pytest.raises(ValueError, match=r"conflicting dimension sizes"):
            data1["dim1"] = data1["dim1"][:5]
        # override an existing value
        data1["A"] = 3 * data2["A"]
        assert_equal(data1["A"], 3 * data2["A"])
        # can't assign a dataset to a single key
        with pytest.raises(TypeError, match="Cannot assign a Dataset to a single key"):
            data1["D"] = xr.Dataset()

        # test assignment with positional and label-based indexing
        data3 = data1[["var1", "var2"]]
        data3["var3"] = data3.var1.isel(dim1=0)
        data4 = data3.copy()
        err_msg = (
            "can only set locations defined by dictionaries from Dataset.loc. Got: a"
        )
        with pytest.raises(TypeError, match=err_msg):
            data1.loc["a"] = 0
        err_msg = r"Variables \['A', 'B', 'scalar'\] in new values not available in original dataset:"
        with pytest.raises(ValueError, match=err_msg):
            data4[{"dim2": 1}] = data1[{"dim2": 2}]
        err_msg = "Variable 'var3': indexer {'dim2': 0} not available"
        with pytest.raises(ValueError, match=err_msg):
            data1[{"dim2": 0}] = 0.0
        err_msg = "Variable 'var1': indexer {'dim2': 10} not available"
        with pytest.raises(ValueError, match=err_msg):
            data4[{"dim2": 10}] = data3[{"dim2": 2}]
        err_msg = "Variable 'var1': dimension 'dim2' appears in new values"
        with pytest.raises(KeyError, match=err_msg):
            data4[{"dim2": 2}] = data3[{"dim2": [2]}]
        err_msg = (
            "Variable 'var2': dimension order differs between original and new data"
        )
        data3["var2"] = data3["var2"].T
        with pytest.raises(ValueError, match=err_msg):
            data4[{"dim2": [2, 3]}] = data3[{"dim2": [2, 3]}]
        data3["var2"] = data3["var2"].T
        err_msg = r"cannot align objects.*not equal along these coordinates.*"
        with pytest.raises(ValueError, match=err_msg):
            data4[{"dim2": [2, 3]}] = data3[{"dim2": [2, 3, 4]}]
        err_msg = "Dataset assignment only accepts DataArrays, Datasets, and scalars."
        with pytest.raises(TypeError, match=err_msg):
            data4[{"dim2": [2, 3]}] = data3["var1"][{"dim2": [3, 4]}].values
        data5 = data4.astype(str)
        data5["var4"] = data4["var1"]
        err_msg = "could not convert string to float: 'a'"
        with pytest.raises(ValueError, match=err_msg):
            data5[{"dim2": 1}] = "a"

        data4[{"dim2": 0}] = 0.0
        data4[{"dim2": 1}] = data3[{"dim2": 2}]
        data4.loc[{"dim2": 1.5}] = 1.0
        data4.loc[{"dim2": 2.0}] = data3.loc[{"dim2": 2.5}]
        for v, dat3 in data3.items():
            dat4 = data4[v]
            assert_array_equal(dat4[{"dim2": 0}], 0.0)
            assert_array_equal(dat4[{"dim2": 1}], dat3[{"dim2": 2}])
            assert_array_equal(dat4.loc[{"dim2": 1.5}], 1.0)
            assert_array_equal(dat4.loc[{"dim2": 2.0}], dat3.loc[{"dim2": 2.5}])
            unchanged = [1.0, 2.5, 3.0, 3.5, 4.0]
            assert_identical(
                dat4.loc[{"dim2": unchanged}], dat3.loc[{"dim2": unchanged}]
            )

    def test_setitem_pandas(self) -> None:

        ds = self.make_example_math_dataset()
        ds["x"] = np.arange(3)
        ds_copy = ds.copy()
        ds_copy["bar"] = ds["bar"].to_pandas()

        assert_equal(ds, ds_copy)

    def test_setitem_auto_align(self) -> None:
        ds = Dataset()
        ds["x"] = ("y", range(3))
        ds["y"] = 1 + np.arange(3)
        expected = Dataset({"x": ("y", range(3)), "y": 1 + np.arange(3)})
        assert_identical(ds, expected)

        ds["y"] = DataArray(range(3), dims="y")
        expected = Dataset({"x": ("y", range(3))}, {"y": range(3)})
        assert_identical(ds, expected)

        ds["x"] = DataArray([1, 2], coords=[("y", [0, 1])])
        expected = Dataset({"x": ("y", [1, 2, np.nan])}, {"y": range(3)})
        assert_identical(ds, expected)

        ds["x"] = 42
        expected = Dataset({"x": 42, "y": range(3)})
        assert_identical(ds, expected)

        ds["x"] = DataArray([4, 5, 6, 7], coords=[("y", [0, 1, 2, 3])])
        expected = Dataset({"x": ("y", [4, 5, 6])}, {"y": range(3)})
        assert_identical(ds, expected)

    def test_setitem_dimension_override(self) -> None:
        # regression test for GH-3377
        ds = xr.Dataset({"x": [0, 1, 2]})
        ds["x"] = ds["x"][:2]
        expected = Dataset({"x": [0, 1]})
        assert_identical(ds, expected)

        ds = xr.Dataset({"x": [0, 1, 2]})
        ds["x"] = np.array([0, 1])
        assert_identical(ds, expected)

        ds = xr.Dataset({"x": [0, 1, 2]})
        ds.coords["x"] = [0, 1]
        assert_identical(ds, expected)

    def test_setitem_with_coords(self) -> None:
        # Regression test for GH:2068
        ds = create_test_data()

        other = DataArray(
            np.arange(10), dims="dim3", coords={"numbers": ("dim3", np.arange(10))}
        )
        expected = ds.copy()
        expected["var3"] = other.drop_vars("numbers")
        actual = ds.copy()
        actual["var3"] = other
        assert_identical(expected, actual)
        assert "numbers" in other.coords  # should not change other

        # with alignment
        other = ds["var3"].isel(dim3=slice(1, -1))
        other["numbers"] = ("dim3", np.arange(8))
        actual = ds.copy()
        actual["var3"] = other
        assert "numbers" in other.coords  # should not change other
        expected = ds.copy()
        expected["var3"] = ds["var3"].isel(dim3=slice(1, -1))
        assert_identical(expected, actual)

        # with non-duplicate coords
        other = ds["var3"].isel(dim3=slice(1, -1))
        other["numbers"] = ("dim3", np.arange(8))
        other["position"] = ("dim3", np.arange(8))
        actual = ds.copy()
        actual["var3"] = other
        assert "position" in actual
        assert "position" in other.coords

        # assigning a coordinate-only dataarray
        actual = ds.copy()
        other = actual["numbers"]
        other[0] = 10
        actual["numbers"] = other
        assert actual["numbers"][0] == 10

        # GH: 2099
        ds = Dataset(
            {"var": ("x", [1, 2, 3])},
            coords={"x": [0, 1, 2], "z1": ("x", [1, 2, 3]), "z2": ("x", [1, 2, 3])},
        )
        ds["var"] = ds["var"] * 2
        assert np.allclose(ds["var"], [2, 4, 6])

    def test_setitem_align_new_indexes(self) -> None:
        ds = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]})
        ds["bar"] = DataArray([2, 3, 4], [("x", [1, 2, 3])])
        expected = Dataset(
            {"foo": ("x", [1, 2, 3]), "bar": ("x", [np.nan, 2, 3])}, {"x": [0, 1, 2]}
        )
        assert_identical(ds, expected)

    @pytest.mark.parametrize("dtype", [str, bytes])
    def test_setitem_str_dtype(self, dtype) -> None:

        ds = xr.Dataset(coords={"x": np.array(["x", "y"], dtype=dtype)})
        # test Dataset update
        ds["foo"] = xr.DataArray(np.array([0, 0]), dims=["x"])

        assert np.issubdtype(ds.x.dtype, dtype)

    def test_setitem_using_list(self) -> None:

        # assign a list of variables
        var1 = Variable(["dim1"], np.random.randn(8))
        var2 = Variable(["dim1"], np.random.randn(8))
        actual = create_test_data()
        expected = actual.copy()
        expected["A"] = var1
        expected["B"] = var2
        actual[["A", "B"]] = [var1, var2]
        assert_identical(actual, expected)
        # assign a list of dataset arrays
        dv = 2 * expected[["A", "B"]]
        actual[["C", "D"]] = [d.variable for d in dv.data_vars.values()]
        expected[["C", "D"]] = dv
        assert_identical(actual, expected)

    @pytest.mark.parametrize(
        "var_list, data, error_regex",
        [
            (
                ["A", "B"],
                [Variable(["dim1"], np.random.randn(8))],
                r"Different lengths",
            ),
            ([], [Variable(["dim1"], np.random.randn(8))], r"Empty list of variables"),
            (["A", "B"], xr.DataArray([1, 2]), r"assign single DataArray"),
        ],
    )
    def test_setitem_using_list_errors(self, var_list, data, error_regex) -> None:
        actual = create_test_data()
        with pytest.raises(ValueError, match=error_regex):
            actual[var_list] = data

    def test_assign(self) -> None:
        ds = Dataset()
        actual = ds.assign(x=[0, 1, 2], y=2)
        expected = Dataset({"x": [0, 1, 2], "y": 2})
        assert_identical(actual, expected)
        assert list(actual.variables) == ["x", "y"]
        assert_identical(ds, Dataset())

        actual = actual.assign(y=lambda ds: ds.x**2)
        expected = Dataset({"y": ("x", [0, 1, 4]), "x": [0, 1, 2]})
        assert_identical(actual, expected)

        actual = actual.assign_coords(z=2)
        expected = Dataset({"y": ("x", [0, 1, 4])}, {"z": 2, "x": [0, 1, 2]})
        assert_identical(actual, expected)

    def test_assign_coords(self) -> None:
        ds = Dataset()

        actual = ds.assign(x=[0, 1, 2], y=2)
        actual = actual.assign_coords(x=list("abc"))
        expected = Dataset({"x": list("abc"), "y": 2})
        assert_identical(actual, expected)

        actual = ds.assign(x=[0, 1, 2], y=[2, 3])
        actual = actual.assign_coords({"y": [2.0, 3.0]})
        expected = ds.assign(x=[0, 1, 2], y=[2.0, 3.0])
        assert_identical(actual, expected)

    def test_assign_attrs(self) -> None:
        expected = Dataset(attrs=dict(a=1, b=2))
        new = Dataset()
        actual = new.assign_attrs(a=1, b=2)
        assert_identical(actual, expected)
        assert new.attrs == {}

        expected.attrs["c"] = 3
        new_actual = actual.assign_attrs({"c": 3})
        assert_identical(new_actual, expected)
        assert actual.attrs == dict(a=1, b=2)

    def test_assign_multiindex_level(self) -> None:
        data = create_test_multiindex()
        with pytest.raises(
            ValueError, match=r"cannot set or update variable.*corrupt.*index "
        ):
            data.assign(level_1=range(4))
            data.assign_coords(level_1=range(4))

    def test_assign_coords_existing_multiindex(self) -> None:
        data = create_test_multiindex()
        with pytest.warns(FutureWarning, match=r"Updating MultiIndexed coordinate"):
            data.assign_coords(x=range(4))

        with pytest.warns(FutureWarning, match=r"Updating MultiIndexed coordinate"):
            data.assign(x=range(4))

        # https://github.com/pydata/xarray/issues/7097 (coord names updated)
        updated = data.assign_coords(x=range(4))
        assert len(updated.coords) == 1

    def test_assign_all_multiindex_coords(self) -> None:
        data = create_test_multiindex()
        actual = data.assign(x=range(4), level_1=range(4), level_2=range(4))
        # no error but multi-index dropped in favor of single indexes for each level
        assert (
            actual.xindexes["x"]
            is not actual.xindexes["level_1"]
            is not actual.xindexes["level_2"]
        )

    def test_assign_coords_custom_index_side_effect(self) -> None:
        # test that assigning new coordinates do not reset other dimension coord indexes
        # to default (pandas) index (https://github.com/pydata/xarray/issues/7346)
        class CustomIndex(PandasIndex):
            pass

        ds = (
            Dataset(coords={"x": [1, 2, 3]})
            .drop_indexes("x")
            .set_xindex("x", CustomIndex)
        )
        actual = ds.assign_coords(y=[4, 5, 6])
        assert isinstance(actual.xindexes["x"], CustomIndex)

    def test_merge_multiindex_level(self) -> None:
        data = create_test_multiindex()

        other = Dataset({"level_1": ("x", [0, 1])})
        with pytest.raises(ValueError, match=r".*conflicting dimension sizes.*"):
            data.merge(other)

        other = Dataset({"level_1": ("x", range(4))})
        with pytest.raises(
            ValueError, match=r"unable to determine.*coordinates or not.*"
        ):
            data.merge(other)

        # `other` Dataset coordinates are ignored (bug or feature?)
        other = Dataset(coords={"level_1": ("x", range(4))})
        assert_identical(data.merge(other), data)

    def test_setitem_original_non_unique_index(self) -> None:
        # regression test for GH943
        original = Dataset({"data": ("x", np.arange(5))}, coords={"x": [0, 1, 2, 0, 1]})
        expected = Dataset({"data": ("x", np.arange(5))}, {"x": range(5)})

        actual = original.copy()
        actual["x"] = list(range(5))
        assert_identical(actual, expected)

        actual = original.copy()
        actual["x"] = ("x", list(range(5)))
        assert_identical(actual, expected)

        actual = original.copy()
        actual.coords["x"] = list(range(5))
        assert_identical(actual, expected)

    def test_setitem_both_non_unique_index(self) -> None:
        # regression test for GH956
        names = ["joaquin", "manolo", "joaquin"]
        values = np.random.randint(0, 256, (3, 4, 4))
        array = DataArray(
            values, dims=["name", "row", "column"], coords=[names, range(4), range(4)]
        )
        expected = Dataset({"first": array, "second": array})
        actual = array.rename("first").to_dataset()
        actual["second"] = array
        assert_identical(expected, actual)

    def test_setitem_multiindex_level(self) -> None:
        data = create_test_multiindex()
        with pytest.raises(
            ValueError, match=r"cannot set or update variable.*corrupt.*index "
        ):
            data["level_1"] = range(4)

    def test_delitem(self) -> None:
        data = create_test_data()
        all_items = set(data.variables)
        assert set(data.variables) == all_items
        del data["var1"]
        assert set(data.variables) == all_items - {"var1"}
        del data["numbers"]
        assert set(data.variables) == all_items - {"var1", "numbers"}
        assert "numbers" not in data.coords

        expected = Dataset()
        actual = Dataset({"y": ("x", [1, 2])})
        del actual["y"]
        assert_identical(expected, actual)

    def test_delitem_multiindex_level(self) -> None:
        data = create_test_multiindex()
        with pytest.raises(
            ValueError, match=r"cannot remove coordinate.*corrupt.*index "
        ):
            del data["level_1"]

    def test_squeeze(self) -> None:
        data = Dataset({"foo": (["x", "y", "z"], [[[1], [2]]])})
        test_args: list[list] = [[], [["x"]], [["x", "z"]]]
        for args in test_args:

            def get_args(v):
                return [set(args[0]) & set(v.dims)] if args else []

            expected = Dataset(
                {k: v.squeeze(*get_args(v)) for k, v in data.variables.items()}
            )
            expected = expected.set_coords(data.coords)
            assert_identical(expected, data.squeeze(*args))
        # invalid squeeze
        with pytest.raises(ValueError, match=r"cannot select a dimension"):
            data.squeeze("y")

    def test_squeeze_drop(self) -> None:
        data = Dataset({"foo": ("x", [1])}, {"x": [0]})
        expected = Dataset({"foo": 1})
        selected = data.squeeze(drop=True)
        assert_identical(expected, selected)

        expected = Dataset({"foo": 1}, {"x": 0})
        selected = data.squeeze(drop=False)
        assert_identical(expected, selected)

        data = Dataset({"foo": (("x", "y"), [[1]])}, {"x": [0], "y": [0]})
        expected = Dataset({"foo": 1})
        selected = data.squeeze(drop=True)
        assert_identical(expected, selected)

        expected = Dataset({"foo": ("x", [1])}, {"x": [0]})
        selected = data.squeeze(dim="y", drop=True)
        assert_identical(expected, selected)

        data = Dataset({"foo": (("x",), [])}, {"x": []})
        selected = data.squeeze(drop=True)
        assert_identical(data, selected)

    def test_to_array(self) -> None:
        ds = Dataset(
            {"a": 1, "b": ("x", [1, 2, 3])},
            coords={"c": 42},
            attrs={"Conventions": "None"},
        )
        data = [[1, 1, 1], [1, 2, 3]]
        coords = {"c": 42, "variable": ["a", "b"]}
        dims = ("variable", "x")
        expected = DataArray(data, coords, dims, attrs=ds.attrs)
        actual = ds.to_array()
        assert_identical(expected, actual)

        actual = ds.to_array("abc", name="foo")
        expected = expected.rename({"variable": "abc"}).rename("foo")
        assert_identical(expected, actual)

    def test_to_and_from_dataframe(self) -> None:
        x = np.random.randn(10)
        y = np.random.randn(10)
        t = list("abcdefghij")
        ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t)})
        expected = pd.DataFrame(
            np.array([x, y]).T, columns=["a", "b"], index=pd.Index(t, name="t")
        )
        actual = ds.to_dataframe()
        # use the .equals method to check all DataFrame metadata
        assert expected.equals(actual), (expected, actual)

        # verify coords are included
        actual = ds.set_coords("b").to_dataframe()
        assert expected.equals(actual), (expected, actual)

        # check roundtrip
        assert_identical(ds, Dataset.from_dataframe(actual))

        # test a case with a MultiIndex
        w = np.random.randn(2, 3)
        ds = Dataset({"w": (("x", "y"), w)})
        ds["y"] = ("y", list("abc"))
        exp_index = pd.MultiIndex.from_arrays(
            [[0, 0, 0, 1, 1, 1], ["a", "b", "c", "a", "b", "c"]], names=["x", "y"]
        )
        expected = pd.DataFrame(w.reshape(-1), columns=["w"], index=exp_index)
        actual = ds.to_dataframe()
        assert expected.equals(actual)

        # check roundtrip
        assert_identical(ds.assign_coords(x=[0, 1]), Dataset.from_dataframe(actual))

        # Check multiindex reordering
        new_order = ["x", "y"]
        actual = ds.to_dataframe(dim_order=new_order)
        assert expected.equals(actual)

        new_order = ["y", "x"]
        exp_index = pd.MultiIndex.from_arrays(
            [["a", "a", "b", "b", "c", "c"], [0, 1, 0, 1, 0, 1]], names=["y", "x"]
        )
        expected = pd.DataFrame(
            w.transpose().reshape(-1), columns=["w"], index=exp_index
        )
        actual = ds.to_dataframe(dim_order=new_order)
        assert expected.equals(actual)

        invalid_order = ["x"]
        with pytest.raises(
            ValueError, match="does not match the set of dimensions of this"
        ):
            ds.to_dataframe(dim_order=invalid_order)

        invalid_order = ["x", "z"]
        with pytest.raises(
            ValueError, match="does not match the set of dimensions of this"
        ):
            ds.to_dataframe(dim_order=invalid_order)

        # check pathological cases
        df = pd.DataFrame([1])
        actual = Dataset.from_dataframe(df)
        expected = Dataset({0: ("index", [1])}, {"index": [0]})
        assert_identical(expected, actual)

        df = pd.DataFrame()
        actual = Dataset.from_dataframe(df)
        expected = Dataset(coords={"index": []})
        assert_identical(expected, actual)

        # GH697
        df = pd.DataFrame({"A": []})
        actual = Dataset.from_dataframe(df)
        expected = Dataset({"A": DataArray([], dims=("index",))}, {"index": []})
        assert_identical(expected, actual)

        # regression test for GH278
        # use int64 to ensure consistent results for the pandas .equals method
        # on windows (which requires the same dtype)
        ds = Dataset({"x": pd.Index(["bar"]), "a": ("y", np.array([1], "int64"))}).isel(
            x=0
        )
        # use .loc to ensure consistent results on Python 3
        actual = ds.to_dataframe().loc[:, ["a", "x"]]
        expected = pd.DataFrame(
            [[1, "bar"]], index=pd.Index([0], name="y"), columns=["a", "x"]
        )
        assert expected.equals(actual), (expected, actual)

        ds = Dataset({"x": np.array([0], "int64"), "y": np.array([1], "int64")})
        actual = ds.to_dataframe()
        idx = pd.MultiIndex.from_arrays([[0], [1]], names=["x", "y"])
        expected = pd.DataFrame([[]], index=idx)
        assert expected.equals(actual), (expected, actual)

    def test_from_dataframe_categorical(self) -> None:
        cat = pd.CategoricalDtype(
            categories=["foo", "bar", "baz", "qux", "quux", "corge"]
        )
        i1 = pd.Series(["foo", "bar", "foo"], dtype=cat)
        i2 = pd.Series(["bar", "bar", "baz"], dtype=cat)

        df = pd.DataFrame({"i1": i1, "i2": i2, "values": [1, 2, 3]})
        ds = df.set_index("i1").to_xarray()
        assert len(ds["i1"]) == 3

        ds = df.set_index(["i1", "i2"]).to_xarray()
        assert len(ds["i1"]) == 2
        assert len(ds["i2"]) == 2

    @requires_sparse
    def test_from_dataframe_sparse(self) -> None:
        import sparse

        df_base = pd.DataFrame(
            {"x": range(10), "y": list("abcdefghij"), "z": np.arange(0, 100, 10)}
        )

        ds_sparse = Dataset.from_dataframe(df_base.set_index("x"), sparse=True)
        ds_dense = Dataset.from_dataframe(df_base.set_index("x"), sparse=False)
        assert isinstance(ds_sparse["y"].data, sparse.COO)
        assert isinstance(ds_sparse["z"].data, sparse.COO)
        ds_sparse["y"].data = ds_sparse["y"].data.todense()
        ds_sparse["z"].data = ds_sparse["z"].data.todense()
        assert_identical(ds_dense, ds_sparse)

        ds_sparse = Dataset.from_dataframe(df_base.set_index(["x", "y"]), sparse=True)
        ds_dense = Dataset.from_dataframe(df_base.set_index(["x", "y"]), sparse=False)
        assert isinstance(ds_sparse["z"].data, sparse.COO)
        ds_sparse["z"].data = ds_sparse["z"].data.todense()
        assert_identical(ds_dense, ds_sparse)

    def test_to_and_from_empty_dataframe(self) -> None:
        # GH697
        expected = pd.DataFrame({"foo": []})
        ds = Dataset.from_dataframe(expected)
        assert len(ds["foo"]) == 0
        actual = ds.to_dataframe()
        assert len(actual) == 0
        assert expected.equals(actual)

    def test_from_dataframe_multiindex(self) -> None:
        index = pd.MultiIndex.from_product([["a", "b"], [1, 2, 3]], names=["x", "y"])
        df = pd.DataFrame({"z": np.arange(6)}, index=index)

        expected = Dataset(
            {"z": (("x", "y"), [[0, 1, 2], [3, 4, 5]])},
            coords={"x": ["a", "b"], "y": [1, 2, 3]},
        )
        actual = Dataset.from_dataframe(df)
        assert_identical(actual, expected)

        df2 = df.iloc[[3, 2, 1, 0, 4, 5], :]
        actual = Dataset.from_dataframe(df2)
        assert_identical(actual, expected)

        df3 = df.iloc[:4, :]
        expected3 = Dataset(
            {"z": (("x", "y"), [[0, 1, 2], [3, np.nan, np.nan]])},
            coords={"x": ["a", "b"], "y": [1, 2, 3]},
        )
        actual = Dataset.from_dataframe(df3)
        assert_identical(actual, expected3)

        df_nonunique = df.iloc[[0, 0], :]
        with pytest.raises(ValueError, match=r"non-unique MultiIndex"):
            Dataset.from_dataframe(df_nonunique)

    def test_from_dataframe_unsorted_levels(self) -> None:
        # regression test for GH-4186
        index = pd.MultiIndex(
            levels=[["b", "a"], ["foo"]], codes=[[0, 1], [0, 0]], names=["lev1", "lev2"]
        )
        df = pd.DataFrame({"c1": [0, 2], "c2": [1, 3]}, index=index)
        expected = Dataset(
            {
                "c1": (("lev1", "lev2"), [[0], [2]]),
                "c2": (("lev1", "lev2"), [[1], [3]]),
            },
            coords={"lev1": ["b", "a"], "lev2": ["foo"]},
        )
        actual = Dataset.from_dataframe(df)
        assert_identical(actual, expected)

    def test_from_dataframe_non_unique_columns(self) -> None:
        # regression test for GH449
        df = pd.DataFrame(np.zeros((2, 2)))
        df.columns = ["foo", "foo"]
        with pytest.raises(ValueError, match=r"non-unique columns"):
            Dataset.from_dataframe(df)

    def test_convert_dataframe_with_many_types_and_multiindex(self) -> None:
        # regression test for GH737
        df = pd.DataFrame(
            {
                "a": list("abc"),
                "b": list(range(1, 4)),
                "c": np.arange(3, 6).astype("u1"),
                "d": np.arange(4.0, 7.0, dtype="float64"),
                "e": [True, False, True],
                "f": pd.Categorical(list("abc")),
                "g": pd.date_range("20130101", periods=3),
                "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
            }
        )
        df.index = pd.MultiIndex.from_product([["a"], range(3)], names=["one", "two"])
        roundtripped = Dataset.from_dataframe(df).to_dataframe()
        # we can't do perfectly, but we should be at least as faithful as
        # np.asarray
        expected = df.apply(np.asarray)
        assert roundtripped.equals(expected)

    def test_to_and_from_dict(self) -> None:
        # <xarray.Dataset>
        # Dimensions:  (t: 10)
        # Coordinates:
        #   * t        (t) <U1 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j'
        # Data variables:
        #     a        (t) float64 0.6916 -1.056 -1.163 0.9792 -0.7865 ...
        #     b        (t) float64 1.32 0.1954 1.91 1.39 0.519 -0.2772 ...
        x = np.random.randn(10)
        y = np.random.randn(10)
        t = list("abcdefghij")
        ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t)})
        expected: dict[str, dict[str, Any]] = {
            "coords": {"t": {"dims": ("t",), "data": t, "attrs": {}}},
            "attrs": {},
            "dims": {"t": 10},
            "data_vars": {
                "a": {"dims": ("t",), "data": x.tolist(), "attrs": {}},
                "b": {"dims": ("t",), "data": y.tolist(), "attrs": {}},
            },
        }

        actual = ds.to_dict()

        # check that they are identical
        assert expected == actual

        # check roundtrip
        assert_identical(ds, Dataset.from_dict(actual))

        # check the data=False option
        expected_no_data = expected.copy()
        del expected_no_data["coords"]["t"]["data"]
        del expected_no_data["data_vars"]["a"]["data"]
        del expected_no_data["data_vars"]["b"]["data"]
        endiantype = "<U1" if sys.byteorder == "little" else ">U1"
        expected_no_data["coords"]["t"].update({"dtype": endiantype, "shape": (10,)})
        expected_no_data["data_vars"]["a"].update({"dtype": "float64", "shape": (10,)})
        expected_no_data["data_vars"]["b"].update({"dtype": "float64", "shape": (10,)})
        actual_no_data = ds.to_dict(data=False)
        assert expected_no_data == actual_no_data

        # verify coords are included roundtrip
        expected_ds = ds.set_coords("b")
        actual2 = Dataset.from_dict(expected_ds.to_dict())

        assert_identical(expected_ds, actual2)

        # test some incomplete dicts:
        # this one has no attrs field, the dims are strings, and x, y are
        # np.arrays

        d = {
            "coords": {"t": {"dims": "t", "data": t}},
            "dims": "t",
            "data_vars": {"a": {"dims": "t", "data": x}, "b": {"dims": "t", "data": y}},
        }
        assert_identical(ds, Dataset.from_dict(d))

        # this is kind of a flattened version with no coords, or data_vars
        d = {
            "a": {"dims": "t", "data": x},
            "t": {"data": t, "dims": "t"},
            "b": {"dims": "t", "data": y},
        }
        assert_identical(ds, Dataset.from_dict(d))

        # this one is missing some necessary information
        d = {
            "a": {"data": x},
            "t": {"data": t, "dims": "t"},
            "b": {"dims": "t", "data": y},
        }
        with pytest.raises(
            ValueError, match=r"cannot convert dict without the key 'dims'"
        ):
            Dataset.from_dict(d)

    def test_to_and_from_dict_with_time_dim(self) -> None:
        x = np.random.randn(10, 3)
        y = np.random.randn(10, 3)
        t = pd.date_range("20130101", periods=10)
        lat = [77.7, 83.2, 76]
        ds = Dataset(
            {
                "a": (["t", "lat"], x),
                "b": (["t", "lat"], y),
                "t": ("t", t),
                "lat": ("lat", lat),
            }
        )
        roundtripped = Dataset.from_dict(ds.to_dict())
        assert_identical(ds, roundtripped)

    def test_to_and_from_dict_with_nan_nat(self) -> None:
        x = np.random.randn(10, 3)
        y = np.random.randn(10, 3)
        y[2] = np.nan
        t = pd.Series(pd.date_range("20130101", periods=10))
        t[2] = np.nan

        lat = [77.7, 83.2, 76]
        ds = Dataset(
            {
                "a": (["t", "lat"], x),
                "b": (["t", "lat"], y),
                "t": ("t", t),
                "lat": ("lat", lat),
            }
        )
        roundtripped = Dataset.from_dict(ds.to_dict())
        assert_identical(ds, roundtripped)

    def test_to_dict_with_numpy_attrs(self) -> None:
        # this doesn't need to roundtrip
        x = np.random.randn(10)
        y = np.random.randn(10)
        t = list("abcdefghij")
        attrs = {
            "created": np.float64(1998),
            "coords": np.array([37, -110.1, 100]),
            "maintainer": "bar",
        }
        ds = Dataset({"a": ("t", x, attrs), "b": ("t", y, attrs), "t": ("t", t)})
        expected_attrs = {
            "created": attrs["created"].item(),  # type: ignore[attr-defined]
            "coords": attrs["coords"].tolist(),  # type: ignore[attr-defined]
            "maintainer": "bar",
        }
        actual = ds.to_dict()

        # check that they are identical
        assert expected_attrs == actual["data_vars"]["a"]["attrs"]

    def test_pickle(self) -> None:
        data = create_test_data()
        roundtripped = pickle.loads(pickle.dumps(data))
        assert_identical(data, roundtripped)
        # regression test for #167:
        assert data.dims == roundtripped.dims

    def test_lazy_load(self) -> None:
        store = InaccessibleVariableDataStore()
        create_test_data().dump_to_store(store)

        for decode_cf in [True, False]:
            ds = open_dataset(store, decode_cf=decode_cf)
            with pytest.raises(UnexpectedDataAccess):
                ds.load()
            with pytest.raises(UnexpectedDataAccess):
                ds["var1"].values

            # these should not raise UnexpectedDataAccess:
            ds.isel(time=10)
            ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1)

    def test_dropna(self) -> None:
        x = np.random.randn(4, 4)
        x[::2, 0] = np.nan
        y = np.random.randn(4)
        y[-1] = np.nan
        ds = Dataset({"foo": (("a", "b"), x), "bar": (("b", y))})

        expected = ds.isel(a=slice(1, None, 2))
        actual = ds.dropna("a")
        assert_identical(actual, expected)

        expected = ds.isel(b=slice(1, 3))
        actual = ds.dropna("b")
        assert_identical(actual, expected)

        actual = ds.dropna("b", subset=["foo", "bar"])
        assert_identical(actual, expected)

        expected = ds.isel(b=slice(1, None))
        actual = ds.dropna("b", subset=["foo"])
        assert_identical(actual, expected)

        expected = ds.isel(b=slice(3))
        actual = ds.dropna("b", subset=["bar"])
        assert_identical(actual, expected)

        actual = ds.dropna("a", subset=[])
        assert_identical(actual, ds)

        actual = ds.dropna("a", subset=["bar"])
        assert_identical(actual, ds)

        actual = ds.dropna("a", how="all")
        assert_identical(actual, ds)

        actual = ds.dropna("b", how="all", subset=["bar"])
        expected = ds.isel(b=[0, 1, 2])
        assert_identical(actual, expected)

        actual = ds.dropna("b", thresh=1, subset=["bar"])
        assert_identical(actual, expected)

        actual = ds.dropna("b", thresh=2)
        assert_identical(actual, ds)

        actual = ds.dropna("b", thresh=4)
        expected = ds.isel(b=[1, 2, 3])
        assert_identical(actual, expected)

        actual = ds.dropna("a", thresh=3)
        expected = ds.isel(a=[1, 3])
        assert_identical(actual, ds)

        with pytest.raises(ValueError, match=r"a single dataset dimension"):
            ds.dropna("foo")
        with pytest.raises(ValueError, match=r"invalid how"):
            ds.dropna("a", how="somehow")  # type: ignore
        with pytest.raises(TypeError, match=r"must specify how or thresh"):
            ds.dropna("a", how=None)  # type: ignore

    def test_fillna(self) -> None:
        ds = Dataset({"a": ("x", [np.nan, 1, np.nan, 3])}, {"x": [0, 1, 2, 3]})

        # fill with -1
        actual1 = ds.fillna(-1)
        expected = Dataset({"a": ("x", [-1, 1, -1, 3])}, {"x": [0, 1, 2, 3]})
        assert_identical(expected, actual1)

        actual2 = ds.fillna({"a": -1})
        assert_identical(expected, actual2)

        other = Dataset({"a": -1})
        actual3 = ds.fillna(other)
        assert_identical(expected, actual3)

        actual4 = ds.fillna({"a": other.a})
        assert_identical(expected, actual4)

        # fill with range(4)
        b = DataArray(range(4), coords=[("x", range(4))])
        actual5 = ds.fillna(b)
        expected = b.rename("a").to_dataset()
        assert_identical(expected, actual5)

        actual6 = ds.fillna(expected)
        assert_identical(expected, actual6)

        actual7 = ds.fillna(range(4))
        assert_identical(expected, actual7)

        actual8 = ds.fillna(b[:3])
        assert_identical(expected, actual8)

        # okay to only include some data variables
        ds["b"] = np.nan
        actual9 = ds.fillna({"a": -1})
        expected = Dataset(
            {"a": ("x", [-1, 1, -1, 3]), "b": np.nan}, {"x": [0, 1, 2, 3]}
        )
        assert_identical(expected, actual9)

        # but new data variables is not okay
        with pytest.raises(ValueError, match=r"must be contained"):
            ds.fillna({"x": 0})

        # empty argument should be OK
        result1 = ds.fillna({})
        assert_identical(ds, result1)

        result2 = ds.fillna(Dataset(coords={"c": 42}))
        expected = ds.assign_coords(c=42)
        assert_identical(expected, result2)

        da = DataArray(range(5), name="a", attrs={"attr": "da"})
        actual10 = da.fillna(1)
        assert actual10.name == "a"
        assert actual10.attrs == da.attrs

        ds = Dataset({"a": da}, attrs={"attr": "ds"})
        actual11 = ds.fillna({"a": 1})
        assert actual11.attrs == ds.attrs
        assert actual11.a.name == "a"
        assert actual11.a.attrs == ds.a.attrs

    @pytest.mark.parametrize(
        "func", [lambda x: x.clip(0, 1), lambda x: np.float64(1.0) * x, np.abs, abs]
    )
    def test_propagate_attrs(self, func) -> None:

        da = DataArray(range(5), name="a", attrs={"attr": "da"})
        ds = Dataset({"a": da}, attrs={"attr": "ds"})

        # test defaults
        assert func(ds).attrs == ds.attrs
        with set_options(keep_attrs=False):
            assert func(ds).attrs != ds.attrs
            assert func(ds).a.attrs != ds.a.attrs

        with set_options(keep_attrs=False):
            assert func(ds).attrs != ds.attrs
            assert func(ds).a.attrs != ds.a.attrs

        with set_options(keep_attrs=True):
            assert func(ds).attrs == ds.attrs
            assert func(ds).a.attrs == ds.a.attrs

    def test_where(self) -> None:
        ds = Dataset({"a": ("x", range(5))})
        expected1 = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])})
        actual1 = ds.where(ds > 1)
        assert_identical(expected1, actual1)

        actual2 = ds.where(ds.a > 1)
        assert_identical(expected1, actual2)

        actual3 = ds.where(ds.a.values > 1)
        assert_identical(expected1, actual3)

        actual4 = ds.where(True)
        assert_identical(ds, actual4)

        expected5 = ds.copy(deep=True)
        expected5["a"].values = np.array([np.nan] * 5)
        actual5 = ds.where(False)
        assert_identical(expected5, actual5)

        # 2d
        ds = Dataset({"a": (("x", "y"), [[0, 1], [2, 3]])})
        expected6 = Dataset({"a": (("x", "y"), [[np.nan, 1], [2, 3]])})
        actual6 = ds.where(ds > 0)
        assert_identical(expected6, actual6)

        # attrs
        da = DataArray(range(5), name="a", attrs={"attr": "da"})
        actual7 = da.where(da.values > 1)
        assert actual7.name == "a"
        assert actual7.attrs == da.attrs

        ds = Dataset({"a": da}, attrs={"attr": "ds"})
        actual8 = ds.where(ds > 0)
        assert actual8.attrs == ds.attrs
        assert actual8.a.name == "a"
        assert actual8.a.attrs == ds.a.attrs

        # lambda
        ds = Dataset({"a": ("x", range(5))})
        expected9 = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])})
        actual9 = ds.where(lambda x: x > 1)
        assert_identical(expected9, actual9)

    def test_where_other(self) -> None:
        ds = Dataset({"a": ("x", range(5))}, {"x": range(5)})
        expected = Dataset({"a": ("x", [-1, -1, 2, 3, 4])}, {"x": range(5)})
        actual = ds.where(ds > 1, -1)
        assert_equal(expected, actual)
        assert actual.a.dtype == int

        actual = ds.where(lambda x: x > 1, -1)
        assert_equal(expected, actual)

        actual = ds.where(ds > 1, other=-1, drop=True)
        expected_nodrop = ds.where(ds > 1, -1)
        _, expected = xr.align(actual, expected_nodrop, join="left")
        assert_equal(actual, expected)
        assert actual.a.dtype == int

        with pytest.raises(ValueError, match=r"cannot align .* are not equal"):
            ds.where(ds > 1, ds.isel(x=slice(3)))

        with pytest.raises(ValueError, match=r"exact match required"):
            ds.where(ds > 1, ds.assign(b=2))

    def test_where_drop(self) -> None:
        # if drop=True

        # 1d
        # data array case
        array = DataArray(range(5), coords=[range(5)], dims=["x"])
        expected1 = DataArray(range(5)[2:], coords=[range(5)[2:]], dims=["x"])
        actual1 = array.where(array > 1, drop=True)
        assert_identical(expected1, actual1)

        # dataset case
        ds = Dataset({"a": array})
        expected2 = Dataset({"a": expected1})

        actual2 = ds.where(ds > 1, drop=True)
        assert_identical(expected2, actual2)

        actual3 = ds.where(ds.a > 1, drop=True)
        assert_identical(expected2, actual3)

        with pytest.raises(TypeError, match=r"must be a"):
            ds.where(np.arange(5) > 1, drop=True)

        # 1d with odd coordinates
        array = DataArray(
            np.array([2, 7, 1, 8, 3]), coords=[np.array([3, 1, 4, 5, 9])], dims=["x"]
        )
        expected4 = DataArray(
            np.array([7, 8, 3]), coords=[np.array([1, 5, 9])], dims=["x"]
        )
        actual4 = array.where(array > 2, drop=True)
        assert_identical(expected4, actual4)

        # 1d multiple variables
        ds = Dataset({"a": (("x"), [0, 1, 2, 3]), "b": (("x"), [4, 5, 6, 7])})
        expected5 = Dataset(
            {"a": (("x"), [np.nan, 1, 2, 3]), "b": (("x"), [4, 5, 6, np.nan])}
        )
        actual5 = ds.where((ds > 0) & (ds < 7), drop=True)
        assert_identical(expected5, actual5)

        # 2d
        ds = Dataset({"a": (("x", "y"), [[0, 1], [2, 3]])})
        expected6 = Dataset({"a": (("x", "y"), [[np.nan, 1], [2, 3]])})
        actual6 = ds.where(ds > 0, drop=True)
        assert_identical(expected6, actual6)

        # 2d with odd coordinates
        ds = Dataset(
            {"a": (("x", "y"), [[0, 1], [2, 3]])},
            coords={
                "x": [4, 3],
                "y": [1, 2],
                "z": (["x", "y"], [[np.e, np.pi], [np.pi * np.e, np.pi * 3]]),
            },
        )
        expected7 = Dataset(
            {"a": (("x", "y"), [[3]])},
            coords={"x": [3], "y": [2], "z": (["x", "y"], [[np.pi * 3]])},
        )
        actual7 = ds.where(ds > 2, drop=True)
        assert_identical(expected7, actual7)

        # 2d multiple variables
        ds = Dataset(
            {"a": (("x", "y"), [[0, 1], [2, 3]]), "b": (("x", "y"), [[4, 5], [6, 7]])}
        )
        expected8 = Dataset(
            {
                "a": (("x", "y"), [[np.nan, 1], [2, 3]]),
                "b": (("x", "y"), [[4, 5], [6, 7]]),
            }
        )
        actual8 = ds.where(ds > 0, drop=True)
        assert_identical(expected8, actual8)

        # mixed dimensions: PR#6690, Issue#6227
        ds = xr.Dataset(
            {
                "a": ("x", [1, 2, 3]),
                "b": ("y", [2, 3, 4]),
                "c": (("x", "y"), np.arange(9).reshape((3, 3))),
            }
        )
        expected9 = xr.Dataset(
            {
                "a": ("x", [np.nan, 3]),
                "b": ("y", [np.nan, 3, 4]),
                "c": (("x", "y"), np.arange(3.0, 9.0).reshape((2, 3))),
            }
        )
        actual9 = ds.where(ds > 2, drop=True)
        assert actual9.sizes["x"] == 2
        assert_identical(expected9, actual9)

    def test_where_drop_empty(self) -> None:
        # regression test for GH1341
        array = DataArray(np.random.rand(100, 10), dims=["nCells", "nVertLevels"])
        mask = DataArray(np.zeros((100,), dtype="bool"), dims="nCells")
        actual = array.where(mask, drop=True)
        expected = DataArray(np.zeros((0, 10)), dims=["nCells", "nVertLevels"])
        assert_identical(expected, actual)

    def test_where_drop_no_indexes(self) -> None:
        ds = Dataset({"foo": ("x", [0.0, 1.0])})
        expected = Dataset({"foo": ("x", [1.0])})
        actual = ds.where(ds == 1, drop=True)
        assert_identical(expected, actual)

    def test_reduce(self) -> None:
        data = create_test_data()

        assert len(data.mean().coords) == 0

        actual = data.max()
        expected = Dataset({k: v.max() for k, v in data.data_vars.items()})
        assert_equal(expected, actual)

        assert_equal(data.min(dim=["dim1"]), data.min(dim="dim1"))

        for reduct, expected_dims in [
            ("dim2", ["dim3", "time", "dim1"]),
            (["dim2", "time"], ["dim3", "dim1"]),
            (("dim2", "time"), ["dim3", "dim1"]),
            ((), ["dim2", "dim3", "time", "dim1"]),
        ]:
            actual_dims = list(data.min(dim=reduct).dims)
            assert actual_dims == expected_dims

        assert_equal(data.mean(dim=[]), data)

        with pytest.raises(ValueError):
            data.mean(axis=0)

    def test_reduce_coords(self) -> None:
        # regression test for GH1470
        data = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"b": 4})
        expected = xr.Dataset({"a": 2}, coords={"b": 4})
        actual = data.mean("x")
        assert_identical(actual, expected)

        # should be consistent
        actual = data["a"].mean("x").to_dataset()
        assert_identical(actual, expected)

    def test_mean_uint_dtype(self) -> None:
        data = xr.Dataset(
            {
                "a": (("x", "y"), np.arange(6).reshape(3, 2).astype("uint")),
                "b": (("x",), np.array([0.1, 0.2, np.nan])),
            }
        )
        actual = data.mean("x", skipna=True)
        expected = xr.Dataset(
            {"a": data["a"].mean("x"), "b": data["b"].mean("x", skipna=True)}
        )
        assert_identical(actual, expected)

    def test_reduce_bad_dim(self) -> None:
        data = create_test_data()
        with pytest.raises(ValueError, match=r"Dataset does not contain"):
            data.mean(dim="bad_dim")

    def test_reduce_cumsum(self) -> None:
        data = xr.Dataset(
            {"a": 1, "b": ("x", [1, 2]), "c": (("x", "y"), [[np.nan, 3], [0, 4]])}
        )
        assert_identical(data.fillna(0), data.cumsum("y"))

        expected = xr.Dataset(
            {"a": 1, "b": ("x", [1, 3]), "c": (("x", "y"), [[0, 3], [0, 7]])}
        )
        assert_identical(expected, data.cumsum())

    @pytest.mark.parametrize(
        "reduct, expected",
        [
            ("dim1", ["dim2", "dim3", "time", "dim1"]),
            ("dim2", ["dim3", "time", "dim1", "dim2"]),
            ("dim3", ["dim2", "time", "dim1", "dim3"]),
            ("time", ["dim2", "dim3", "dim1"]),
        ],
    )
    @pytest.mark.parametrize("func", ["cumsum", "cumprod"])
    def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None:
        data = create_test_data()
        with pytest.raises(ValueError, match=r"Dataset does not contain"):
            getattr(data, func)(dim="bad_dim")

        # ensure dimensions are correct
        actual = getattr(data, func)(dim=reduct).dims
        assert list(actual) == expected

    def test_reduce_non_numeric(self) -> None:
        data1 = create_test_data(seed=44)
        data2 = create_test_data(seed=44)
        add_vars = {"var4": ["dim1", "dim2"], "var5": ["dim1"]}
        for v, dims in sorted(add_vars.items()):
            size = tuple(data1.dims[d] for d in dims)
            data = np.random.randint(0, 100, size=size).astype(np.str_)
            data1[v] = (dims, data, {"foo": "variable"})

        assert "var4" not in data1.mean() and "var5" not in data1.mean()
        assert_equal(data1.mean(), data2.mean())
        assert_equal(data1.mean(dim="dim1"), data2.mean(dim="dim1"))
        assert "var4" not in data1.mean(dim="dim2") and "var5" in data1.mean(dim="dim2")

    @pytest.mark.filterwarnings(
        "ignore:Once the behaviour of DataArray:DeprecationWarning"
    )
    def test_reduce_strings(self) -> None:
        expected = Dataset({"x": "a"})
        ds = Dataset({"x": ("y", ["a", "b"])})
        ds.coords["y"] = [-10, 10]
        actual = ds.min()
        assert_identical(expected, actual)

        expected = Dataset({"x": "b"})
        actual = ds.max()
        assert_identical(expected, actual)

        expected = Dataset({"x": 0})
        actual = ds.argmin()
        assert_identical(expected, actual)

        expected = Dataset({"x": 1})
        actual = ds.argmax()
        assert_identical(expected, actual)

        expected = Dataset({"x": -10})
        actual = ds.idxmin()
        assert_identical(expected, actual)

        expected = Dataset({"x": 10})
        actual = ds.idxmax()
        assert_identical(expected, actual)

        expected = Dataset({"x": b"a"})
        ds = Dataset({"x": ("y", np.array(["a", "b"], "S1"))})
        actual = ds.min()
        assert_identical(expected, actual)

        expected = Dataset({"x": "a"})
        ds = Dataset({"x": ("y", np.array(["a", "b"], "U1"))})
        actual = ds.min()
        assert_identical(expected, actual)

    def test_reduce_dtypes(self) -> None:
        # regression test for GH342
        expected = Dataset({"x": 1})
        actual = Dataset({"x": True}).sum()
        assert_identical(expected, actual)

        # regression test for GH505
        expected = Dataset({"x": 3})
        actual = Dataset({"x": ("y", np.array([1, 2], "uint16"))}).sum()
        assert_identical(expected, actual)

        expected = Dataset({"x": 1 + 1j})
        actual = Dataset({"x": ("y", [1, 1j])}).sum()
        assert_identical(expected, actual)

    def test_reduce_keep_attrs(self) -> None:
        data = create_test_data()
        _attrs = {"attr1": "value1", "attr2": 2929}

        attrs = dict(_attrs)
        data.attrs = attrs

        # Test dropped attrs
        ds = data.mean()
        assert ds.attrs == {}
        for v in ds.data_vars.values():
            assert v.attrs == {}

        # Test kept attrs
        ds = data.mean(keep_attrs=True)
        assert ds.attrs == attrs
        for k, v in ds.data_vars.items():
            assert v.attrs == data[k].attrs

    @pytest.mark.filterwarnings(
        "ignore:Once the behaviour of DataArray:DeprecationWarning"
    )
    def test_reduce_argmin(self) -> None:
        # regression test for #205
        ds = Dataset({"a": ("x", [0, 1])})
        expected = Dataset({"a": ([], 0)})
        actual = ds.argmin()
        assert_identical(expected, actual)

        actual = ds.argmin("x")
        assert_identical(expected, actual)

    def test_reduce_scalars(self) -> None:
        ds = Dataset({"x": ("a", [2, 2]), "y": 2, "z": ("b", [2])})
        expected = Dataset({"x": 0, "y": 0, "z": 0})
        actual = ds.var()
        assert_identical(expected, actual)

        expected = Dataset({"x": 0, "y": 0, "z": ("b", [0])})
        actual = ds.var("a")
        assert_identical(expected, actual)

    def test_reduce_only_one_axis(self) -> None:
        def mean_only_one_axis(x, axis):
            if not isinstance(axis, integer_types):
                raise TypeError("non-integer axis")
            return x.mean(axis)

        ds = Dataset({"a": (["x", "y"], [[0, 1, 2, 3, 4]])})
        expected = Dataset({"a": ("x", [2])})
        actual = ds.reduce(mean_only_one_axis, "y")
        assert_identical(expected, actual)

        with pytest.raises(
            TypeError, match=r"missing 1 required positional argument: 'axis'"
        ):
            ds.reduce(mean_only_one_axis)

    def test_reduce_no_axis(self) -> None:
        def total_sum(x):
            return np.sum(x.flatten())

        ds = Dataset({"a": (["x", "y"], [[0, 1, 2, 3, 4]])})
        expected = Dataset({"a": ((), 10)})
        actual = ds.reduce(total_sum)
        assert_identical(expected, actual)

        with pytest.raises(TypeError, match=r"unexpected keyword argument 'axis'"):
            ds.reduce(total_sum, dim="x")

    def test_reduce_keepdims(self) -> None:
        ds = Dataset(
            {"a": (["x", "y"], [[0, 1, 2, 3, 4]])},
            coords={
                "y": [0, 1, 2, 3, 4],
                "x": [0],
                "lat": (["x", "y"], [[0, 1, 2, 3, 4]]),
                "c": -999.0,
            },
        )

        # Shape should match behaviour of numpy reductions with keepdims=True
        # Coordinates involved in the reduction should be removed
        actual = ds.mean(keepdims=True)
        expected = Dataset(
            {"a": (["x", "y"], np.mean(ds.a, keepdims=True).data)}, coords={"c": ds.c}
        )
        assert_identical(expected, actual)

        actual = ds.mean("x", keepdims=True)
        expected = Dataset(
            {"a": (["x", "y"], np.mean(ds.a, axis=0, keepdims=True).data)},
            coords={"y": ds.y, "c": ds.c},
        )
        assert_identical(expected, actual)

    @pytest.mark.parametrize("skipna", [True, False, None])
    @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
    def test_quantile(self, q, skipna) -> None:
        ds = create_test_data(seed=123)
        ds.var1.data[0, 0] = np.NaN

        for dim in [None, "dim1", ["dim1"]]:
            ds_quantile = ds.quantile(q, dim=dim, skipna=skipna)
            if is_scalar(q):
                assert "quantile" not in ds_quantile.dims
            else:
                assert "quantile" in ds_quantile.dims

            for var, dar in ds.data_vars.items():
                assert var in ds_quantile
                assert_identical(
                    ds_quantile[var], dar.quantile(q, dim=dim, skipna=skipna)
                )
        dim = ["dim1", "dim2"]
        ds_quantile = ds.quantile(q, dim=dim, skipna=skipna)
        assert "dim3" in ds_quantile.dims
        assert all(d not in ds_quantile.dims for d in dim)

    @pytest.mark.parametrize("skipna", [True, False])
    def test_quantile_skipna(self, skipna) -> None:
        q = 0.1
        dim = "time"
        ds = Dataset({"a": ([dim], np.arange(0, 11))})
        ds = ds.where(ds >= 1)

        result = ds.quantile(q=q, dim=dim, skipna=skipna)

        value = 1.9 if skipna else np.nan
        expected = Dataset({"a": value}, coords={"quantile": q})
        assert_identical(result, expected)

    @pytest.mark.parametrize("method", ["midpoint", "lower"])
    def test_quantile_method(self, method) -> None:

        ds = create_test_data(seed=123)
        q = [0.25, 0.5, 0.75]

        result = ds.quantile(q, method=method)

        assert_identical(result.var1, ds.var1.quantile(q, method=method))
        assert_identical(result.var2, ds.var2.quantile(q, method=method))
        assert_identical(result.var3, ds.var3.quantile(q, method=method))

    @pytest.mark.parametrize("method", ["midpoint", "lower"])
    def test_quantile_interpolation_deprecated(self, method) -> None:

        ds = create_test_data(seed=123)
        q = [0.25, 0.5, 0.75]

        with warnings.catch_warnings(record=True) as w:
            ds.quantile(q, interpolation=method)

            # ensure the warning is only raised once
            assert len(w) == 1

        with warnings.catch_warnings(record=True):
            with pytest.raises(TypeError, match="interpolation and method keywords"):
                ds.quantile(q, method=method, interpolation=method)

    @requires_bottleneck
    def test_rank(self) -> None:
        ds = create_test_data(seed=1234)
        # only ds.var3 depends on dim3
        z = ds.rank("dim3")
        assert ["var3"] == list(z.data_vars)
        # same as dataarray version
        x = z.var3
        y = ds.var3.rank("dim3")
        assert_equal(x, y)
        # coordinates stick
        assert list(z.coords) == list(ds.coords)
        assert list(x.coords) == list(y.coords)
        # invalid dim
        with pytest.raises(ValueError, match=r"does not contain"):
            x.rank("invalid_dim")

    def test_rank_use_bottleneck(self) -> None:
        ds = Dataset({"a": ("x", [0, np.nan, 2]), "b": ("y", [4, 6, 3, 4])})
        with xr.set_options(use_bottleneck=False):
            with pytest.raises(RuntimeError):
                ds.rank("x")

    def test_count(self) -> None:
        ds = Dataset({"x": ("a", [np.nan, 1]), "y": 0, "z": np.nan})
        expected = Dataset({"x": 1, "y": 1, "z": 0})
        actual = ds.count()
        assert_identical(expected, actual)

    def test_map(self) -> None:
        data = create_test_data()
        data.attrs["foo"] = "bar"

        assert_identical(data.map(np.mean), data.mean())

        expected = data.mean(keep_attrs=True)
        actual = data.map(lambda x: x.mean(keep_attrs=True), keep_attrs=True)
        assert_identical(expected, actual)

        assert_identical(data.map(lambda x: x, keep_attrs=True), data.drop_vars("time"))

        def scale(x, multiple=1):
            return multiple * x

        actual = data.map(scale, multiple=2)
        assert_equal(actual["var1"], 2 * data["var1"])
        assert_identical(actual["numbers"], data["numbers"])

        actual = data.map(np.asarray)
        expected = data.drop_vars("time")  # time is not used on a data var
        assert_equal(expected, actual)

    def test_apply_pending_deprecated_map(self) -> None:
        data = create_test_data()
        data.attrs["foo"] = "bar"

        with pytest.warns(PendingDeprecationWarning):
            assert_identical(data.apply(np.mean), data.mean())

    def make_example_math_dataset(self):
        variables = {
            "bar": ("x", np.arange(100, 400, 100)),
            "foo": (("x", "y"), 1.0 * np.arange(12).reshape(3, 4)),
        }
        coords = {"abc": ("x", ["a", "b", "c"]), "y": 10 * np.arange(4)}
        ds = Dataset(variables, coords)
        ds["foo"][0, 0] = np.nan
        return ds

    def test_dataset_number_math(self) -> None:
        ds = self.make_example_math_dataset()

        assert_identical(ds, +ds)
        assert_identical(ds, ds + 0)
        assert_identical(ds, 0 + ds)
        assert_identical(ds, ds + np.array(0))
        assert_identical(ds, np.array(0) + ds)

        actual = ds.copy(deep=True)
        actual += 0
        assert_identical(ds, actual)

    def test_unary_ops(self) -> None:
        ds = self.make_example_math_dataset()

        assert_identical(ds.map(abs), abs(ds))
        assert_identical(ds.map(lambda x: x + 4), ds + 4)

        for func in [
            lambda x: x.isnull(),
            lambda x: x.round(),
            lambda x: x.astype(int),
        ]:
            assert_identical(ds.map(func), func(ds))

        assert_identical(ds.isnull(), ~ds.notnull())

        # don't actually patch these methods in
        with pytest.raises(AttributeError):
            ds.item
        with pytest.raises(AttributeError):
            ds.searchsorted

    def test_dataset_array_math(self) -> None:
        ds = self.make_example_math_dataset()

        expected = ds.map(lambda x: x - ds["foo"])
        assert_identical(expected, ds - ds["foo"])
        assert_identical(expected, -ds["foo"] + ds)
        assert_identical(expected, ds - ds["foo"].variable)
        assert_identical(expected, -ds["foo"].variable + ds)
        actual = ds.copy(deep=True)
        actual -= ds["foo"]
        assert_identical(expected, actual)

        expected = ds.map(lambda x: x + ds["bar"])
        assert_identical(expected, ds + ds["bar"])
        actual = ds.copy(deep=True)
        actual += ds["bar"]
        assert_identical(expected, actual)

        expected = Dataset({"bar": ds["bar"] + np.arange(3)})
        assert_identical(expected, ds[["bar"]] + np.arange(3))
        assert_identical(expected, np.arange(3) + ds[["bar"]])

    def test_dataset_dataset_math(self) -> None:
        ds = self.make_example_math_dataset()

        assert_identical(ds, ds + 0 * ds)
        assert_identical(ds, ds + {"foo": 0, "bar": 0})

        expected = ds.map(lambda x: 2 * x)
        assert_identical(expected, 2 * ds)
        assert_identical(expected, ds + ds)
        assert_identical(expected, ds + ds.data_vars)
        assert_identical(expected, ds + dict(ds.data_vars))

        actual = ds.copy(deep=True)
        expected_id = id(actual)
        actual += ds
        assert_identical(expected, actual)
        assert expected_id == id(actual)

        assert_identical(ds == ds, ds.notnull())

        subsampled = ds.isel(y=slice(2))
        expected = 2 * subsampled
        assert_identical(expected, subsampled + ds)
        assert_identical(expected, ds + subsampled)

    def test_dataset_math_auto_align(self) -> None:
        ds = self.make_example_math_dataset()
        subset = ds.isel(y=[1, 3])
        expected = 2 * subset
        actual = ds + subset
        assert_identical(expected, actual)

        actual = ds.isel(y=slice(1)) + ds.isel(y=slice(1, None))
        expected = 2 * ds.drop_sel(y=ds.y)
        assert_equal(actual, expected)

        actual = ds + ds[["bar"]]
        expected = (2 * ds[["bar"]]).merge(ds.coords)
        assert_identical(expected, actual)

        assert_identical(ds + Dataset(), ds.coords.to_dataset())
        assert_identical(Dataset() + Dataset(), Dataset())

        ds2 = Dataset(coords={"bar": 42})
        assert_identical(ds + ds2, ds.coords.merge(ds2))

        # maybe unary arithmetic with empty datasets should raise instead?
        assert_identical(Dataset() + 1, Dataset())

        actual = ds.copy(deep=True)
        other = ds.isel(y=slice(2))
        actual += other
        expected = ds + other.reindex_like(ds)
        assert_identical(expected, actual)

    def test_dataset_math_errors(self) -> None:
        ds = self.make_example_math_dataset()

        with pytest.raises(TypeError):
            ds["foo"] += ds
        with pytest.raises(TypeError):
            ds["foo"].variable += ds
        with pytest.raises(ValueError, match=r"must have the same"):
            ds += ds[["bar"]]

        # verify we can rollback in-place operations if something goes wrong
        # nb. inplace datetime64 math actually will work with an integer array
        # but not floats thanks to numpy's inconsistent handling
        other = DataArray(np.datetime64("2000-01-01"), coords={"c": 2})
        actual = ds.copy(deep=True)
        with pytest.raises(TypeError):
            actual += other
        assert_identical(actual, ds)

    def test_dataset_transpose(self) -> None:
        ds = Dataset(
            {
                "a": (("x", "y"), np.random.randn(3, 4)),
                "b": (("y", "x"), np.random.randn(4, 3)),
            },
            coords={
                "x": range(3),
                "y": range(4),
                "xy": (("x", "y"), np.random.randn(3, 4)),
            },
        )

        actual = ds.transpose()
        expected = Dataset(
            {"a": (("y", "x"), ds.a.values.T), "b": (("x", "y"), ds.b.values.T)},
            coords={
                "x": ds.x.values,
                "y": ds.y.values,
                "xy": (("y", "x"), ds.xy.values.T),
            },
        )
        assert_identical(expected, actual)

        actual = ds.transpose(...)
        expected = ds
        assert_identical(expected, actual)

        actual = ds.transpose("x", "y")
        expected = ds.map(lambda x: x.transpose("x", "y", transpose_coords=True))
        assert_identical(expected, actual)

        ds = create_test_data()
        actual = ds.transpose()
        for k in ds.variables:
            assert actual[k].dims[::-1] == ds[k].dims

        new_order = ("dim2", "dim3", "dim1", "time")
        actual = ds.transpose(*new_order)
        for k in ds.variables:
            expected_dims = tuple(d for d in new_order if d in ds[k].dims)
            assert actual[k].dims == expected_dims

        # same as above but with ellipsis
        new_order = ("dim2", "dim3", "dim1", "time")
        actual = ds.transpose("dim2", "dim3", ...)
        for k in ds.variables:
            expected_dims = tuple(d for d in new_order if d in ds[k].dims)
            assert actual[k].dims == expected_dims

        # test missing dimension, raise error
        with pytest.raises(ValueError):
            ds.transpose(..., "not_a_dim")

        # test missing dimension, ignore error
        actual = ds.transpose(..., "not_a_dim", missing_dims="ignore")
        expected_ell = ds.transpose(...)
        assert_identical(expected_ell, actual)

        # test missing dimension, raise warning
        with pytest.warns(UserWarning):
            actual = ds.transpose(..., "not_a_dim", missing_dims="warn")
            assert_identical(expected_ell, actual)

        assert "T" not in dir(ds)

    def test_dataset_ellipsis_transpose_different_ordered_vars(self) -> None:
        # https://github.com/pydata/xarray/issues/1081#issuecomment-544350457
        ds = Dataset(
            dict(
                a=(("w", "x", "y", "z"), np.ones((2, 3, 4, 5))),
                b=(("x", "w", "y", "z"), np.zeros((3, 2, 4, 5))),
            )
        )
        result = ds.transpose(..., "z", "y")
        assert list(result["a"].dims) == list("wxzy")
        assert list(result["b"].dims) == list("xwzy")

    def test_dataset_retains_period_index_on_transpose(self) -> None:

        ds = create_test_data()
        ds["time"] = pd.period_range("2000-01-01", periods=20)

        transposed = ds.transpose()

        assert isinstance(transposed.time.to_index(), pd.PeriodIndex)

    def test_dataset_diff_n1_simple(self) -> None:
        ds = Dataset({"foo": ("x", [5, 5, 6, 6])})
        actual = ds.diff("x")
        expected = Dataset({"foo": ("x", [0, 1, 0])})
        assert_equal(expected, actual)

    def test_dataset_diff_n1_label(self) -> None:
        ds = Dataset({"foo": ("x", [5, 5, 6, 6])}, {"x": [0, 1, 2, 3]})
        actual = ds.diff("x", label="lower")
        expected = Dataset({"foo": ("x", [0, 1, 0])}, {"x": [0, 1, 2]})
        assert_equal(expected, actual)

        actual = ds.diff("x", label="upper")
        expected = Dataset({"foo": ("x", [0, 1, 0])}, {"x": [1, 2, 3]})
        assert_equal(expected, actual)

    def test_dataset_diff_n1(self) -> None:
        ds = create_test_data(seed=1)
        actual = ds.diff("dim2")
        expected_dict = {}
        expected_dict["var1"] = DataArray(
            np.diff(ds["var1"].values, axis=1),
            {"dim2": ds["dim2"].values[1:]},
            ["dim1", "dim2"],
        )
        expected_dict["var2"] = DataArray(
            np.diff(ds["var2"].values, axis=1),
            {"dim2": ds["dim2"].values[1:]},
            ["dim1", "dim2"],
        )
        expected_dict["var3"] = ds["var3"]
        expected = Dataset(expected_dict, coords={"time": ds["time"].values})
        expected.coords["numbers"] = ("dim3", ds["numbers"].values)
        assert_equal(expected, actual)

    def test_dataset_diff_n2(self) -> None:
        ds = create_test_data(seed=1)
        actual = ds.diff("dim2", n=2)
        expected_dict = {}
        expected_dict["var1"] = DataArray(
            np.diff(ds["var1"].values, axis=1, n=2),
            {"dim2": ds["dim2"].values[2:]},
            ["dim1", "dim2"],
        )
        expected_dict["var2"] = DataArray(
            np.diff(ds["var2"].values, axis=1, n=2),
            {"dim2": ds["dim2"].values[2:]},
            ["dim1", "dim2"],
        )
        expected_dict["var3"] = ds["var3"]
        expected = Dataset(expected_dict, coords={"time": ds["time"].values})
        expected.coords["numbers"] = ("dim3", ds["numbers"].values)
        assert_equal(expected, actual)

    def test_dataset_diff_exception_n_neg(self) -> None:
        ds = create_test_data(seed=1)
        with pytest.raises(ValueError, match=r"must be non-negative"):
            ds.diff("dim2", n=-1)

    def test_dataset_diff_exception_label_str(self) -> None:
        ds = create_test_data(seed=1)
        with pytest.raises(ValueError, match=r"'label' argument has to"):
            ds.diff("dim2", label="raise_me")  # type: ignore[arg-type]

    @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"foo": -10}])
    def test_shift(self, fill_value) -> None:
        coords = {"bar": ("x", list("abc")), "x": [-4, 3, 2]}
        attrs = {"meta": "data"}
        ds = Dataset({"foo": ("x", [1, 2, 3])}, coords, attrs)
        actual = ds.shift(x=1, fill_value=fill_value)
        if fill_value == dtypes.NA:
            # if we supply the default, we expect the missing value for a
            # float array
            fill_value = np.nan
        elif isinstance(fill_value, dict):
            fill_value = fill_value.get("foo", np.nan)
        expected = Dataset({"foo": ("x", [fill_value, 1, 2])}, coords, attrs)
        assert_identical(expected, actual)

        with pytest.raises(ValueError, match=r"dimensions"):
            ds.shift(foo=123)

    def test_roll_coords(self) -> None:
        coords = {"bar": ("x", list("abc")), "x": [-4, 3, 2]}
        attrs = {"meta": "data"}
        ds = Dataset({"foo": ("x", [1, 2, 3])}, coords, attrs)
        actual = ds.roll(x=1, roll_coords=True)

        ex_coords = {"bar": ("x", list("cab")), "x": [2, -4, 3]}
        expected = Dataset({"foo": ("x", [3, 1, 2])}, ex_coords, attrs)
        assert_identical(expected, actual)

        with pytest.raises(ValueError, match=r"dimensions"):
            ds.roll(foo=123, roll_coords=True)

    def test_roll_no_coords(self) -> None:
        coords = {"bar": ("x", list("abc")), "x": [-4, 3, 2]}
        attrs = {"meta": "data"}
        ds = Dataset({"foo": ("x", [1, 2, 3])}, coords, attrs)
        actual = ds.roll(x=1)

        expected = Dataset({"foo": ("x", [3, 1, 2])}, coords, attrs)
        assert_identical(expected, actual)

        with pytest.raises(ValueError, match=r"dimensions"):
            ds.roll(abc=321)

    def test_roll_multidim(self) -> None:
        # regression test for 2445
        arr = xr.DataArray(
            [[1, 2, 3], [4, 5, 6]],
            coords={"x": range(3), "y": range(2)},
            dims=("y", "x"),
        )
        actual = arr.roll(x=1, roll_coords=True)
        expected = xr.DataArray(
            [[3, 1, 2], [6, 4, 5]], coords=[("y", [0, 1]), ("x", [2, 0, 1])]
        )
        assert_identical(expected, actual)

    def test_real_and_imag(self) -> None:
        attrs = {"foo": "bar"}
        ds = Dataset({"x": ((), 1 + 2j, attrs)}, attrs=attrs)

        expected_re = Dataset({"x": ((), 1, attrs)}, attrs=attrs)
        assert_identical(ds.real, expected_re)

        expected_im = Dataset({"x": ((), 2, attrs)}, attrs=attrs)
        assert_identical(ds.imag, expected_im)

    def test_setattr_raises(self) -> None:
        ds = Dataset({}, coords={"scalar": 1}, attrs={"foo": "bar"})
        with pytest.raises(AttributeError, match=r"cannot set attr"):
            ds.scalar = 2
        with pytest.raises(AttributeError, match=r"cannot set attr"):
            ds.foo = 2
        with pytest.raises(AttributeError, match=r"cannot set attr"):
            ds.other = 2

    def test_filter_by_attrs(self) -> None:
        precip = dict(standard_name="convective_precipitation_flux")
        temp0 = dict(standard_name="air_potential_temperature", height="0 m")
        temp10 = dict(standard_name="air_potential_temperature", height="10 m")
        ds = Dataset(
            {
                "temperature_0": (["t"], [0], temp0),
                "temperature_10": (["t"], [0], temp10),
                "precipitation": (["t"], [0], precip),
            },
            coords={"time": (["t"], [0], dict(axis="T", long_name="time_in_seconds"))},
        )

        # Test return empty Dataset.
        ds.filter_by_attrs(standard_name="invalid_standard_name")
        new_ds = ds.filter_by_attrs(standard_name="invalid_standard_name")
        assert not bool(new_ds.data_vars)

        # Test return one DataArray.
        new_ds = ds.filter_by_attrs(standard_name="convective_precipitation_flux")
        assert new_ds["precipitation"].standard_name == "convective_precipitation_flux"

        assert_equal(new_ds["precipitation"], ds["precipitation"])

        # Test filter coordinates
        new_ds = ds.filter_by_attrs(long_name="time_in_seconds")
        assert new_ds["time"].long_name == "time_in_seconds"
        assert not bool(new_ds.data_vars)

        # Test return more than one DataArray.
        new_ds = ds.filter_by_attrs(standard_name="air_potential_temperature")
        assert len(new_ds.data_vars) == 2
        for var in new_ds.data_vars:
            assert new_ds[var].standard_name == "air_potential_temperature"

        # Test callable.
        new_ds = ds.filter_by_attrs(height=lambda v: v is not None)
        assert len(new_ds.data_vars) == 2
        for var in new_ds.data_vars:
            assert new_ds[var].standard_name == "air_potential_temperature"

        new_ds = ds.filter_by_attrs(height="10 m")
        assert len(new_ds.data_vars) == 1
        for var in new_ds.data_vars:
            assert new_ds[var].height == "10 m"

        # Test return empty Dataset due to conflicting filters
        new_ds = ds.filter_by_attrs(
            standard_name="convective_precipitation_flux", height="0 m"
        )
        assert not bool(new_ds.data_vars)

        # Test return one DataArray with two filter conditions
        new_ds = ds.filter_by_attrs(
            standard_name="air_potential_temperature", height="0 m"
        )
        for var in new_ds.data_vars:
            assert new_ds[var].standard_name == "air_potential_temperature"
            assert new_ds[var].height == "0 m"
            assert new_ds[var].height != "10 m"

        # Test return empty Dataset due to conflicting callables
        new_ds = ds.filter_by_attrs(
            standard_name=lambda v: False, height=lambda v: True
        )
        assert not bool(new_ds.data_vars)

    def test_binary_op_propagate_indexes(self) -> None:
        ds = Dataset(
            {"d1": DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]})}
        )
        expected = ds.xindexes["x"]
        actual = (ds * 2).xindexes["x"]
        assert expected is actual

    def test_binary_op_join_setting(self) -> None:
        # arithmetic_join applies to data array coordinates
        missing_2 = xr.Dataset({"x": [0, 1]})
        missing_0 = xr.Dataset({"x": [1, 2]})
        with xr.set_options(arithmetic_join="outer"):
            actual = missing_2 + missing_0
        expected = xr.Dataset({"x": [0, 1, 2]})
        assert_equal(actual, expected)

        # arithmetic join also applies to data_vars
        ds1 = xr.Dataset({"foo": 1, "bar": 2})
        ds2 = xr.Dataset({"bar": 2, "baz": 3})
        expected = xr.Dataset({"bar": 4})  # default is inner joining
        actual = ds1 + ds2
        assert_equal(actual, expected)

        with xr.set_options(arithmetic_join="outer"):
            expected = xr.Dataset({"foo": np.nan, "bar": 4, "baz": np.nan})
            actual = ds1 + ds2
            assert_equal(actual, expected)

        with xr.set_options(arithmetic_join="left"):
            expected = xr.Dataset({"foo": np.nan, "bar": 4})
            actual = ds1 + ds2
            assert_equal(actual, expected)

        with xr.set_options(arithmetic_join="right"):
            expected = xr.Dataset({"bar": 4, "baz": np.nan})
            actual = ds1 + ds2
            assert_equal(actual, expected)

    def test_full_like(self) -> None:
        # For more thorough tests, see test_variable.py
        # Note: testing data_vars with mismatched dtypes
        ds = Dataset(
            {
                "d1": DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]}),
                "d2": DataArray([1.1, 2.2, 3.3], dims=["y"]),
            },
            attrs={"foo": "bar"},
        )
        actual = full_like(ds, 2)

        expected = ds.copy(deep=True)
        # https://github.com/python/mypy/issues/3004
        expected["d1"].values = [2, 2, 2]  # type: ignore
        expected["d2"].values = [2.0, 2.0, 2.0]  # type: ignore
        assert expected["d1"].dtype == int
        assert expected["d2"].dtype == float
        assert_identical(expected, actual)

        # override dtype
        actual = full_like(ds, fill_value=True, dtype=bool)
        expected = ds.copy(deep=True)
        expected["d1"].values = [True, True, True]  # type: ignore
        expected["d2"].values = [True, True, True]  # type: ignore
        assert expected["d1"].dtype == bool
        assert expected["d2"].dtype == bool
        assert_identical(expected, actual)

        # with multiple fill values
        actual = full_like(ds, {"d1": 1, "d2": 2.3})
        expected = ds.assign(d1=("x", [1, 1, 1]), d2=("y", [2.3, 2.3, 2.3]))
        assert expected["d1"].dtype == int
        assert expected["d2"].dtype == float
        assert_identical(expected, actual)

        # override multiple dtypes
        actual = full_like(ds, fill_value={"d1": 1, "d2": 2.3}, dtype={"d1": bool})
        expected = ds.assign(d1=("x", [True, True, True]), d2=("y", [2.3, 2.3, 2.3]))
        assert expected["d1"].dtype == bool
        assert expected["d2"].dtype == float
        assert_identical(expected, actual)

    def test_combine_first(self) -> None:
        dsx0 = DataArray([0, 0], [("x", ["a", "b"])]).to_dataset(name="dsx0")
        dsx1 = DataArray([1, 1], [("x", ["b", "c"])]).to_dataset(name="dsx1")

        actual = dsx0.combine_first(dsx1)
        expected = Dataset(
            {"dsx0": ("x", [0, 0, np.nan]), "dsx1": ("x", [np.nan, 1, 1])},
            coords={"x": ["a", "b", "c"]},
        )
        assert_equal(actual, expected)
        assert_equal(actual, xr.merge([dsx0, dsx1]))

        # works just like xr.merge([self, other])
        dsy2 = DataArray([2, 2, 2], [("x", ["b", "c", "d"])]).to_dataset(name="dsy2")
        actual = dsx0.combine_first(dsy2)
        expected = xr.merge([dsy2, dsx0])
        assert_equal(actual, expected)

    def test_sortby(self) -> None:
        ds = Dataset(
            {
                "A": DataArray(
                    [[1, 2], [3, 4], [5, 6]], [("x", ["c", "b", "a"]), ("y", [1, 0])]
                ),
                "B": DataArray([[5, 6], [7, 8], [9, 10]], dims=["x", "y"]),
            }
        )

        sorted1d = Dataset(
            {
                "A": DataArray(
                    [[5, 6], [3, 4], [1, 2]], [("x", ["a", "b", "c"]), ("y", [1, 0])]
                ),
                "B": DataArray([[9, 10], [7, 8], [5, 6]], dims=["x", "y"]),
            }
        )

        sorted2d = Dataset(
            {
                "A": DataArray(
                    [[6, 5], [4, 3], [2, 1]], [("x", ["a", "b", "c"]), ("y", [0, 1])]
                ),
                "B": DataArray([[10, 9], [8, 7], [6, 5]], dims=["x", "y"]),
            }
        )

        expected = sorted1d
        dax = DataArray([100, 99, 98], [("x", ["c", "b", "a"])])
        actual = ds.sortby(dax)
        assert_equal(actual, expected)

        # test descending order sort
        actual = ds.sortby(dax, ascending=False)
        assert_equal(actual, ds)

        # test alignment (fills in nan for 'c')
        dax_short = DataArray([98, 97], [("x", ["b", "a"])])
        actual = ds.sortby(dax_short)
        assert_equal(actual, expected)

        # test 1-D lexsort
        # dax0 is sorted first to give indices of [1, 2, 0]
        # and then dax1 would be used to move index 2 ahead of 1
        dax0 = DataArray([100, 95, 95], [("x", ["c", "b", "a"])])
        dax1 = DataArray([0, 1, 0], [("x", ["c", "b", "a"])])
        actual = ds.sortby([dax0, dax1])  # lexsort underneath gives [2, 1, 0]
        assert_equal(actual, expected)

        expected = sorted2d
        # test multi-dim sort by 1D dataarray values
        day = DataArray([90, 80], [("y", [1, 0])])
        actual = ds.sortby([day, dax])
        assert_equal(actual, expected)

        # test exception-raising
        with pytest.raises(KeyError):
            actual = ds.sortby("z")

        with pytest.raises(ValueError) as excinfo:
            actual = ds.sortby(ds["A"])
        assert "DataArray is not 1-D" in str(excinfo.value)

        expected = sorted1d
        actual = ds.sortby("x")
        assert_equal(actual, expected)

        # test pandas.MultiIndex
        indices = (("b", 1), ("b", 0), ("a", 1), ("a", 0))
        midx = pd.MultiIndex.from_tuples(indices, names=["one", "two"])
        ds_midx = Dataset(
            {
                "A": DataArray(
                    [[1, 2], [3, 4], [5, 6], [7, 8]], [("x", midx), ("y", [1, 0])]
                ),
                "B": DataArray([[5, 6], [7, 8], [9, 10], [11, 12]], dims=["x", "y"]),
            }
        )
        actual = ds_midx.sortby("x")
        midx_reversed = pd.MultiIndex.from_tuples(
            tuple(reversed(indices)), names=["one", "two"]
        )
        expected = Dataset(
            {
                "A": DataArray(
                    [[7, 8], [5, 6], [3, 4], [1, 2]],
                    [("x", midx_reversed), ("y", [1, 0])],
                ),
                "B": DataArray([[11, 12], [9, 10], [7, 8], [5, 6]], dims=["x", "y"]),
            }
        )
        assert_equal(actual, expected)

        # multi-dim sort by coordinate objects
        expected = sorted2d
        actual = ds.sortby(["x", "y"])
        assert_equal(actual, expected)

        # test descending order sort
        actual = ds.sortby(["x", "y"], ascending=False)
        assert_equal(actual, ds)

    def test_attribute_access(self) -> None:
        ds = create_test_data(seed=1)
        for key in ["var1", "var2", "var3", "time", "dim1", "dim2", "dim3", "numbers"]:
            assert_equal(ds[key], getattr(ds, key))
            assert key in dir(ds)

        for key in ["dim3", "dim1", "numbers"]:
            assert_equal(ds["var3"][key], getattr(ds.var3, key))
            assert key in dir(ds["var3"])
        # attrs
        assert ds["var3"].attrs["foo"] == ds.var3.foo
        assert "foo" in dir(ds["var3"])

    def test_ipython_key_completion(self) -> None:
        ds = create_test_data(seed=1)
        actual = ds._ipython_key_completions_()
        expected = ["var1", "var2", "var3", "time", "dim1", "dim2", "dim3", "numbers"]
        for item in actual:
            ds[item]  # should not raise
        assert sorted(actual) == sorted(expected)

        # for dataarray
        actual = ds["var3"]._ipython_key_completions_()
        expected = ["dim3", "dim1", "numbers"]
        for item in actual:
            ds["var3"][item]  # should not raise
        assert sorted(actual) == sorted(expected)

        # MultiIndex
        ds_midx = ds.stack(dim12=["dim2", "dim3"])
        actual = ds_midx._ipython_key_completions_()
        expected = [
            "var1",
            "var2",
            "var3",
            "time",
            "dim1",
            "dim2",
            "dim3",
            "numbers",
            "dim12",
        ]
        for item in actual:
            ds_midx[item]  # should not raise
        assert sorted(actual) == sorted(expected)

        # coords
        actual = ds.coords._ipython_key_completions_()
        expected = ["time", "dim1", "dim2", "dim3", "numbers"]
        for item in actual:
            ds.coords[item]  # should not raise
        assert sorted(actual) == sorted(expected)

        actual = ds["var3"].coords._ipython_key_completions_()
        expected = ["dim1", "dim3", "numbers"]
        for item in actual:
            ds["var3"].coords[item]  # should not raise
        assert sorted(actual) == sorted(expected)

        # data_vars
        actual = ds.data_vars._ipython_key_completions_()
        expected = ["var1", "var2", "var3", "dim1"]
        for item in actual:
            ds.data_vars[item]  # should not raise
        assert sorted(actual) == sorted(expected)

    def test_polyfit_output(self) -> None:
        ds = create_test_data(seed=1)

        out = ds.polyfit("dim2", 2, full=False)
        assert "var1_polyfit_coefficients" in out

        out = ds.polyfit("dim1", 2, full=True)
        assert "var1_polyfit_coefficients" in out
        assert "dim1_matrix_rank" in out

        out = ds.polyfit("time", 2)
        assert len(out.data_vars) == 0

    def test_polyfit_warnings(self) -> None:
        ds = create_test_data(seed=1)

        with warnings.catch_warnings(record=True) as ws:
            ds.var1.polyfit("dim2", 10, full=False)
            assert len(ws) == 1
            assert ws[0].category == np.RankWarning
            ds.var1.polyfit("dim2", 10, full=True)
            assert len(ws) == 1

    def test_pad(self) -> None:
        ds = create_test_data(seed=1)
        padded = ds.pad(dim2=(1, 1), constant_values=42)

        assert padded["dim2"].shape == (11,)
        assert padded["var1"].shape == (8, 11)
        assert padded["var2"].shape == (8, 11)
        assert padded["var3"].shape == (10, 8)
        assert dict(padded.dims) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20}

        np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42)
        np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan)

    @pytest.mark.parametrize(
        ["keep_attrs", "attrs", "expected"],
        [
            pytest.param(None, {"a": 1, "b": 2}, {"a": 1, "b": 2}, id="default"),
            pytest.param(False, {"a": 1, "b": 2}, {}, id="False"),
            pytest.param(True, {"a": 1, "b": 2}, {"a": 1, "b": 2}, id="True"),
        ],
    )
    def test_pad_keep_attrs(self, keep_attrs, attrs, expected) -> None:
        ds = xr.Dataset(
            {"a": ("x", [1, 2], attrs), "b": ("y", [1, 2], attrs)},
            coords={"c": ("x", [-1, 1], attrs), "d": ("y", [-1, 1], attrs)},
            attrs=attrs,
        )
        expected = xr.Dataset(
            {"a": ("x", [0, 1, 2, 0], expected), "b": ("y", [1, 2], attrs)},
            coords={
                "c": ("x", [np.nan, -1, 1, np.nan], expected),
                "d": ("y", [-1, 1], attrs),
            },
            attrs=expected,
        )

        keep_attrs_ = "default" if keep_attrs is None else keep_attrs

        with set_options(keep_attrs=keep_attrs_):
            actual = ds.pad({"x": (1, 1)}, mode="constant", constant_values=0)
            xr.testing.assert_identical(actual, expected)

        actual = ds.pad(
            {"x": (1, 1)}, mode="constant", constant_values=0, keep_attrs=keep_attrs
        )
        xr.testing.assert_identical(actual, expected)

    def test_astype_attrs(self) -> None:
        data = create_test_data(seed=123)
        data.attrs["foo"] = "bar"

        assert data.attrs == data.astype(float).attrs
        assert data.var1.attrs == data.astype(float).var1.attrs
        assert not data.astype(float, keep_attrs=False).attrs
        assert not data.astype(float, keep_attrs=False).var1.attrs

    @pytest.mark.parametrize("parser", ["pandas", "python"])
    @pytest.mark.parametrize(
        "engine", ["python", None, pytest.param("numexpr", marks=[requires_numexpr])]
    )
    @pytest.mark.parametrize(
        "backend", ["numpy", pytest.param("dask", marks=[requires_dask])]
    )
    def test_query(self, backend, engine, parser) -> None:
        """Test querying a dataset."""

        # setup test data
        np.random.seed(42)
        a = np.arange(0, 10, 1)
        b = np.random.randint(0, 100, size=10)
        c = np.linspace(0, 1, 20)
        d = np.random.choice(["foo", "bar", "baz"], size=30, replace=True).astype(
            object
        )
        e = np.arange(0, 10 * 20).reshape(10, 20)
        f = np.random.normal(0, 1, size=(10, 20, 30))
        if backend == "numpy":
            ds = Dataset(
                {
                    "a": ("x", a),
                    "b": ("x", b),
                    "c": ("y", c),
                    "d": ("z", d),
                    "e": (("x", "y"), e),
                    "f": (("x", "y", "z"), f),
                }
            )
        elif backend == "dask":
            ds = Dataset(
                {
                    "a": ("x", da.from_array(a, chunks=3)),
                    "b": ("x", da.from_array(b, chunks=3)),
                    "c": ("y", da.from_array(c, chunks=7)),
                    "d": ("z", da.from_array(d, chunks=12)),
                    "e": (("x", "y"), da.from_array(e, chunks=(3, 7))),
                    "f": (("x", "y", "z"), da.from_array(f, chunks=(3, 7, 12))),
                }
            )

        # query single dim, single variable
        actual = ds.query(x="a > 5", engine=engine, parser=parser)
        expect = ds.isel(x=(a > 5))
        assert_identical(expect, actual)

        # query single dim, single variable, via dict
        actual = ds.query(dict(x="a > 5"), engine=engine, parser=parser)
        expect = ds.isel(dict(x=(a > 5)))
        assert_identical(expect, actual)

        # query single dim, single variable
        actual = ds.query(x="b > 50", engine=engine, parser=parser)
        expect = ds.isel(x=(b > 50))
        assert_identical(expect, actual)

        # query single dim, single variable
        actual = ds.query(y="c < .5", engine=engine, parser=parser)
        expect = ds.isel(y=(c < 0.5))
        assert_identical(expect, actual)

        # query single dim, single string variable
        if parser == "pandas":
            # N.B., this query currently only works with the pandas parser
            # xref https://github.com/pandas-dev/pandas/issues/40436
            actual = ds.query(z='d == "bar"', engine=engine, parser=parser)
            expect = ds.isel(z=(d == "bar"))
            assert_identical(expect, actual)

        # query single dim, multiple variables
        actual = ds.query(x="(a > 5) & (b > 50)", engine=engine, parser=parser)
        expect = ds.isel(x=((a > 5) & (b > 50)))
        assert_identical(expect, actual)

        # query single dim, multiple variables with computation
        actual = ds.query(x="(a * b) > 250", engine=engine, parser=parser)
        expect = ds.isel(x=(a * b) > 250)
        assert_identical(expect, actual)

        # check pandas query syntax is supported
        if parser == "pandas":
            actual = ds.query(x="(a > 5) and (b > 50)", engine=engine, parser=parser)
            expect = ds.isel(x=((a > 5) & (b > 50)))
            assert_identical(expect, actual)

        # query multiple dims via kwargs
        actual = ds.query(x="a > 5", y="c < .5", engine=engine, parser=parser)
        expect = ds.isel(x=(a > 5), y=(c < 0.5))
        assert_identical(expect, actual)

        # query multiple dims via kwargs
        if parser == "pandas":
            actual = ds.query(
                x="a > 5", y="c < .5", z="d == 'bar'", engine=engine, parser=parser
            )
            expect = ds.isel(x=(a > 5), y=(c < 0.5), z=(d == "bar"))
            assert_identical(expect, actual)

        # query multiple dims via dict
        actual = ds.query(dict(x="a > 5", y="c < .5"), engine=engine, parser=parser)
        expect = ds.isel(dict(x=(a > 5), y=(c < 0.5)))
        assert_identical(expect, actual)

        # query multiple dims via dict
        if parser == "pandas":
            actual = ds.query(
                dict(x="a > 5", y="c < .5", z="d == 'bar'"),
                engine=engine,
                parser=parser,
            )
            expect = ds.isel(dict(x=(a > 5), y=(c < 0.5), z=(d == "bar")))
            assert_identical(expect, actual)

        # test error handling
        with pytest.raises(ValueError):
            ds.query("a > 5")  # type: ignore # must be dict or kwargs
        with pytest.raises(ValueError):
            ds.query(x=(a > 5))
        with pytest.raises(IndexError):
            ds.query(y="a > 5")  # wrong length dimension
        with pytest.raises(IndexError):
            ds.query(x="c < .5")  # wrong length dimension
        with pytest.raises(IndexError):
            ds.query(x="e > 100")  # wrong number of dimensions
        with pytest.raises(UndefinedVariableError):
            ds.query(x="spam > 50")  # name not present


# pytest tests — new tests should go here, rather than in the class.


@pytest.mark.parametrize("test_elements", ([1, 2], np.array([1, 2]), DataArray([1, 2])))
def test_isin(test_elements, backend) -> None:
    expected = Dataset(
        data_vars={
            "var1": (("dim1",), [0, 1]),
            "var2": (("dim1",), [1, 1]),
            "var3": (("dim1",), [0, 1]),
        }
    ).astype("bool")

    if backend == "dask":
        expected = expected.chunk()

    result = Dataset(
        data_vars={
            "var1": (("dim1",), [0, 1]),
            "var2": (("dim1",), [1, 2]),
            "var3": (("dim1",), [0, 1]),
        }
    ).isin(test_elements)

    assert_equal(result, expected)


def test_isin_dataset() -> None:
    ds = Dataset({"x": [1, 2]})
    with pytest.raises(TypeError):
        ds.isin(ds)


@pytest.mark.parametrize(
    "unaligned_coords",
    (
        {"x": [2, 1, 0]},
        {"x": (["x"], np.asarray([2, 1, 0]))},
        {"x": (["x"], np.asarray([1, 2, 0]))},
        {"x": pd.Index([2, 1, 0])},
        {"x": Variable(dims="x", data=[0, 2, 1])},
        {"x": IndexVariable(dims="x", data=[0, 1, 2])},
        {"y": 42},
        {"y": ("x", [2, 1, 0])},
        {"y": ("x", np.asarray([2, 1, 0]))},
        {"y": (["x"], np.asarray([2, 1, 0]))},
    ),
)
@pytest.mark.parametrize("coords", ({"x": ("x", [0, 1, 2])}, {"x": [0, 1, 2]}))
def test_dataset_constructor_aligns_to_explicit_coords(
    unaligned_coords, coords
) -> None:

    a = xr.DataArray([1, 2, 3], dims=["x"], coords=unaligned_coords)

    expected = xr.Dataset(coords=coords)
    expected["a"] = a

    result = xr.Dataset({"a": a}, coords=coords)

    assert_equal(expected, result)


def test_error_message_on_set_supplied() -> None:
    with pytest.raises(TypeError, match="has invalid type <class 'set'>"):
        xr.Dataset(dict(date=[1, 2, 3], sec={4}))


@pytest.mark.parametrize("unaligned_coords", ({"y": ("b", np.asarray([2, 1, 0]))},))
def test_constructor_raises_with_invalid_coords(unaligned_coords) -> None:

    with pytest.raises(ValueError, match="not a subset of the DataArray dimensions"):
        xr.DataArray([1, 2, 3], dims=["x"], coords=unaligned_coords)


@pytest.mark.parametrize("ds", [3], indirect=True)
def test_dir_expected_attrs(ds) -> None:

    some_expected_attrs = {"pipe", "mean", "isnull", "var1", "dim2", "numbers"}
    result = dir(ds)
    assert set(result) >= some_expected_attrs


def test_dir_non_string(ds) -> None:
    # add a numbered key to ensure this doesn't break dir
    ds[5] = "foo"
    result = dir(ds)
    assert 5 not in result

    # GH2172
    sample_data = np.random.uniform(size=[2, 2000, 10000])
    x = xr.Dataset({"sample_data": (sample_data.shape, sample_data)})
    x2 = x["sample_data"]
    dir(x2)


def test_dir_unicode(ds) -> None:
    ds["unicode"] = "uni"
    result = dir(ds)
    assert "unicode" in result


def test_raise_no_warning_for_nan_in_binary_ops() -> None:
    with assert_no_warnings():
        Dataset(data_vars={"x": ("y", [1, 2, np.NaN])}) > 0


@pytest.mark.filterwarnings("error")
@pytest.mark.parametrize("ds", (2,), indirect=True)
def test_raise_no_warning_assert_close(ds) -> None:
    assert_allclose(ds, ds)


@pytest.mark.parametrize("dask", [True, False])
@pytest.mark.parametrize("edge_order", [1, 2])
def test_differentiate(dask, edge_order) -> None:
    rs = np.random.RandomState(42)
    coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8]

    da = xr.DataArray(
        rs.randn(8, 6),
        dims=["x", "y"],
        coords={"x": coord, "z": 3, "x2d": (("x", "y"), rs.randn(8, 6))},
    )
    if dask and has_dask:
        da = da.chunk({"x": 4})

    ds = xr.Dataset({"var": da})

    # along x
    actual = da.differentiate("x", edge_order)
    expected_x = xr.DataArray(
        np.gradient(da, da["x"], axis=0, edge_order=edge_order),
        dims=da.dims,
        coords=da.coords,
    )
    assert_equal(expected_x, actual)
    assert_equal(
        ds["var"].differentiate("x", edge_order=edge_order),
        ds.differentiate("x", edge_order=edge_order)["var"],
    )
    # coordinate should not change
    assert_equal(da["x"], actual["x"])

    # along y
    actual = da.differentiate("y", edge_order)
    expected_y = xr.DataArray(
        np.gradient(da, da["y"], axis=1, edge_order=edge_order),
        dims=da.dims,
        coords=da.coords,
    )
    assert_equal(expected_y, actual)
    assert_equal(actual, ds.differentiate("y", edge_order=edge_order)["var"])
    assert_equal(
        ds["var"].differentiate("y", edge_order=edge_order),
        ds.differentiate("y", edge_order=edge_order)["var"],
    )

    with pytest.raises(ValueError):
        da.differentiate("x2d")


@pytest.mark.parametrize("dask", [True, False])
def test_differentiate_datetime(dask) -> None:
    rs = np.random.RandomState(42)
    coord = np.array(
        [
            "2004-07-13",
            "2006-01-13",
            "2010-08-13",
            "2010-09-13",
            "2010-10-11",
            "2010-12-13",
            "2011-02-13",
            "2012-08-13",
        ],
        dtype="datetime64",
    )

    da = xr.DataArray(
        rs.randn(8, 6),
        dims=["x", "y"],
        coords={"x": coord, "z": 3, "x2d": (("x", "y"), rs.randn(8, 6))},
    )
    if dask and has_dask:
        da = da.chunk({"x": 4})

    # along x
    actual = da.differentiate("x", edge_order=1, datetime_unit="D")
    expected_x = xr.DataArray(
        np.gradient(
            da, da["x"].variable._to_numeric(datetime_unit="D"), axis=0, edge_order=1
        ),
        dims=da.dims,
        coords=da.coords,
    )
    assert_equal(expected_x, actual)

    actual2 = da.differentiate("x", edge_order=1, datetime_unit="h")
    assert np.allclose(actual, actual2 * 24)

    # for datetime variable
    actual = da["x"].differentiate("x", edge_order=1, datetime_unit="D")
    assert np.allclose(actual, 1.0)

    # with different date unit
    da = xr.DataArray(coord.astype("datetime64[ms]"), dims=["x"], coords={"x": coord})
    actual = da.differentiate("x", edge_order=1)
    assert np.allclose(actual, 1.0)


@pytest.mark.skipif(not has_cftime, reason="Test requires cftime.")
@pytest.mark.parametrize("dask", [True, False])
def test_differentiate_cftime(dask) -> None:
    rs = np.random.RandomState(42)
    coord = xr.cftime_range("2000", periods=8, freq="2M")

    da = xr.DataArray(
        rs.randn(8, 6),
        coords={"time": coord, "z": 3, "t2d": (("time", "y"), rs.randn(8, 6))},
        dims=["time", "y"],
    )

    if dask and has_dask:
        da = da.chunk({"time": 4})

    actual = da.differentiate("time", edge_order=1, datetime_unit="D")
    expected_data = np.gradient(
        da, da["time"].variable._to_numeric(datetime_unit="D"), axis=0, edge_order=1
    )
    expected = xr.DataArray(expected_data, coords=da.coords, dims=da.dims)
    assert_equal(expected, actual)

    actual2 = da.differentiate("time", edge_order=1, datetime_unit="h")
    assert_allclose(actual, actual2 * 24)

    # Test the differentiation of datetimes themselves
    actual = da["time"].differentiate("time", edge_order=1, datetime_unit="D")
    assert_allclose(actual, xr.ones_like(da["time"]).astype(float))


@pytest.mark.parametrize("dask", [True, False])
def test_integrate(dask) -> None:
    rs = np.random.RandomState(42)
    coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8]

    da = xr.DataArray(
        rs.randn(8, 6),
        dims=["x", "y"],
        coords={
            "x": coord,
            "x2": (("x",), rs.randn(8)),
            "z": 3,
            "x2d": (("x", "y"), rs.randn(8, 6)),
        },
    )
    if dask and has_dask:
        da = da.chunk({"x": 4})

    ds = xr.Dataset({"var": da})

    # along x
    actual = da.integrate("x")
    # coordinate that contains x should be dropped.
    expected_x = xr.DataArray(
        np.trapz(da.compute(), da["x"], axis=0),
        dims=["y"],
        coords={k: v for k, v in da.coords.items() if "x" not in v.dims},
    )
    assert_allclose(expected_x, actual.compute())
    assert_equal(ds["var"].integrate("x"), ds.integrate("x")["var"])

    # make sure result is also a dask array (if the source is dask array)
    assert isinstance(actual.data, type(da.data))

    # along y
    actual = da.integrate("y")
    expected_y = xr.DataArray(
        np.trapz(da, da["y"], axis=1),
        dims=["x"],
        coords={k: v for k, v in da.coords.items() if "y" not in v.dims},
    )
    assert_allclose(expected_y, actual.compute())
    assert_equal(actual, ds.integrate("y")["var"])
    assert_equal(ds["var"].integrate("y"), ds.integrate("y")["var"])

    # along x and y
    actual = da.integrate(("y", "x"))
    assert actual.ndim == 0

    with pytest.raises(ValueError):
        da.integrate("x2d")


@requires_scipy
@pytest.mark.parametrize("dask", [True, False])
def test_cumulative_integrate(dask) -> None:
    rs = np.random.RandomState(43)
    coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8]

    da = xr.DataArray(
        rs.randn(8, 6),
        dims=["x", "y"],
        coords={
            "x": coord,
            "x2": (("x",), rs.randn(8)),
            "z": 3,
            "x2d": (("x", "y"), rs.randn(8, 6)),
        },
    )
    if dask and has_dask:
        da = da.chunk({"x": 4})

    ds = xr.Dataset({"var": da})

    # along x
    actual = da.cumulative_integrate("x")

    # From scipy-1.6.0 cumtrapz is renamed to cumulative_trapezoid, but cumtrapz is
    # still provided for backward compatibility
    from scipy.integrate import cumtrapz

    expected_x = xr.DataArray(
        cumtrapz(da.compute(), da["x"], axis=0, initial=0.0),
        dims=["x", "y"],
        coords=da.coords,
    )
    assert_allclose(expected_x, actual.compute())
    assert_equal(
        ds["var"].cumulative_integrate("x"),
        ds.cumulative_integrate("x")["var"],
    )

    # make sure result is also a dask array (if the source is dask array)
    assert isinstance(actual.data, type(da.data))

    # along y
    actual = da.cumulative_integrate("y")
    expected_y = xr.DataArray(
        cumtrapz(da, da["y"], axis=1, initial=0.0),
        dims=["x", "y"],
        coords=da.coords,
    )
    assert_allclose(expected_y, actual.compute())
    assert_equal(actual, ds.cumulative_integrate("y")["var"])
    assert_equal(
        ds["var"].cumulative_integrate("y"),
        ds.cumulative_integrate("y")["var"],
    )

    # along x and y
    actual = da.cumulative_integrate(("y", "x"))
    assert actual.ndim == 2

    with pytest.raises(ValueError):
        da.cumulative_integrate("x2d")


@pytest.mark.parametrize("dask", [True, False])
@pytest.mark.parametrize("which_datetime", ["np", "cftime"])
def test_trapz_datetime(dask, which_datetime) -> None:
    rs = np.random.RandomState(42)
    if which_datetime == "np":
        coord = np.array(
            [
                "2004-07-13",
                "2006-01-13",
                "2010-08-13",
                "2010-09-13",
                "2010-10-11",
                "2010-12-13",
                "2011-02-13",
                "2012-08-13",
            ],
            dtype="datetime64",
        )
    else:
        if not has_cftime:
            pytest.skip("Test requires cftime.")
        coord = xr.cftime_range("2000", periods=8, freq="2D")

    da = xr.DataArray(
        rs.randn(8, 6),
        coords={"time": coord, "z": 3, "t2d": (("time", "y"), rs.randn(8, 6))},
        dims=["time", "y"],
    )

    if dask and has_dask:
        da = da.chunk({"time": 4})

    actual = da.integrate("time", datetime_unit="D")
    expected_data = np.trapz(
        da.compute().data,
        duck_array_ops.datetime_to_numeric(da["time"].data, datetime_unit="D"),
        axis=0,
    )
    expected = xr.DataArray(
        expected_data,
        dims=["y"],
        coords={k: v for k, v in da.coords.items() if "time" not in v.dims},
    )
    assert_allclose(expected, actual.compute())

    # make sure result is also a dask array (if the source is dask array)
    assert isinstance(actual.data, type(da.data))

    actual2 = da.integrate("time", datetime_unit="h")
    assert_allclose(actual, actual2 / 24.0)


def test_no_dict() -> None:
    d = Dataset()
    with pytest.raises(AttributeError):
        d.__dict__


def test_subclass_slots() -> None:
    """Test that Dataset subclasses must explicitly define ``__slots__``.

    .. note::
       As of 0.13.0, this is actually mitigated into a FutureWarning for any class
       defined outside of the xarray package.
    """
    with pytest.raises(AttributeError) as e:

        class MyDS(Dataset):
            pass

    assert str(e.value) == "MyDS must explicitly define __slots__"


def test_weakref() -> None:
    """Classes with __slots__ are incompatible with the weakref module unless they
    explicitly state __weakref__ among their slots
    """
    from weakref import ref

    ds = Dataset()
    r = ref(ds)
    assert r() is ds


def test_deepcopy_obj_array() -> None:
    x0 = Dataset(dict(foo=DataArray(np.array([object()]))))
    x1 = deepcopy(x0)
    assert x0["foo"].values[0] is not x1["foo"].values[0]


def test_deepcopy_recursive() -> None:
    # GH:issue:7111

    # direct recursion
    ds = xr.Dataset({"a": (["x"], [1, 2])})
    ds.attrs["other"] = ds

    # TODO: cannot use assert_identical on recursive Vars yet...
    # lets just ensure that deep copy works without RecursionError
    ds.copy(deep=True)

    # indirect recursion
    ds2 = xr.Dataset({"b": (["y"], [3, 4])})
    ds.attrs["other"] = ds2
    ds2.attrs["other"] = ds

    # TODO: cannot use assert_identical on recursive Vars yet...
    # lets just ensure that deep copy works without RecursionError
    ds.copy(deep=True)
    ds2.copy(deep=True)


def test_clip(ds) -> None:
    result = ds.clip(min=0.5)
    assert all((result.min(...) >= 0.5).values())

    result = ds.clip(max=0.5)
    assert all((result.max(...) <= 0.5).values())

    result = ds.clip(min=0.25, max=0.75)
    assert all((result.min(...) >= 0.25).values())
    assert all((result.max(...) <= 0.75).values())

    result = ds.clip(min=ds.mean("y"), max=ds.mean("y"))
    assert result.dims == ds.dims


class TestDropDuplicates:
    @pytest.mark.parametrize("keep", ["first", "last", False])
    def test_drop_duplicates_1d(self, keep) -> None:
        ds = xr.Dataset(
            {"a": ("time", [0, 5, 6, 7]), "b": ("time", [9, 3, 8, 2])},
            coords={"time": [0, 0, 1, 2]},
        )

        if keep == "first":
            a = [0, 6, 7]
            b = [9, 8, 2]
            time = [0, 1, 2]
        elif keep == "last":
            a = [5, 6, 7]
            b = [3, 8, 2]
            time = [0, 1, 2]
        else:
            a = [6, 7]
            b = [8, 2]
            time = [1, 2]

        expected = xr.Dataset(
            {"a": ("time", a), "b": ("time", b)}, coords={"time": time}
        )
        result = ds.drop_duplicates("time", keep=keep)
        assert_equal(expected, result)

        with pytest.raises(ValueError, match="['space'] not found"):
            ds.drop_duplicates("space", keep=keep)


class TestNumpyCoercion:
    def test_from_numpy(self) -> None:
        ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])})

        assert_identical(ds.as_numpy(), ds)

    @requires_dask
    def test_from_dask(self) -> None:
        ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])})
        ds_chunked = ds.chunk(1)

        assert_identical(ds_chunked.as_numpy(), ds.compute())

    @requires_pint
    def test_from_pint(self) -> None:
        from pint import Quantity

        arr = np.array([1, 2, 3])
        ds = xr.Dataset(
            {"a": ("x", Quantity(arr, units="Pa"))},
            coords={"lat": ("x", Quantity(arr + 3, units="m"))},
        )

        expected = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", arr + 3)})
        assert_identical(ds.as_numpy(), expected)

    @requires_sparse
    def test_from_sparse(self) -> None:
        import sparse

        arr = np.diagflat([1, 2, 3])
        sparr = sparse.COO.from_numpy(arr)
        ds = xr.Dataset(
            {"a": (["x", "y"], sparr)}, coords={"elev": (("x", "y"), sparr + 3)}
        )

        expected = xr.Dataset(
            {"a": (["x", "y"], arr)}, coords={"elev": (("x", "y"), arr + 3)}
        )
        assert_identical(ds.as_numpy(), expected)

    @requires_cupy
    def test_from_cupy(self) -> None:
        import cupy as cp

        arr = np.array([1, 2, 3])
        ds = xr.Dataset(
            {"a": ("x", cp.array(arr))}, coords={"lat": ("x", cp.array(arr + 3))}
        )

        expected = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", arr + 3)})
        assert_identical(ds.as_numpy(), expected)

    @requires_dask
    @requires_pint
    def test_from_pint_wrapping_dask(self) -> None:
        import dask
        from pint import Quantity

        arr = np.array([1, 2, 3])
        d = dask.array.from_array(arr)
        ds = xr.Dataset(
            {"a": ("x", Quantity(d, units="Pa"))},
            coords={"lat": ("x", Quantity(d, units="m") * 2)},
        )

        result = ds.as_numpy()
        expected = xr.Dataset({"a": ("x", arr)}, coords={"lat": ("x", arr * 2)})
        assert_identical(result, expected)


def test_string_keys_typing() -> None:
    """Tests that string keys to `variables` are permitted by mypy"""

    da = xr.DataArray(np.arange(10), dims=["x"])
    ds = xr.Dataset(dict(x=da))
    mapping = {"y": da}
    ds.assign(variables=mapping)


def test_transpose_error() -> None:
    # Transpose dataset with list as argument
    # Should raise error
    ds = xr.Dataset({"foo": (("x", "y"), [[21]]), "bar": (("x", "y"), [[12]])})

    with pytest.raises(
        TypeError,
        match=re.escape(
            "transpose requires dims to be passed as multiple arguments. Expected `'y', 'x'`. Received `['y', 'x']` instead"
        ),
    ):
        ds.transpose(["y", "x"])  # type: ignore
