Split a table in two and remove repeated values.

unjoin(data, ..., key_col = ".idx0")

# S3 method for data.frame
unjoin(data, ..., key_col = ".idx0")

# S3 method for unjoin
unjoin(data, ..., key_col = ".idx0")

unjoin_(data, unjoin_cols = character(), key_col = ".idx0")

# S3 method for data.frame
unjoin_(data, unjoin_cols = character(), key_col = ".idx0")

# S3 method for unjoin
unjoin_(data, unjoin_cols = character(), key_col = ".idx0")

Arguments

data

A data frame.

...

Specification of columns to unjoin by. For full details, see the `dplyr::select` documentation.

key_col

The name of the new column to key the two output data frames.

unjoin_cols

A character vector of column names to unjoin by; used by `unjoin_` for backwards compatibility.

Details

The input data frame is treated as "data", and the new data frame is treated as the normalized key table. This means that the split-off, de-duplicated table takes the name given by the `key_col` argument (default ".idx0") and shares this name with the key column common to both tables.

It's not yet clear whether this flexibility around naming is a good idea, but it enables a simple scheme for chaining unjoins, provided that each unjoin in the chain uses a different `key_col`.

This is a subset of the tasks done by `tidyr::nest`.

See also

`dplyr::inner_join` for the inverse operation.

`tidyr::nest` for the complementary operation resulting in one nested data frame.

Examples

library(dplyr)
#> #> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’: #> #> filter, lag
#> The following objects are masked from ‘package:base’: #> #> intersect, setdiff, setequal, union
data("Seatbelts", package= "datasets")
x <- unjoin(as.data.frame(Seatbelts), front, law)
y <- inner_join(x$.idx0, x$data) %>% select(-.idx0)
#> Joining, by = ".idx0"
all.equal(y[colnames(Seatbelts)], as.data.frame(Seatbelts))
#> [1] TRUE
iris %>% unjoin(-Species)
#> $.idx0 #> # A tibble: 149 x 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width .idx0 #> <dbl> <dbl> <dbl> <dbl> <int> #> 1 5.1 3.5 1.4 0.2 36 #> 2 4.9 3 1.4 0.2 19 #> 3 4.7 3.2 1.3 0.2 10 #> 4 4.6 3.1 1.5 0.2 6 #> 5 5 3.6 1.4 0.2 32 #> 6 5.4 3.9 1.7 0.4 52 #> 7 4.6 3.4 1.4 0.3 8 #> 8 5 3.4 1.5 0.2 28 #> 9 4.4 2.9 1.4 0.2 2 #> 10 4.9 3.1 1.5 0.1 20 #> # … with 139 more rows #> #> $data #> # A tibble: 150 x 2 #> Species .idx0 #> <fct> <int> #> 1 setosa 36 #> 2 setosa 19 #> 3 setosa 10 #> 4 setosa 6 #> 5 setosa 32 #> 6 setosa 52 #> 7 setosa 8 #> 8 setosa 28 #> 9 setosa 2 #> 10 setosa 20 #> # … with 140 more rows #> #> attr(,"class") #> [1] "unjoin"
chickwts %>% unjoin(weight)
#> $.idx0 #> # A tibble: 66 x 2 #> weight .idx0 #> <dbl> <int> #> 1 179 14 #> 2 160 10 #> 3 136 3 #> 4 227 25 #> 5 217 22 #> 6 168 11 #> 7 108 1 #> 8 124 2 #> 9 143 6 #> 10 140 4 #> # … with 56 more rows #> #> $data #> # A tibble: 71 x 2 #> feed .idx0 #> <fct> <int> #> 1 horsebean 14 #> 2 horsebean 10 #> 3 horsebean 3 #> 4 horsebean 25 #> 5 horsebean 22 #> 6 horsebean 11 #> 7 horsebean 1 #> 8 horsebean 2 #> 9 horsebean 6 #> 10 horsebean 4 #> # … with 61 more rows #> #> attr(,"class") #> [1] "unjoin"
if (require("gapminder")) {
  gapminder %>% group_by(country, continent) %>% unjoin()
  gapminder %>% unjoin(-country, -continent)
  unjoin(gapminder)
}
#> Loading required package: gapminder
#> $.idx0 #> # A tibble: 1 x 1 #> .idx0 #> <int> #> 1 1 #> #> $data #> # A tibble: 1,704 x 7 #> country continent year lifeExp pop gdpPercap .idx0 #> <fct> <fct> <int> <dbl> <int> <dbl> <int> #> 1 Afghanistan Asia 1952 28.8 8425333 779. 1 #> 2 Afghanistan Asia 1957 30.3 9240934 821. 1 #> 3 Afghanistan Asia 1962 32.0 10267083 853. 1 #> 4 Afghanistan Asia 1967 34.0 11537966 836. 1 #> 5 Afghanistan Asia 1972 36.1 13079460 740. 1 #> 6 Afghanistan Asia 1977 38.4 14880372 786. 1 #> 7 Afghanistan Asia 1982 39.9 12881816 978. 1 #> 8 Afghanistan Asia 1987 40.8 13867957 852. 1 #> 9 Afghanistan Asia 1992 41.7 16317921 649. 1 #> 10 Afghanistan Asia 1997 41.8 22227415 635. 1 #> # … with 1,694 more rows #> #> attr(,"class") #> [1] "unjoin"
unjoin(iris, Petal.Width) %>% unjoin(Species, key_col = ".idx1")
#> $.idx0 #> # A tibble: 22 x 2 #> Petal.Width .idx0 #> <dbl> <int> #> 1 0.2 2 #> 2 0.4 4 #> 3 0.3 3 #> 4 0.1 1 #> 5 0.5 5 #> 6 0.6 6 #> 7 1.4 11 #> 8 1.5 12 #> 9 1.3 10 #> 10 1.6 13 #> # … with 12 more rows #> #> $.idx1 #> # A tibble: 3 x 2 #> Species .idx1 #> <fct> <int> #> 1 setosa 1 #> 2 versicolor 2 #> 3 virginica 3 #> #> $data #> # A tibble: 150 x 5 #> Sepal.Length Sepal.Width Petal.Length .idx0 .idx1 #> <dbl> <dbl> <dbl> <int> <int> #> 1 5.1 3.5 1.4 2 1 #> 2 4.9 3 1.4 2 1 #> 3 4.7 3.2 1.3 2 1 #> 4 4.6 3.1 1.5 2 1 #> 5 5 3.6 1.4 2 1 #> 6 5.4 3.9 1.7 4 1 #> 7 4.6 3.4 1.4 3 1 #> 8 5 3.4 1.5 2 1 #> 9 4.4 2.9 1.4 2 1 #> 10 4.9 3.1 1.5 1 1 #> # … with 140 more rows #> #> attr(,"class") #> [1] "unjoin"