unjoin

Split a table in two and remove repeated values.

unjoin(data, ..., key_col = ".idx0")

# S3 method for data.frame
unjoin(data, ..., key_col = ".idx0")

# S3 method for unjoin
unjoin(data, ..., key_col = ".idx0")

unjoin_(data, unjoin_cols = character(), key_col = ".idx0")

# S3 method for data.frame
unjoin_(data, unjoin_cols = character(), key_col = ".idx0")

# S3 method for unjoin
unjoin_(data, unjoin_cols = character(), key_col = ".idx0")

Arguments

data	A data frame.
...	Specification of columns to unjoin by. For full details, see the `dplyr::select` documentation.
key_col	The name of the new column to key the two output data frames.
unjoin_cols	Character vector of column names to unjoin by; used by `unjoin_` for backwards compatibility.

Details

The input data frame is treated as "data", and the new data frame is treated as the normalized key. This means that the split-off, de-duplicated table takes the name given by the `key_col` argument (default ".idx0") and shares this name with the common key column.

It's not yet clear whether this flexibility around naming is a good idea, but it enables a simple scheme for chaining unjoins, provided that each unjoin in the chain uses a different `key_col`.

This is a subset of the tasks done by nest.

Examples

library(dplyr)
#> 
#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’:
#> 
#>     filter, lag
#> The following objects are masked from ‘package:base’:
#> 
#>     intersect, setdiff, setequal, union
data("Seatbelts", package= "datasets")
x <- unjoin(as.data.frame(Seatbelts), front, law)
y <- inner_join(x$.idx0, x$data) %>% select(-.idx0)
#> Joining, by = ".idx0"
all.equal(y[colnames(Seatbelts)], as.data.frame(Seatbelts))
#> [1] TRUE

iris %>% unjoin(-Species)
#> $.idx0
#> # A tibble: 149 x 5
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width .idx0
#>           <dbl>       <dbl>        <dbl>       <dbl> <int>
#>  1          5.1         3.5          1.4         0.2    36
#>  2          4.9         3            1.4         0.2    19
#>  3          4.7         3.2          1.3         0.2    10
#>  4          4.6         3.1          1.5         0.2     6
#>  5          5           3.6          1.4         0.2    32
#>  6          5.4         3.9          1.7         0.4    52
#>  7          4.6         3.4          1.4         0.3     8
#>  8          5           3.4          1.5         0.2    28
#>  9          4.4         2.9          1.4         0.2     2
#> 10          4.9         3.1          1.5         0.1    20
#> # … with 139 more rows
#> 
#> $data
#> # A tibble: 150 x 2
#>    Species .idx0
#>    <fct>   <int>
#>  1 setosa     36
#>  2 setosa     19
#>  3 setosa     10
#>  4 setosa      6
#>  5 setosa     32
#>  6 setosa     52
#>  7 setosa      8
#>  8 setosa     28
#>  9 setosa      2
#> 10 setosa     20
#> # … with 140 more rows
#> 
#> attr(,"class")
#> [1] "unjoin"
chickwts %>% unjoin(weight)
#> $.idx0
#> # A tibble: 66 x 2
#>    weight .idx0
#>     <dbl> <int>
#>  1    179    14
#>  2    160    10
#>  3    136     3
#>  4    227    25
#>  5    217    22
#>  6    168    11
#>  7    108     1
#>  8    124     2
#>  9    143     6
#> 10    140     4
#> # … with 56 more rows
#> 
#> $data
#> # A tibble: 71 x 2
#>    feed      .idx0
#>    <fct>     <int>
#>  1 horsebean    14
#>  2 horsebean    10
#>  3 horsebean     3
#>  4 horsebean    25
#>  5 horsebean    22
#>  6 horsebean    11
#>  7 horsebean     1
#>  8 horsebean     2
#>  9 horsebean     6
#> 10 horsebean     4
#> # … with 61 more rows
#> 
#> attr(,"class")
#> [1] "unjoin"

if (require("gapminder")) {
  gapminder %>%
    group_by(country, continent) %>%
    unjoin()

  gapminder %>%
    unjoin(-country, -continent)
  unjoin(gapminder)
}
#> Loading required package: gapminder
#> $.idx0
#> # A tibble: 1 x 1
#>   .idx0
#>   <int>
#> 1     1
#> 
#> $data
#> # A tibble: 1,704 x 7
#>    country     continent  year lifeExp      pop gdpPercap .idx0
#>    <fct>       <fct>     <int>   <dbl>    <int>     <dbl> <int>
#>  1 Afghanistan Asia       1952    28.8  8425333      779.     1
#>  2 Afghanistan Asia       1957    30.3  9240934      821.     1
#>  3 Afghanistan Asia       1962    32.0 10267083      853.     1
#>  4 Afghanistan Asia       1967    34.0 11537966      836.     1
#>  5 Afghanistan Asia       1972    36.1 13079460      740.     1
#>  6 Afghanistan Asia       1977    38.4 14880372      786.     1
#>  7 Afghanistan Asia       1982    39.9 12881816      978.     1
#>  8 Afghanistan Asia       1987    40.8 13867957      852.     1
#>  9 Afghanistan Asia       1992    41.7 16317921      649.     1
#> 10 Afghanistan Asia       1997    41.8 22227415      635.     1
#> # … with 1,694 more rows
#> 
#> attr(,"class")
#> [1] "unjoin"
unjoin(iris, Petal.Width) %>% unjoin(Species, key_col = ".idx1")
#> $.idx0
#> # A tibble: 22 x 2
#>    Petal.Width .idx0
#>          <dbl> <int>
#>  1         0.2     2
#>  2         0.4     4
#>  3         0.3     3
#>  4         0.1     1
#>  5         0.5     5
#>  6         0.6     6
#>  7         1.4    11
#>  8         1.5    12
#>  9         1.3    10
#> 10         1.6    13
#> # … with 12 more rows
#> 
#> $.idx1
#> # A tibble: 3 x 2
#>   Species    .idx1
#>   <fct>      <int>
#> 1 setosa         1
#> 2 versicolor     2
#> 3 virginica      3
#> 
#> $data
#> # A tibble: 150 x 5
#>    Sepal.Length Sepal.Width Petal.Length .idx0 .idx1
#>           <dbl>       <dbl>        <dbl> <int> <int>
#>  1          5.1         3.5          1.4     2     1
#>  2          4.9         3            1.4     2     1
#>  3          4.7         3.2          1.3     2     1
#>  4          4.6         3.1          1.5     2     1
#>  5          5           3.6          1.4     2     1
#>  6          5.4         3.9          1.7     4     1
#>  7          4.6         3.4          1.4     3     1
#>  8          5           3.4          1.5     2     1
#>  9          4.4         2.9          1.4     2     1
#> 10          4.9         3.1          1.5     1     1
#> # … with 140 more rows
#> 
#> attr(,"class")
#> [1] "unjoin"

Arguments

Details

See also

Examples

Contents