Skip to contents

Tool to transform any type of vector, or even combination of vectors, into an integer vector ranging from 1 to the number of unique values. This actually creates a unique identifier vector.

Usage

to_integer(
  ...,
  inputs = NULL,
  sorted = FALSE,
  add_items = FALSE,
  items.list = FALSE,
  multi.df = FALSE,
  multi.join = "_",
  na.valid = FALSE,
  internal = FALSE
)

Arguments

...

Vectors of any type, to be transformed into a single integer vector ranging from 1 to the number of unique elements.

inputs

A list of inputs, by default it is NULL. If provided, it completely replaces the elements passed in `...`.

sorted

Logical, default is FALSE. Whether the integers should refer to the unique values in sorted order.

add_items

Logical, default is FALSE. Whether to add the unique values of the original vector(s). If requested, an attribute items is created containing the values (alternatively, they can appear in a list if items.list=TRUE).

items.list

Logical, default is FALSE. Only used if add_items=TRUE. If TRUE, then a list of length 2 is returned with x the integer vector and items the vector of items.

multi.df

Logical, default is FALSE. If TRUE, the unique elements are returned as a data.frame, with one column per input vector. Ignored if add_items = FALSE.

multi.join

Character scalar used to join the items of multiple vectors. The default is "_". Ignored if add_items = FALSE.

na.valid

Logical, default is FALSE. Whether to consider NAs as regular values. If TRUE, the returned index will not contain any NA value.

internal

Logical, default is FALSE. For programming only. If this function is used within another function, setting internal = TRUE is needed to make the evaluation of ... valid. End users of to_integer should not care.

Value

Returns an integer vector of the same length as the input vectors. If add_items=TRUE and items.list=TRUE, a list of two elements is returned: x being the integer vector and items being the unique values to which the values in x make reference.

Author

Laurent Berge

Examples


x1 = iris$Species
x2 = as.integer(iris$Sepal.Length)

# transforms the species vector into integers
to_integer(x1)
#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#>  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [75] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3
#> [112] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
#> [149] 3 3

# To obtain the "items":
to_integer(x1, add_items = TRUE)
#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#>  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [75] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3
#> [112] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
#> [149] 3 3
#> attr(,"items")
#> [1] setosa     versicolor virginica 
#> Levels: setosa versicolor virginica
# same but in list form
to_integer(x1, add_items = TRUE, items.list = TRUE)
#> $x
#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#>  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [75] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3
#> [112] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
#> [149] 3 3
#> 
#> $items
#> [1] setosa     versicolor virginica 
#> Levels: setosa versicolor virginica
#> 

# transforms x2 into an integer vector from 1 to 4
to_integer(x2, add_items = TRUE)
#>   [1] 1 2 2 2 1 1 2 1 2 2 1 2 2 2 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 2 2 1 1 1 2 1 1
#>  [38] 2 2 1 1 2 2 1 1 2 1 2 1 1 3 4 4 1 4 1 4 2 4 1 1 1 4 4 1 4 1 1 4 1 1 4 4 4
#>  [75] 4 4 4 4 4 1 1 1 1 4 1 4 4 4 1 1 1 4 1 1 1 1 1 4 1 1 4 1 3 4 4 3 2 3 4 3 4
#> [112] 4 4 1 1 4 4 3 3 4 4 1 3 4 4 3 4 4 4 3 3 3 4 4 4 3 4 4 4 4 4 4 1 4 4 4 4 4
#> [149] 4 1
#> attr(,"items")
#> [1] 5 4 7 6

# To have the sorted items:
to_integer(x2, add_items = TRUE, sorted = TRUE)
#>   [1] 2 1 1 1 2 2 1 2 1 1 2 1 1 1 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 1 1 2 2 2 1 2 2
#>  [38] 1 1 2 2 1 1 2 2 1 2 1 2 2 4 3 3 2 3 2 3 1 3 2 2 2 3 3 2 3 2 2 3 2 2 3 3 3
#>  [75] 3 3 3 3 3 2 2 2 2 3 2 3 3 3 2 2 2 3 2 2 2 2 2 3 2 2 3 2 4 3 3 4 1 4 3 4 3
#> [112] 3 3 2 2 3 3 4 4 3 3 2 4 3 3 4 3 3 3 4 4 4 3 3 3 4 3 3 3 3 3 3 2 3 3 3 3 3
#> [149] 3 2
#> attr(,"items")
#> [1] 4 5 6 7

# placing the three side by side
head(cbind(x2, as_index = to_integer(x2), 
           as_index_sorted = to_integer(x2, sorted = TRUE)))
#>      x2 as_index as_index_sorted
#> [1,]  5        1               2
#> [2,]  4        2               1
#> [3,]  4        2               1
#> [4,]  4        2               1
#> [5,]  5        1               2
#> [6,]  5        1               2

# The result can safely be used as an index
res = to_integer(x2, add_items = TRUE, sorted = TRUE, items.list = TRUE)
all(res$items[res$x] == x2)
#> [1] TRUE


#
# Multiple vectors
#

to_integer(x1, x2, add_items = TRUE)
#>   [1]  1  2  2  2  1  1  2  1  2  2  1  2  2  2  1  1  1  1  1  1  1  1  2  1  2
#>  [26]  1  1  1  1  2  2  1  1  1  2  1  1  2  2  1  1  2  2  1  1  2  1  2  1  1
#>  [51]  3  4  4  5  4  5  4  6  4  5  5  5  4  4  5  4  5  5  4  5  5  4  4  4  4
#>  [76]  4  4  4  4  5  5  5  5  4  5  4  4  4  5  5  5  4  5  5  5  5  5  4  5  5
#> [101]  7  8  9  7  7  9 10  9  7  9  7  7  7  8  8  7  7  9  9  7  7  8  9  7  7
#> [126]  9  7  7  7  9  9  9  7  7  7  9  7  7  7  7  7  7  8  7  7  7  7  7  7  8
#> attr(,"items")
#>  [1] "setosa_5"     "setosa_4"     "versicolor_7" "versicolor_6" "versicolor_5"
#>  [6] "versicolor_4" "virginica_6"  "virginica_5"  "virginica_7"  "virginica_4" 

# You can use multi.join to handle the join of the items:
to_integer(x1, x2, add_items = TRUE, multi.join = "; ")
#>   [1]  1  2  2  2  1  1  2  1  2  2  1  2  2  2  1  1  1  1  1  1  1  1  2  1  2
#>  [26]  1  1  1  1  2  2  1  1  1  2  1  1  2  2  1  1  2  2  1  1  2  1  2  1  1
#>  [51]  3  4  4  5  4  5  4  6  4  5  5  5  4  4  5  4  5  5  4  5  5  4  4  4  4
#>  [76]  4  4  4  4  5  5  5  5  4  5  4  4  4  5  5  5  4  5  5  5  5  5  4  5  5
#> [101]  7  8  9  7  7  9 10  9  7  9  7  7  7  8  8  7  7  9  9  7  7  8  9  7  7
#> [126]  9  7  7  7  9  9  9  7  7  7  9  7  7  7  7  7  7  8  7  7  7  7  7  7  8
#> attr(,"items")
#>  [1] "setosa; 5"     "setosa; 4"     "versicolor; 7" "versicolor; 6"
#>  [5] "versicolor; 5" "versicolor; 4" "virginica; 6"  "virginica; 5" 
#>  [9] "virginica; 7"  "virginica; 4" 

# alternatively, return the items as a data.frame
to_integer(x1, x2, add_items = TRUE, multi.df = TRUE)
#>   [1]  1  2  2  2  1  1  2  1  2  2  1  2  2  2  1  1  1  1  1  1  1  1  2  1  2
#>  [26]  1  1  1  1  2  2  1  1  1  2  1  1  2  2  1  1  2  2  1  1  2  1  2  1  1
#>  [51]  3  4  4  5  4  5  4  6  4  5  5  5  4  4  5  4  5  5  4  5  5  4  4  4  4
#>  [76]  4  4  4  4  5  5  5  5  4  5  4  4  4  5  5  5  4  5  5  5  5  5  4  5  5
#> [101]  7  8  9  7  7  9 10  9  7  9  7  7  7  8  8  7  7  9  9  7  7  8  9  7  7
#> [126]  9  7  7  7  9  9  9  7  7  7  9  7  7  7  7  7  7  8  7  7  7  7  7  7  8
#> attr(,"items")
#>            x1 x2
#> 1      setosa  5
#> 2      setosa  4
#> 3  versicolor  7
#> 4  versicolor  6
#> 5  versicolor  5
#> 6  versicolor  4
#> 7   virginica  6
#> 8   virginica  5
#> 9   virginica  7
#> 10  virginica  4

#
# NA values
#

x1_na = c("a", "a", "b", NA, NA, "b", "a", "c", NA)
x2_na = c(NA,    1,  NA,  1,  1,   1,   2,   2,  2)

# by default the NAs are propagated
to_integer(x1_na, x2_na, add_items = TRUE)
#> [1] NA  1 NA NA NA  2  3  4 NA
#> attr(,"items")
#> [1] "a_1" "b_1" "a_2" "c_2"

# but you can treat them as valid values with na.valid = TRUE
to_integer(x1_na, x2_na, add_items = TRUE, na.valid = TRUE)
#> [1] 1 2 3 4 4 5 6 7 8
#> attr(,"items")
#> [1] "a_NA" "a_1"  "b_NA" "NA_1" "b_1"  "a_2"  "c_2"  "NA_2"

#
# programmatic use
#

# the argument `inputs` can be used for easy programmatic use
all_vars = list(x1_na, x2_na)
to_integer(inputs = all_vars)
#> [1] NA  1 NA NA NA  2  3  4 NA