Commit 04671b80 authored by Pyry Kantanen's avatar Pyry Kantanen
Browse files

Rjournal changes to main

parent ee2b85cf
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
Package: hetu
Type: Package
Title: Structural Handling of Finnish Personal Identity Codes
Version: 1.0.6.9000
Date: 2022-01-18
Version: 1.0.7.9000
Date: 2022-05-16
Authors@R:
c(
person(given = "Pyry",
......@@ -41,7 +41,6 @@ URL: https://ropengov.github.io/hetu, https://github.com/ropengov/hetu
Depends:
R (>= 3.6.0)
Imports:
dplyr,
lubridate,
checkmate,
parallel
......@@ -50,11 +49,11 @@ Suggests:
knitr,
testthat,
rmarkdown,
covr
RoxygenNote: 7.1.2
covr,
dplyr
RoxygenNote: 7.2.0
X-schema.org-isPartOf: http://ropengov.org/
X-schema.org-keywords: ropengov
Config/Needs/website:
magick,
ropengov/rogtemplate
......@@ -23,7 +23,6 @@ export(satu_ctrl)
importFrom(checkmate,assert_choice)
importFrom(checkmate,assert_date)
importFrom(checkmate,assert_double)
importFrom(dplyr,filter)
importFrom(lubridate,days)
importFrom(lubridate,interval)
importFrom(lubridate,period)
......
# *News*
==========
# hetu 1.0.7.9000 (2022-05-16)
* subsetting-parameter (TRUE or FALSE) dropped from `hetu_diagnostic()` function as it was unnecessary syntactic sugar that was difficult to communicate to users. Similar functionalities can be easily achieved with standard subsetting functionalities found in base R and especially in tidyverse.
* `satu_ctrl_char()` parameter for printing whole SATU/FINUID-numbers is now called "print.full" instead of "complement".
# hetu 1.0.6.9000 (2022-01-18)
* Rewritten `rpin()` function for increased speed
* Added new function `hetu_control_char()` both for internal use in other functions as well as convenience (sometimes you know the rest of the identity code and just need to determine the control character)
* Added support for checking the validity of Finnish electronic Unique Identification Numbers (SATU / FINUID). Two new functions: `satu_ctrl()` and `satu_ctrl_char()`, the former works like `hetu_ctrl()` and the latter works like abovementioned `hetu_control_char()`
* `hetu()` table column name checksum changed to more descriptive ctrl.char. The change also affects related column names in `hetu_diagnostic()`
* `hetu()` table column name checksum changed to more descriptive ctrl.char. The change also affects related column names in `hetu_diagnostic()`. This is to illustrate the point that Finnish personal identity code has control characters (numbers and letters) instead of check digits.
# hetu 1.0.3 (2021-07-28)
......
......@@ -6,4 +6,4 @@
#' available at \code{help("-deprecated")}.
#' @name hetu-deprecated
#' @keywords internal
NULL
\ No newline at end of file
NULL
......@@ -48,9 +48,9 @@
#' hetu(c("010101-0101", "111111-111C"))
#' # Process a vector of hetu's and extract sex information from each
#' hetu(c("010101-0101", "111111-111C"), extract="sex")
#'
#'
#' @importFrom checkmate assert_choice
#'
#'
#' @export
hetu <- function(pin, extract = NULL, allow.temp = FALSE, diagnostic = FALSE) {
......
#' @title Finnish Personal Identification Number Control Character Calculator
#' @description Calculate a valid control character for an incomplete
#' @description Calculate a valid control character for an incomplete
#' Finnish personal identification numbers (hetu).
#' @param pin An incomplete PIN that ONLY has a date, century marker (optional,
#' see parameter with.century) and personal number
#' @param with.century If TRUE (default), the function assumes that the PIN
#' input contains a century marker (DDMMYYQZZZ). If FALSE, the function
#' @param with.century If TRUE (default), the function assumes that the PIN
#' input contains a century marker (DDMMYYQZZZ). If FALSE, the function
#' assumes that the PIN contains only date and personal number (DDMMYYZZZ).
#' @details This method of calculating the control character was devised by
#' mathematician Erkki Pale (1962) to detect input errors but also to
#' detect errors produced by early punch card machines. The long number
#' mathematician Erkki Pale (1962) to detect input errors but also to
#' detect errors produced by early punch card machines. The long number
#' produced by writing the birth date and the personal number together are
#' divided by 31 and the remainder is used to look up the control character
#' from a separate table containing alphanumeric characters except letters
#' G, I, O, Q and Z.
#'
#' The method of calculating the control character does not need century
#'
#' The method of calculating the control character does not need century
#' character and therefore the function has an option to omit it.
#' @return Control character, either a number 0-9 or a letter.
#' @author Pyry Kantanen
......@@ -25,8 +25,8 @@
#' hetu_control_char("010101010", with.century = FALSE)
#' @export
hetu_control_char <- function(pin, with.century = TRUE) {
if (length(pin) > 1){
if (length(pin) > 1) {
x <- vapply(pin,
FUN = hetu_control_char,
with.century = with.century,
......@@ -34,94 +34,97 @@ hetu_control_char <- function(pin, with.century = TRUE) {
USE.NAMES = FALSE)
return(x)
}
checklist <- c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
"A", "B", "C", "D", "E", "F", "H", "J", "K", "L",
"M", "N", "P", "R", "S", "T", "U", "V", "W", "X", "Y")
names(checklist) <- 0:30
if (with.century == TRUE){
if (nchar(pin) != 10){
if (with.century == TRUE) {
if (nchar(pin) != 10) {
stop("Input PINs that only have 10 characters: birthdate, century marker
and personal numbers (DDMMYYQZZZ)")
}
if (!(substr(pin, start = 7, stop = 7) %in% c("-", "+", "A"))){
if (!(substr(pin, start = 7, stop = 7) %in% c("-", "+", "A"))) {
stop("7th character of your PIN needs to be a century marker (-, + or A).
If your PIN does not have it use parameter with.century == FALSE")
}
pin_ddmmyy <- substr(pin, 1, 6)
pin_zzz <- substr(pin, 8, 10)
} else if (with.century == FALSE){
if (nchar(pin) != 9){
} else if (with.century == FALSE) {
if (nchar(pin) != 9) {
stop("Input PINs that only have 9 characters: birthdate and personal
numbers (DDMMYYZZZ)")
}
pin_ddmmyy <- substr(pin, 1, 6)
pin_zzz <- substr(pin, 7, 9)
}
mod <- as.numeric(paste0(pin_ddmmyy, pin_zzz)) %% 31
extracted_control_char <- checklist[as.character(mod)]
names(extracted_control_char) <- NULL
extracted_control_char
}
#' @title Finnish Unique Identification Number Control Character Calculator
#' @description Calculate a valid control character for an incomplete
#' @description Calculate a valid control character for an incomplete
#' Finnish Unique Identification Number (FINUID, or sähköinen asiointitunnus
#' SATU).
#' @param pin An incomplete FINUID that has 8 numbers
#' @param complement Should the function print only
#' @param pin An incomplete FINUID that has 8 first numbers.
#' @param print.full Should the function print only the whole FINUID-number
#' (TRUE) or only the control character (FALSE). Default is FALSE.
#' @details This method of calculating the control character was devised by
#' mathematician Erkki Pale (1962) to detect input errors but also to
#' detect errors produced by early punch card machines. The long number
#' mathematician Erkki Pale (1962) to detect input errors but also to
#' detect errors produced by early punch card machines. The long number
#' produced by writing the birth date and the personal number together are
#' divided by 31 and the remainder is used to look up the control character
#' from a separate table containing alphanumeric characters except letters
#' G, I, O, Q and Z.
#'
#' The method of calculating the control character does not need century
#'
#' The method of calculating the control character does not need century
#' character and therefore the function has an option to omit it.
#' @return Control character, either a number 0-9 or a letter. If complete
#' is TRUE, then the function returns a complete FINUID / SATU number.
#' @seealso
#' For more detailed information about FINUID, see Finnish Digital and
#' population data services agency website:
#' @return Control character, either a number 0-9 or a letter (length 1
#' character). If parameter print.full is set to TRUE, the function returns
#' a complete FINUID / SATU number (length 9 characters).
#' @seealso
#' For more detailed information about FINUID, see Finnish Digital and
#' population data services agency website:
#' \url{https://dvv.fi/en/citizen-certificate-and-electronic-identity}
#' @author Pyry Kantanen
#' @examples
#' # The first assigned FINUID number, 10000001N.
#' satu_control_char("10000001")
#' @export
satu_control_char <- function(pin, complement = FALSE) {
satu_control_char <- function(pin, print.full = FALSE) {
if (length(pin) > 1){
if (length(pin) > 1) {
x <- vapply(pin,
FUN = satu_control_char,
complement = complement,
print.full = print.full,
FUN.VALUE = character(1),
USE.NAMES = FALSE)
return(x)
}
checklist <- c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
"A", "B", "C", "D", "E", "F", "H", "J", "K", "L",
"M", "N", "P", "R", "S", "T", "U", "V", "W", "X", "Y")
names(checklist) <- 0:30
if (nchar(pin) != 8){
if (nchar(pin) != 8) {
stop("Input FINUIDs that have 8 numbers")
}
if (suppressWarnings(is.na(as.numeric(pin))) == TRUE){
if (suppressWarnings(is.na(as.numeric(pin))) == TRUE) {
stop("Input FINUIDs that only have numbers")
}
mod <- as.numeric(pin) %% 31
extracted_control_char <- checklist[as.character(mod)]
names(extracted_control_char) <- NULL
extracted_control_char
if (complement == TRUE){
if (print.full == TRUE) {
paste0(pin, extracted_control_char)
} else {
extracted_control_char
......
## hetu_diagnostic.R
#' @title Diagnostics Tool for HETU
#' @description Produce a data frame of PINs that may require closer scrutiny.
#' @param pin Finnish personal identification number as a character vector,
#' @param pin Finnish personal identification number as a character vector,
#' or vector of identification numbers as a character vectors
#' @param extract Extract only selected part of the diagnostic information.
#' Valid values are "\code{hetu}", "\code{is.temp}", "\code{valid.p.num}",
#' "\code{valid.ctrl.char}", "\code{correct.ctrl.char}", "\code{valid.date}",
#' "\code{valid.day}", "\code{valid.month}", "\code{valid.length}",
#' "\code{valid.day}", "\code{valid.month}", "\code{valid.length}",
#' "\code{valid.century}". If \code{NULL} (default), returns all information.
#' @param subsetting Print only PINs where the validity check chosen
#' in \code{extract} returns \code{FALSE}.
#' @return A data.frame containing diagnostic checks about PINs.
#' @examples
#' diagnosis_example <- c("010101-0102", "111111-111Q",
#' "010101B0101", "320101-0101", "011301-0101",
#' diagnosis_example <- c("010101-0102", "111111-111Q",
#' "010101B0101", "320101-0101", "011301-0101",
#' "010101-01010", "010101-0011")
#' ## Print all diagnoses
#' ## Print all diagnostics for various fake personal identity codes
#' hetu_diagnostic(diagnosis_example)
#' # Extract century-related checks
#' hetu_diagnostic(diagnosis_example, extract = "valid.century")
#' # Extract only rows where valid.ctrl.char = FALSE
#' hetu_diagnostic(diagnosis_example, subsetting = TRUE, extract = "valid.day")
#'
#' @importFrom dplyr filter
#'
#' @export
hetu_diagnostic <- function(pin, extract = NULL, subsetting = FALSE) {
diagnostic_params <- c("hetu", "is.temp", "valid.p.num", "valid.ctrl.char",
"correct.ctrl.char", "valid.date", "valid.day", "valid.month",
hetu_diagnostic <- function(pin, extract = NULL) {
diagnostic_params <- c("hetu", "is.temp", "valid.p.num", "valid.ctrl.char",
"correct.ctrl.char", "valid.date", "valid.day", "valid.month",
"valid.year", "valid.length", "valid.century")
if (!is.null(extract)) {
if (!all(extract %in% diagnostic_params)) {
stop("Trying to extract invalid diagnostic(s)")
}
}
diagnostic_table <- hetu(pin, allow.temp = TRUE, diagnostic = TRUE)
if (is.null(extract)) {
output <- subset(hetu(pin,
allow.temp = TRUE,
diagnostic = TRUE),
select = diagnostic_params)
output <- diagnostic_table[, diagnostic_params]
} else {
if (subsetting == TRUE) {
output <- hetu(pin, allow.temp = TRUE, diagnostic = TRUE)
output <- dplyr::filter(output,
eval(parse(text = paste(extract, "== FALSE"))))
}
else {
output <- subset(hetu(pin,
allow.temp = TRUE,
diagnostic = TRUE),
select = c("hetu", extract))
}
output <- diagnostic_table[, c("hetu", extract)]
}
return(output)
}
#' @rdname hetu_diagnostic
#' @examples
#' diagnosis_example <- c("010101-0102", "111111-111Q",
#' "010101B0101", "320101-0101", "011301-0101",
#' diagnosis_example <- c("010101-0102", "111111-111Q",
#' "010101B0101", "320101-0101", "011301-0101",
#' "010101-01010", "010101-0011")
#' ## Print all diagnoses
#' pin_diagnostic(diagnosis_example)
#' @export
pin_diagnostic <- hetu_diagnostic
\ No newline at end of file
pin_diagnostic <- hetu_diagnostic
#' @title Age from ID
#' @description Calculate the age in full years for a given date.
#' @inheritParams hetu
#' @param date Date at which age is calculated. If a vector is provided it
#' @param date Date at which age is calculated. If a vector is provided it
#' must be of the same length as the \code{pin} argument.
#' @param timespan Timespan to use to calculate age. The possible timespans are:
#' \itemize{
......@@ -10,13 +10,13 @@
#' \item \code{weeks}
#' \item \code{days}
#' }
#' @aliases hetu_age
#' @aliases hetu_age
#' @return Age as an integer vector.
#'
#' @examples
#' ex_pin <- c("010101-0101", "111111-111C")
#' pin_age(ex_pin, date = "2012-01-01")
#'
#'
#' @importFrom checkmate assert_date assert_choice
#' @importFrom lubridate ymd interval years weeks days period
#'
......@@ -28,16 +28,16 @@ pin_age <- function(pin,
date <- as.Date(date)
checkmate::assert_date(date, any.missing = FALSE)
checkmate::assert_choice(timespan, choices = c("years",
"months",
"weeks",
checkmate::assert_choice(timespan, choices = c("years",
"months",
"weeks",
"days"))
if (length(date) == 1) {
message("The age in ", timespan, " has been calculated at ",
message("The age in ", timespan, " has been calculated at ",
as.character(date), ".")
} else if (length(date) == length(pin)){
message("The age is calculated relative to the '",
} else if (length(date) == length(pin)) {
message("The age is calculated relative to the '",
deparse(substitute(date)), "' date")
} else {
stop("Multiple dates used.")
......@@ -47,31 +47,30 @@ pin_age <- function(pin,
all_pins <- pin
all_pins[!hetuframe$valid.pin] <- NA
if (length(date) > 1){
if (length(date) > 1) {
valid_diff <- !is.na(all_pins) & !is.na(date)
} else{
valid_diff <- !is.na(all_pins)
}
pin <- all_pins[valid_diff]
pin_dates <- as.Date(hetuframe$date[valid_diff])
diff <- lubridate::interval(pin_dates, date)
timespan_lubridate <-
switch(timespan,
"years" = lubridate::years(1),
"months" = lubridate::period(months=1),
"months" = lubridate::period(months = 1),
"weeks" = lubridate::weeks(1),
"days" = lubridate::days(1))
age <- suppressMessages(as.integer(diff %/% timespan_lubridate))
if(any(date < pin_dates)) warning("Negative age(s).")
if (any(date < pin_dates)) warning("Negative age(s).")
all_age <- rep(as.integer(NA), length(all_pins))
all_age[valid_diff] <- age
all_age
}
#' @rdname pin_age
......@@ -79,4 +78,4 @@ pin_age <- function(pin,
#' ex_pin <- c("010101-0101", "111111-111C")
#' hetu_age(ex_pin, date = "2012-01-01")
#' @export
hetu_age <- pin_age
\ No newline at end of file
hetu_age <- pin_age
......@@ -2,52 +2,57 @@
#' @description Validate Finnish personal identification numbers (hetu).
#' @param pin Finnish personal identification number as a character vector, or
#' vector of identification numbers as a character vectors.
#' @param allow.temp If TRUE, temporary PINs (personal numbers 900-999) are
#' handled similarly to regular PINs (personal numbers 002-899), meaning
#' that otherwise valid temporary PIN will return a TRUE. Default
#' @param allow.temp If TRUE, temporary PINs (personal numbers 900-999) are
#' handled similarly to regular PINs (personal numbers 002-899), meaning
#' that otherwise valid temporary PIN will return a TRUE. Default
#' is \code{FALSE}.
#' @return Logical indicating whether the input string is a valid Finnish
#' personal identification number,
#' @return A logical vector indicating whether the input vector contains valid
#' Finnish personal identity codes.
#' @author Pyry Kantanen
#' @seealso \code{\link{hetu}} For extracting information from Finnish personal
#' identification numbers.
#' @examples
#' pin_ctrl("010101-0101") # TRUE
#' pin_ctrl("010101-010A") # FALSE
#' pin_ctrl(c("010101-0101", "010101-010A")) # TRUE FALSE
#' @export
pin_ctrl <- function(pin, allow.temp = FALSE) {
validity_test <- hetu(pin, extract = "valid.pin", allow.temp = allow.temp)
validity_test
}
#' @rdname pin_ctrl
#' @examples
#' hetu_ctrl("010101-0101") # TRUE
#' hetu_ctrl("010101-010A") # FALSE
#' hetu_ctrl(c("010101-0101", "010101-010A")) # TRUE FALSE
#' @export
hetu_ctrl <- pin_ctrl
#' @title Check Finnish Business ID (y-tunnus) validity
#'
#' @description
#' A function that checks whether a \code{bid} (Finnish Business ID) is valid.
#'
#' @description
#' A function that checks whether a \code{bid} (Finnish Business ID) is valid.
#' Returns \code{TRUE} or \code{FALSE}.
#'
#' @param
#'
#' @param
#' bid a vector of 1 or more business identity numbers
#'
#' @examples
#'
#' @examples
#' bid_ctrl(c("0000000-0", "0000001-9")) # TRUE TRUE
#' bid_ctrl("0737546-1") # FALSE
#' @export
bid_ctrl <- function(bid) {
# Try to create Business ID -object from the given bid, check if created
# object is of the correct class
# Try to create Business ID -object from the given bid, check if created
# object is of the correct class
if (length(bid) > 1) {
return(vapply(bid, FUN=bid_ctrl, FUN.VALUE = logical(1), USE.NAMES = FALSE))
return(vapply(bid,
FUN = bid_ctrl,
FUN.VALUE = logical(1),
USE.NAMES = FALSE))
}
if (!is.character(bid)) {
......@@ -64,7 +69,7 @@ bid_ctrl <- function(bid) {
}
# Check separator character
dash <- substr(bid, start=8, stop=8)
dash <- substr(bid, start = 8, stop = 8)
if (!dash %in% c("-")) {
warning(paste0("Invalid separator character '",
dash,
......@@ -72,13 +77,15 @@ bid_ctrl <- function(bid) {
bid))
valid.separator <- FALSE
return(valid.separator)
} else {valid.separator <- TRUE}
} else {
valid.separator <- TRUE
}
# Calculate if BID is correct using Mod 11-2
x <- substr(bid, start = 1, stop = 7)
x_control <- substr(bid, start = 9, stop = 9)
x <- as.numeric(unlist(strsplit(x, split="")))
x <- x * c(7,9,10,5,8,4,2)
x <- as.numeric(unlist(strsplit(x, split = "")))
x <- x * c(7, 9, 10, 5, 8, 4, 2)
x <- sum(x)
check <- x %% 11
if (check == 0) {
......@@ -94,22 +101,22 @@ bid_ctrl <- function(bid) {
}
#' @title Check Finnish Unique Identification Number validity
#'
#' @description
#' A function that checks whether a \code{satu} (Finnish Unique Identification
#'
#' @description
#' A function that checks whether a \code{satu} (Finnish Unique Identification
#' Number) is valid. Returns \code{TRUE} or \code{FALSE}.
#'
#' @param
#'
#' @param
#' satu a vector of 1 or more Unique Identification Numbers
#'
#' @examples
#'
#' @examples
#' satu_ctrl("10000001N") # TRUE
#' satu_ctrl(c("10000001N", "20000001B")) # TRUE FALSE
#' @export
satu_ctrl <- function(satu) {
first_eight <- substr(satu, 1, 8)
vector <- satu_control_char(first_eight, complement = TRUE)
satu_first_eight_chars <- substr(satu, 1, 8)
vector <- satu_control_char(satu_first_eight_chars, print.full = TRUE)
satu == vector
}
\ No newline at end of file
}
......@@ -4,7 +4,7 @@
#' @return Date of birth as a vector in date format.
#' @examples
#' pin_date(c("010101-0101", "111111-111C"))
#'
#'
#' @export
pin_date <- function(pin, allow.temp = FALSE) {
hetu(pin, extract = "date", allow.temp = allow.temp)
......@@ -14,4 +14,4 @@ pin_date <- function(pin, allow.temp = FALSE) {
#' @examples
#' hetu_date(c("010101-0101", "111111-111C"))
#' @export
hetu_date <- pin_date
\ No newline at end of file
hetu_date <- pin_date
#' @title Sex From ID
#' @description Extract sex from Finnish personal identification number.
#' @inheritParams hetu
#' @inheritParams hetu
#' @return Factor with label 'Male' and 'Female'.
#' @author Pyry Kantanen, Leo Lahti
#' @seealso \code{\link{hetu}} For general information extraction
#' @examples
#' pin_sex("010101-010A")
#' @export
#' pin_sex("010101-010A")
#' @export
pin_sex <- function(pin, allow.temp = TRUE) {
return(hetu(pin, extract = "sex", allow.temp = allow.temp))
# Alternative way without relying on hetu extract:
# sex_marker <- as.numeric(substr(pin, start = 10, stop = 10))
# sex <- ifelse(((sex_marker %% 2) == 0), "Female", "Male")
# output <- factor(sex, levels = c("Female", "Male"))
# return(output)
return(hetu(pin, extract = "sex", allow.temp = allow.temp))
}
#' @rdname pin_sex
#' @examples
#' hetu_sex("010101-010A")
#' hetu_sex("010101-010A")
#' @export
hetu_sex <- pin_sex
......@@ -3,7 +3,7 @@
#' @description Calculates the date of birth in date format.
#' @param pin Finnish ID number
#' @return Date of birth as a vector in date format.
#'