From 05118c607712d4ea57eaaade7638203eebfc5c05 Mon Sep 17 00:00:00 2001 From: Chris Daniels Date: Tue, 21 Jan 2020 13:36:44 +0000 Subject: [PATCH 1/4] Use numpy and modular arithmetic to generate valid numbers in one batch --- codonPython/nhsNumber.py | 41 ++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/codonPython/nhsNumber.py b/codonPython/nhsNumber.py index d465a47..58aa157 100644 --- a/codonPython/nhsNumber.py +++ b/codonPython/nhsNumber.py @@ -1,5 +1,5 @@ -import random import numpy as np +from numpy.random import default_rng def nhsNumberValidator(number: int) -> bool: @@ -48,7 +48,7 @@ def nhsNumberValidator(number: int) -> bool: def nhsNumberGenerator(to_generate: int, random_state: int = None) -> list: """ - Generates up to 1M random NHS numbers compliant with modulus 11 checks as recorded + Generates random NHS numbers compliant with modulus 11 checks as recorded in the data dictonary. https://www.datadictionary.nhs.uk/data_dictionary/attributes/n/nhs/nhs_number_de.asp?shownav=1 @@ -62,7 +62,7 @@ def nhsNumberGenerator(to_generate: int, random_state: int = None) -> list: Returns ---------- generated : list - List of randomly generated NHS numbers + List of randomly generated valid NHS numbers Examples --------- @@ -70,19 +70,32 @@ def nhsNumberGenerator(to_generate: int, random_state: int = None) -> list: [8429141456, 2625792787] """ - if random_state: - random.seed(random_state) if not isinstance(to_generate, int): raise ValueError("Please input a positive integer to generate numbers.") - if to_generate > 1000000: - raise ValueError("More than one million values requested") if to_generate < 0: raise ValueError("Please input a postitive integer to generate numbers.") + rng = default_rng(random_state) - generated = [] - while len(generated) < to_generate: - # Random 10 digit integer, starting with non-zero digit - number = random.randint(1000000000, 9999999999) - if nhsNumberValidator(number): - generated.append(number) - return generated + # The NHS numbers are generated in three stages. + # First, generate 8 digits, using numpy.randint (the middle 8 digits) + # Second, generate the check digit portions for each block of 8 digits + # Third, generate 1 digit (the 1st digit) between 1 and 8 + # increase this value by 1 if it is at or above the value which would cause a check digit of 10 + # be aware that this will not produce a fully uniform distribution over NHS numbers + # the distribution will not produce any NHS number with a leading digit (or check digit) of 1 where the + # contribution of the middle 8 digits to the check digit is 0 + # Fourth, generate the check digit from the above values + # Fifth, combine the digits into a number + base_number = rng.integers(0, 9, size=(to_generate, 8), dtype=np.int32) + check_digit_portion = np.vstack(np.dot(base_number, np.arange(9, 1, -1)) % 11) + leading_candidate = rng.integers(1, 8, size=(to_generate, 1), dtype=np.int32) + + # The resulting check digit is x_10 - k, where k is the contribution of the other digits + # Then the check digit would be 10 (invalid) if the leading digit were k+10 mod 11, or equally, k-1 + leading_digit = leading_candidate + (leading_candidate >= check_digit_portion - 1) + check_digit = (leading_digit - check_digit_portion) % 11 + result_digits = np.hstack([leading_digit, base_number, check_digit]) + + result = np.dot(result_digits, 10**np.arange(9, -1, -1, dtype=np.int64)) + + return list(result) From f875ec757c12f7b07ac18e0bd352754a1a406f4e Mon Sep 17 00:00:00 2001 From: Chris Daniels Date: Tue, 17 Mar 2020 15:10:39 +0000 Subject: [PATCH 2/4] Adjust tests to pass --- codonPython/tests/nhsNumber_test.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/codonPython/tests/nhsNumber_test.py b/codonPython/tests/nhsNumber_test.py index c1b966a..7a2e62a 100644 --- a/codonPython/tests/nhsNumber_test.py +++ b/codonPython/tests/nhsNumber_test.py @@ -5,20 +5,32 @@ @pytest.mark.parametrize( "to_generate, random_state, expected", - [(3, 42, [8429141456, 2625792787, 8235363119]), (2, 1, [9598980006, 6597925149])], + [(3, 42, [7065337065, 6104866670, 4417443181]), (2, 1, [6446801785, 4227327237])], ) def test_nhsNumberGenerator_BAU(to_generate, random_state, expected): assert expected == nhsNumberGenerator(to_generate, random_state=random_state) -@pytest.mark.parametrize("to_generate", [4.2, 1000001, -1]) +@pytest.mark.parametrize( + "to_generate, random_state", [(100, 111), (100, None), (100, 999)], +) +def test_nhsNumberGenerator_makesValid(to_generate, random_state): + assert all( + ( + nhsNumberValidator(val) + for val in nhsNumberGenerator(to_generate, random_state=random_state) + ) + ) + + +@pytest.mark.parametrize("to_generate", [4.2, -1]) def test_nhsNumberGenerator_valueErrors(to_generate): with pytest.raises(ValueError): nhsNumberGenerator(to_generate) @pytest.mark.parametrize( - "to_validate, expected", [(9598980006, True), (9598980007, False)] + "to_validate, expected", [(6771116069, True), (9598980007, False)] ) def test_nhsNumberValidator_BAU(to_validate, expected): assert expected == nhsNumberValidator(to_validate) From 923577a13382e88f0df772a66cc7cba553853f4a Mon Sep 17 00:00:00 2001 From: Chris Daniels Date: Tue, 17 Mar 2020 15:10:47 +0000 Subject: [PATCH 3/4] Coerce results to int --- codonPython/nhsNumber.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/codonPython/nhsNumber.py b/codonPython/nhsNumber.py index 58aa157..690735b 100644 --- a/codonPython/nhsNumber.py +++ b/codonPython/nhsNumber.py @@ -91,11 +91,11 @@ def nhsNumberGenerator(to_generate: int, random_state: int = None) -> list: leading_candidate = rng.integers(1, 8, size=(to_generate, 1), dtype=np.int32) # The resulting check digit is x_10 - k, where k is the contribution of the other digits - # Then the check digit would be 10 (invalid) if the leading digit were k+10 mod 11, or equally, k-1 + # Then the check digit would be 10 (invalid) if the leading digit were k+10 mod 11, or equivalently k-1 leading_digit = leading_candidate + (leading_candidate >= check_digit_portion - 1) check_digit = (leading_digit - check_digit_portion) % 11 result_digits = np.hstack([leading_digit, base_number, check_digit]) - result = np.dot(result_digits, 10**np.arange(9, -1, -1, dtype=np.int64)) - - return list(result) + result = np.dot(result_digits, 10 ** np.arange(9, -1, -1, dtype=np.int64)) + + return [int(val) for val in result] From 0e6687e3b792cd55d36d36b367bc8d8e45f6c5c2 Mon Sep 17 00:00:00 2001 From: Chris Daniels Date: Tue, 17 Mar 2020 15:25:14 +0000 Subject: [PATCH 4/4] Update doctest to match result from new algorithm --- codonPython/nhsNumber.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codonPython/nhsNumber.py b/codonPython/nhsNumber.py index 690735b..1d37e19 100644 --- a/codonPython/nhsNumber.py +++ b/codonPython/nhsNumber.py @@ -67,7 +67,7 @@ def nhsNumberGenerator(to_generate: int, random_state: int = None) -> list: Examples --------- >>> nhsNumberGenerator(2, random_state=42) - [8429141456, 2625792787] + [5065337063, 1104866676] """ if not isinstance(to_generate, int):