Source code for base32_lib.base32
# -*- coding: utf-8 -*-
#
# This file is part of base32-lib
# Copyright (C) 2019 CERN.
# Copyright (C) 2019 Northwestern University,
# Galter Health Sciences Library & Learning Center.
# base32-lib is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Generate, encode and decode random base32 identifiers.
This encoder/decoder:
- uses Douglas Crockford Base32 encoding: https://www.crockford.com/base32.html
- allows for ISO 7064 checksum
- encodes the checksum using only characters in the base32 set
(only digits in fact)
- produces strings that are URI-friendly (no '=' or '/' for instance)
This is based on:
- https://github.com/datacite/base32-url
- https://github.com/jbittel/base32-crockford
"""
import random
import string
import six
# Crockford alphabet: the letters i, l, o and u are deliberately left out
ENCODING_CHARS = '0123456789abcdefghjkmnpqrstvwxyz'
# Reverse lookup table: character -> its numeric (5-bit) value
DECODING_CHARS = dict(zip(ENCODING_CHARS, range(len(ENCODING_CHARS))))
def encode(number, split_every=0, min_length=0, checksum=False):
    """Encode `number` as a URI-friendly Douglas Crockford base32 string.

    :param number: non-negative integer to encode
    :param split_every: if > 0, insert '-' every `split_every` characters
                        going from left to right
    :param min_length: if > 0, left-pad the result with '0' up to this
                       length (checksum included, hyphens not counted)
    :param checksum: append modulo 97-10 (ISO 7064) checksum to string
    :returns: A Douglas Crockford base32 encoded string composed only
              of valid URI characters.
    :raises ValueError: if `number` or `split_every` is negative
    """
    assert isinstance(number, six.integer_types)
    if number < 0:
        raise ValueError("Invalid 'number'. Must be >= 0.")
    if split_every < 0:
        raise ValueError("Invalid 'split_every'. Must be >= 0.")

    # Peel off base32 digits least-significant first, then flip the order.
    digits = []
    rest = number
    while rest > 0:
        rest, value = divmod(rest, 32)
        digits.append(ENCODING_CHARS[value])
    result = ''.join(reversed(digits)) if digits else '0'

    if checksum:
        # NOTE: 100 * number is used because datacite also uses it
        check = 97 - ((100 * number) % 97) + 1
        result = result + "{:02d}".format(check)

    if min_length > 0:
        # 0-pad beginning of string to obtain minimum desired length
        result = result.zfill(min_length)

    if split_every > 0:
        result = '-'.join(
            result[start:start + split_every]
            for start in range(0, len(result), split_every)
        )

    return result
def generate(length=8, split_every=0, checksum=False):
    """Generate a random base32 identifier.

    :param length: non-hyphen identifier length *including* checksum
    :param split_every: hyphenates every that many characters
    :param checksum: computes and appends ISO-7064 checksum
    :returns: identifier as a string
    :raises ValueError: if `checksum` is enabled and `length` is below 3
    """
    if checksum and length < 3:
        raise ValueError(
            "Invalid 'length'. Must be >= 3 if checksum enabled."
        )

    # The checksum occupies the last two characters when enabled.
    payload_length = length - 2 if checksum else length

    # Each base32 character carries 5 bits; the drawn number may need fewer
    # characters than that, which `min_length` below compensates for.
    number = random.SystemRandom().getrandbits(payload_length * 5)

    # min_length ensures the desired length (*including* checksum)
    return encode(
        number, split_every=split_every, min_length=length, checksum=checksum
    )
def normalize(encoded):
    """Return the canonical form of an encoded string.

    - '-' are removed
    - I,i,l,L are replaced by the digit 1
    - O,o are replaced by the digit 0
    - the string is lowercased

    :param encoded: string to normalize
    :returns: normalized string.
    :raises ValueError: if a character outside the base32 alphabet remains
    """
    # Translation table for the look-alike letters.
    if six.PY3:
        lookalikes = ''.maketrans('IiLlOo', '111100')
    else:
        lookalikes = string.maketrans('IiLlOo', '111100')

    without_hyphens = encoded.replace('-', '')
    cleaned = without_hyphens.translate(lookalikes).lower()

    for char in cleaned:
        if char not in ENCODING_CHARS:
            raise ValueError("'encoded' contains undecodable characters.")

    return cleaned
def decode(encoded, checksum=False):
    """Decode a Douglas Crockford base32 `encoded` string to a number.

    The whole string is normalized *before* the checksum is split off, so
    a hyphen placed inside or right before the two checksum characters
    (e.g. the output of `encode` with a small `split_every`) or a
    look-alike letter in the checksum ('O' for '0', 'l' for '1') does not
    break decoding.

    If `checksum` is enabled, raises a ValueError on checksum error.

    :param encoded: string to decode
    :param checksum: extract the two trailing checksum digits and validate
    :returns: original number.
    :raises ValueError: if the string contains undecodable characters or,
                        with `checksum` enabled, the checksum is malformed
                        or does not match
    """
    # Normalize first: slicing the raw string could grab a '-' as part of
    # the checksum and leave a checksum digit in the payload.
    encoded = normalize(encoded)

    if checksum:
        encoded_checksum = encoded[-2:]
        encoded = encoded[:-2]
        # After normalization only base32 characters remain; anything that
        # is not a decimal digit cannot be a valid checksum.
        if not encoded_checksum.isdigit():
            raise ValueError("Invalid checksum.")

    # Horner accumulation, most significant character first.
    number = 0
    for c in encoded:
        number = number * 32 + DECODING_CHARS[c]

    if checksum:
        verification_checksum = int(encoded_checksum, 10)
        # NOTE: 100 * number is used because datacite also uses it
        computed_checksum = 97 - ((100 * number) % 97) + 1

        if verification_checksum != computed_checksum:
            raise ValueError("Invalid checksum.")

    return number