I probably need a perfect hash function. This code seems to do the job:
# encoded_reference YEAR SEQNO
#
# Encode a year and a sequence number as a short reference "YY-SS" built from
# a 31-symbol alphabet (lower-case letters and digits, without i, l, o, 0, 1).
#
# The year is written as two plain base-31 digits. The sequence number is
# split into two base-31 digits as well, but each digit is then passed through
# a fixed one-to-one shuffle of the alphabet, so consecutive sequence numbers
# do not yield consecutive symbols.
#
# Arguments:
#   $1 - year: non-negative decimal integer. Only its value modulo 31*31 is
#        encoded, so years 961 apart share the same two symbols.
#   $2 - sequence number: decimal integer in the range 0..960 (31*31 - 1).
# Outputs:
#   stdout: "answer → YY-SS"
#   stderr: a diagnostic when an argument is rejected
# Returns:
#   0 on success, 1 if an argument is not a non-negative integer or the
#   sequence number does not fit in two symbols.
encoded_reference()
{
  local arg
  for arg in "${1-}" "${2-}"; do
    if [[ ! $arg =~ ^[0-9]+$ ]]; then
      printf '%s\n' "encoded_reference: expected non-negative integers: YEAR SEQNO" >&2
      return 1
    fi
  done

  # Force base 10 so zero-padded input such as 08 is not rejected as bad octal.
  local -r yr=$((10#$1))
  local -r seqno=$((10#$2))

  local -ar symbolset=(a b c d e f g h j k m n p q r s t u v w x y z 2 3 4 5 6 7 8 9)
  local -r ln_symbolset=${#symbolset[@]} # 31

  # Two symbols can only represent ln_symbolset² distinct sequence numbers;
  # anything larger would index past the end of symbolset.
  if ((seqno >= ln_symbolset * ln_symbolset)); then
    printf '%s\n' "encoded_reference: seqno ${seqno} out of range (0..$((ln_symbolset * ln_symbolset - 1)))" >&2
    return 1
  fi

  # Build the shuffle: every symbol is paired with one seed, and each seed is
  # removed once used, so the resulting mapping is a permutation of symbolset.
  local -a seedset=("${symbolset[@]}")
  local -A lookup_table=()
  local sym pos
  for sym in "${symbolset[@]}"; do
    pos=$((50 % ${#seedset[@]})) # 50 is just an arbitrary static number
    lookup_table[$sym]=${seedset[pos]}
    # Remove the used seed by index; quoted slices avoid word splitting/globbing.
    seedset=("${seedset[@]:0:pos}" "${seedset[@]:pos+1}")
  done

  # Year: two base-31 digits, most significant first, not shuffled.
  local -r yr_hi=$(((yr / ln_symbolset) % ln_symbolset))
  local -r yr_lo=$((yr % ln_symbolset))
  local -r yr_enc=${symbolset[yr_hi]}${symbolset[yr_lo]}

  # Sequence number: two base-31 digits, each mapped through the shuffle.
  local -r most_sig_fig=$((seqno / ln_symbolset))
  local -r least_sig_fig=$((seqno % ln_symbolset))
  local -r seq_enc=${lookup_table[${symbolset[most_sig_fig]}]}${lookup_table[${symbolset[least_sig_fig]}]}

  printf '%s\n' "answer → ${yr_enc}-${seq_enc}"
} #encoded_reference
# Demo: print the encoded reference for the first twenty sequence numbers of
# three consecutive years, one reference per line.
for yr in {2024..2026}; do
  for seqno in {1..20}; do
    encoded_reference "$yr" "$seqno"
  done
done
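Output (note: the sequence part after the hyphen matches the function as posted, but the year prefix does not — as written, the function encodes 2024, 2025 and 2026 as “dk”, “dm” and “dn”, not “js”, “jt” and “ju”):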
answer → js-wy
answer → js-w2
answer → js-w4
answer → js-w6
answer → js-w8
answer → js-wa
answer → js-wd
answer → js-wg
answer → js-wk
answer → js-wp
answer → js-ws
answer → js-wv
answer → js-w3
answer → js-w9
answer → js-we
answer → js-wm
answer → js-wt
answer → js-w5
answer → js-wf
answer → js-wr
answer → jt-wy
answer → jt-w2
answer → jt-w4
answer → jt-w6
answer → jt-w8
answer → jt-wa
answer → jt-wd
answer → jt-wg
answer → jt-wk
answer → jt-wp
answer → jt-ws
answer → jt-wv
answer → jt-w3
answer → jt-w9
answer → jt-we
answer → jt-wm
answer → jt-wt
answer → jt-w5
answer → jt-wf
answer → jt-wr
answer → ju-wy
answer → ju-w2
answer → ju-w4
answer → ju-w6
answer → ju-w8
answer → ju-wa
answer → ju-wd
answer → ju-wg
answer → ju-wk
answer → ju-wp
answer → ju-ws
answer → ju-wv
answer → ju-w3
answer → ju-w9
answer → ju-we
answer → ju-wm
answer → ju-wt
answer → ju-w5
answer → ju-wf
answer → ju-wr
This is close to ideal, but I just thought of another problem: what if a year-seq pair were to encode to something like “fy-ou” or “us-uk” or “sh-it”? A bias that guarantees, or nearly guarantees, that each code contains a digit would help avoid generating offensive words. But I guess I’m getting well into over-engineering territory.
This is the decode function if anyone is interested:
decoded_reference()