# ICU transliterator rules that reduce arbitrary text to an ASCII "slug":
#   1. convert other scripts to Latin and decompose (NFKD),
#   2. spell out ring/umlaut and a handful of special letters as ASCII,
#   3. strip the remaining combining marks and modifier letters,
#   4. replace every run of anything other than ASCII letters, digits,
#      '-', CR or LF with $delimiter.
# Each rule must stay on its own line: a '#' comment runs to end of line,
# so joining lines silently disables every rule that follows a comment.
# Rule order matters: context-specific rules come before the general rule
# for the same character.

$delimiter = '-'; # this line gets added on at the beginning

:: latin; # convert other scripts to Latin
:: nfkd;  # separate base letters from remaining accents

# add special rules

# map ring (like a-ring) to 'a'
\u030A}[:Ll:] → a;   # ring followed by a lowercase letter (title case)
[:Lu:]{\u030A → A;   # ring on an uppercase letter otherwise (all caps)
\u030A → a;

# map umlaut (like a-umlaut) to 'e'
\u0308}[:Ll:] → e;   # umlaut followed by a lowercase letter (title case)
[:Lu:]{\u0308 → E;   # umlaut on an uppercase letter otherwise (all caps)
\u0308 → e;

# individual cases
# Several targets below (d̵, h̵, l̷, ...) contain a combining mark; that mark is
# stripped by the '[[:m:][:lm:]] remove' pass further down, leaving the base
# ASCII letter.
æ → ae ;
Æ}[:Ll:] → Ae ;
Æ → AE ;
ɓ → b ;
Ɓ → B ;
# Eth (U+00F0 / U+00D0) is written as the digraph "dh". The capital is given
# as an escape because it is visually identical to U+0110 (D with stroke).
ð → dh ;
\u00D0}[:Ll:] → Dh ;
\u00D0 → DH ;
# D with stroke (U+0111 / U+0110) is reduced to a plain d/D.
\u0110 → D ;
Ɗ → D ;
đ → d̵ ;
ɗ → d̔ ;
ə → e ;
Ə → E ;
ħ → h̵ ;
Ħ → H̵ ;
ı → i ;
Ƙ → K ;
ƙ → k̔ ;
ĸ → k ;
ł → l̷ ;
Ł → L̷ ;
ø → oe ;
Ø}[:Ll:] → Oe ;
Ø → OE ;
œ → oe ;
Œ}[:Ll:] → Oe ;
Œ → OE ;
ß → ss ;
ẞ}[:Ll:] → Ss ; # won't ever occur, but just in case
ẞ → SS ;
# NOTE(review): after ':: nfkd' the comma-below letters are presumably already
# decomposed to t/T + U+0326, so these two rules likely never match — confirm.
ț → ţ ;
Ț → Ţ ;
þ → th ;
Þ}[:Ll:] → Th ;
Þ → TH ;

:: [[:m:][:lm:]] remove; # remove accents, 'modifiers'

[^-a-zA-Z0-9\r\n]+ > $delimiter; # substituted delimiter for everything else
# note that I have \r\n allowed here, for testing