#!/bin/awk -f

# tavuta: add Finnish hyphenation to input
#
# Reads text line by line and prints every line with a hyphenation mark
# inserted before the last consonant of each consonant run that sits
# between two vowels (e.g. "arki" -> "ar" mark "ki").  No mark is placed
# after a first syllable that consists of a single vowel, so "ala" is
# printed unchanged.  Characters that match neither vowel() nor letter()
# end the current word and are copied through as they are.

BEGIN {
	# Hyphenation mark inserted at break points.
	# NOTE(review): presumably a soft hyphen (U+00AD), which is invisible
	# in most editors -- confirm before editing this literal.
	mark = "­"

	# Scanner states used by hyphenate().
	INIT = 0	# no vowel seen yet in the current word
	VOWEL = 1	# previous character was a vowel
	CONS = 2	# inside a consonant run that follows a vowel
}

# vowel: true when the single character c is one of the listed vowels.
function vowel(c){
	return c ~ "[aeéiouüyåäöAEÉIOUÜYÅÄÖ]"
}

# letter: true when the single character c is any letter of the alphabet
# handled here (vowels included).
function letter(c){
	return c ~ "[a-zA-ZéüåäöÉÜÅÄÖ]"
}

# hyphenate: return line with hyphenation marks inserted.
#
#   line  the text to process (one input record)
#
# Names after the gap in the parameter list are locals.
#   ok    non-zero once the current word has more than a lone leading
#         vowel, i.e. a break is allowed at the next consonant run
#   cons  consonants seen since the last vowel; they are held back until
#         we know whether a vowel follows, because the break goes before
#         the last of them
function hyphenate(line,    out, state, ok, n, a, i, c, cons, len, brk){
	out = ""
	state = INIT
	ok = 0
	cons = ""
	n = split(line, a, "")	# empty separator: one element per character
	for(i = 1; i <= n; i++){
		c = a[i]
		if(state == INIT){
			out = out c
			if(vowel(c)){
				state = VOWEL
			} else if(letter(c)){
				# word starts with a consonant
				ok++
			} else {
				# A non-letter ends a vowel-less token ("tv", "s.");
				# forget its consonants so they do not permit a
				# break in the following word.
				ok = 0
			}
		} else if(state == VOWEL){
			if(vowel(c)){
				out = out c
				ok++
			} else if(letter(c)){
				cons = c
				state = CONS
			} else {
				out = out c
				state = INIT
				ok = 0
			}
		} else if(vowel(c)){		# state == CONS
			len = length(cons)
			brk = ok ? mark : ""
			# break before the last consonant of the run
			out = out substr(cons, 1, len-1) brk substr(cons, len) c
			state = VOWEL
			ok++
		} else if(letter(c)){		# state == CONS
			cons = cons c
			ok++
		} else {			# state == CONS
			out = out cons c
			state = INIT
			ok = 0
		}
	}
	# The line ended inside a consonant run: emit the held-back letters.
	if(state == CONS)
		out = out cons
	return out
}

{
	print hyphenate($0)
}