#!/bin/bash
# Check common misspellings: build the data files used to compare the
# suggestions of Hunspell and Aspell on a list of known misspellings.
#
# Input file format (one item per line):
#   word->word1, ...
# Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
#
# Environment variables:
#   HUNSPELL  dictionary passed to "hunspell -d"   (default: en_US)
#   ASPELL    language passed to "aspell -l"       (default: en_US)
#   INPUT     path of the input list               (default: List_of_common_misspellings.txt)
#
# Files written:
#   $INPUT.1  tab-separated items whose first field is rejected by Hunspell,
#             without items containing a dash, trailing spaces removed
#   $INPUT.2  items of $INPUT.1 also rejected by Aspell, with the suggestions
#             separated by tabs instead of commas
#   x.1       the single-word suggestions of $INPUT.2, one per line
#   x.2       the words of x.1 reported as misspelled by Hunspell or Aspell
#   $INPUT.3  items of $INPUT.2 whose first two suggestions are not in x.2
#   $INPUT.4  Aspell suggestions for the first field of $INPUT.3, tab-separated
#   $INPUT.5  Hunspell suggestions for the first field of $INPUT.3, tab-separated

set -u

hunspell=../../src/tools/hunspell
hlang=${HUNSPELL:-en_US}
alang=${ASPELL:-en_US}
input=${INPUT:-List_of_common_misspellings.txt}

# The field separator of every intermediate file is a tab.  It is spelled
# out here instead of being embedded as a literal (invisible) tab in the
# sed/grep/cut arguments, where editors and copy/paste silently turn it
# into a space.
tab=$'\t'

# Print an error message on stderr and stop.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Fail early instead of silently producing empty output files.
[[ -r "$input" ]] || die "cannot read input file: $input"
[[ -x "$hunspell" ]] || die "hunspell executable not found: $hunspell"
command -v aspell >/dev/null || die "aspell not found in PATH"

# remove bad words recognised by Hunspell as good
# ("->" becomes a tab; -1 checks only the first tab-separated field,
# -L prints the lines that contain a misspelled word)
sed "s/[-]>/${tab}/" <"$input" | "$hunspell" -d "$hlang" -1 -L |

# remove items with dash for Aspell
grep "^[^-]*${tab}" |

# remove spaces from end of lines
sed 's/ *$//' >"${input}.1"

# remove bad words recognised by Aspell as good
# (awk first reads the words listed by Aspell from stdin, then prints the
# lines of $input.1 whose first word is among them)
cut -f 1 -d "$tab" "${input}.1" | aspell -l "$alang" --list |
awk 'FILENAME=="-"{a[$1]=1;next}a[$1]{print$0}' - "${input}.1" |

# change commas with tabs
sed "s/, */${tab}/g" >"${input}.2"

# remove lines with unrecognised suggestions (except suggestion with spaces)
# x.1: every suggestion on its own line, multi-word suggestions dropped
cut -d "$tab" -f 2- "${input}.2" | tr "\t" "\n" | grep -v ' ' >x.1
# x.2: the suggestions reported as misspelled by either checker
{
  "$hunspell" -l -d "$hlang" <x.1
  aspell -l "$alang" --list <x.1
} >x.2
# NOTE(review): only the first two suggestions ($2 and $3) are tested against
# x.2; an unrecognised third or later suggestion does not remove the line.
awk 'BEGIN{FS="\t"}
FILENAME=="-"{a[$1]=1;next}a[$2]!=1 && a[$3]!=1{print $0}' - "${input}.2" <x.2 >"${input}.3"

# Aspell suggestions: run in -a (pipe) mode, drop the empty lines and the
# first line (the banner printed at start-up), strip everything up to the
# last ": " and separate the suggestions with tabs
cut -f 1 -d "$tab" "${input}.3" | aspell -l "$alang" -a | grep -v '^$' | sed -n '2,$p' |
sed "s/^.*: //;s/, /${tab}/g" >"${input}.4"

# Hunspell suggestions, same post-processing (-1: only the first field of
# each line is checked)
"$hunspell" -d "$hlang" -a -1 <"${input}.3" | grep -v '^$' | sed -n '2,$p' |
sed "s/^.*: //;s/, /${tab}/g" >"${input}.5"