#!/usr/bin/awk -f
#
# Utility to compare MS-LANGID definitions with those defined in ../../inc/lang.hxx
# Run in tools/source/intntl
#
# outputs new #define LANGUAGE_... 0x... and also some commented out substrings
# that were matched in already existing defines.
#
# Expects input from the saved page of
# http://www.microsoft.com/globaldev/reference/lcid-all.mspx
# filtered through ``html2text -nobs ...'', generated table:
# blank,name,hex,dec fields:
#    |Afrikaans_-_South_Africa____________|0436|1078_|
# Best if file cleaned up to _only_ contain the table entries, but not
# necessary, entries are filtered. Check output.
#
# complete command line:
# lynx -dump -source http://www.microsoft.com/globaldev/reference/lcid-all.mspx | html2text -nobs | awk -f lcid.awk >outfile
#
# Author: Eike Rathke <er@openoffice.org>
#

# Load the LANGUAGE_* defines that already exist in ../../inc/lang.hxx into
#   lang[HEX] = NAME
# where HEX is the define's value without the leading "0x" and both HEX and
# NAME are upper-cased. Afterwards switch the field separator so that the main
# input (the html2text table) is split on '|'.
BEGIN {
    hxx = "../../inc/lang.hxx"
    # getline yields 1 per line read, 0 at end of file and -1 on error; keep
    # the last status so an unreadable file can be told apart from EOF.
    while ((status = (getline < hxx)) > 0)
    {
        # Accept blanks or tabs between the tokens; the default field
        # splitting used for $2/$3 below treats both alike.
        if ($0 ~ /^#define[ \t]*LANGUAGE_[_A-Za-z0-9]*[ \t]*0x[0-9a-fA-F]/)
        {
            # lang[HEX]=NAME, substr() drops the "0x" prefix of the value
            lang[toupper(substr($3,3))] = toupper($2)
            #print substr($3,3) "=" $2
        }
    }
    if (status < 0)
    {
        # Without the reference defines every table entry would be reported
        # as new; say so on stderr instead of failing silently.
        print "lcid.awk: cannot read " hxx ", all entries will be reported as new" | "cat 1>&2"
        close("cat 1>&2")
    }
    close(hxx)
    # html2text table follows. Must be set only after lang.hxx was read with
    # the default whitespace splitting. A single-character FS other than a
    # blank is taken literally, so no escaping is needed ("\|" is not a
    # defined escape sequence in awk strings).
    FS = "|"
}

# Per-record processing of the '|' separated table:
#   $2 = language name, $3 = LCID in hex (field $1 precedes the first '|').
{
    # Not a complete table row, ignore.
    if (NF < 5)
        next

    # Third field is not a plain hex number (table header, ruler, ...):
    # remember the whole line for the report at the end.
    if ($3 !~ /^[0-9a-fA-F][0-9a-fA-F]*$/)
    {
        filtered[$3] = $0
        next
    }

    lcid_hex = toupper($3)

    # all[HEX]=string
    all[lcid_hex] = $2

    # new hex: newlang[HEX]=string
    if (!(lcid_hex in lang))
        newlang[lcid_hex] = $2
}

# Report, in three sections:
#   1. a "#define LANGUAGE_<NAME> 0x<HEX>" proposal for every LCID of the input
#      table that has no define yet, preceded by commented-out hints listing
#      existing defines whose name contains one of the words of the new name;
#   2. existing defines whose value does not occur in the input table;
#   3. table lines that were filtered out because their hex field was invalid.
# Note: "for (key in array)" visits keys in unspecified order, so the order of
# the entries within each section is not defined.
END {
    for (x in newlang)
    {
        # Break the table name into alphanumeric words; padding and
        # punctuation produce empty pieces that are skipped below.
        nwords = split(newlang[x], word, /[^A-Za-z0-9]/)
        def = ""
        # Walk the pieces by index: "for (i in word)" has no defined order and
        # could scramble the words of the generated identifier.
        for (i = 1; i <= nwords; i++)
        {
            if (!length(word[i]))
                continue
            if (def != "")
                def = def "_"
            aup = toupper(word[i])
            def = def aup
            # Hint at existing defines that already contain this word.
            for (known in lang)
            {
                if (lang[known] ~ aup)
                    printf( "// %-50s %s\n", word[i] ": " lang[known], known)
            }
        }
        printf( "#define LANGUAGE_%-26s 0x%s\n", def, x)
    }
    print "\n// --- reverse check follows ----------------------------------\n"
    for (x in lang)
    {
        if (!(x in all))
            print "// not in input file:   " x "  " lang[x]
    }
    print "\n// --- filtered table entries follow (if any) -----------------\n"
    for (x in filtered)
        print "// filtered:   " x "  " filtered[x]
}
