| #define CHARSET_MAX 41 |
| |
| static const char * |
| getTok(const char **pp) |
| { |
| enum { inAtom, inString, init, inComment }; |
| int state = init; |
| const char *tokStart = 0; |
| for (;;) { |
| switch (**pp) { |
| case '\0': |
| return 0; |
| case ' ': |
| case '\r': |
| case '\t': |
| case '\n': |
| if (state == inAtom) |
| return tokStart; |
| break; |
| case '(': |
| if (state == inAtom) |
| return tokStart; |
| if (state != inString) |
| state++; |
| break; |
| case ')': |
| if (state > init) |
| --state; |
| else if (state != inString) |
| return 0; |
| break; |
| case ';': |
| case '/': |
| case '=': |
| if (state == inAtom) |
| return tokStart; |
| if (state == init) |
| return (*pp)++; |
| break; |
| case '\\': |
| ++*pp; |
| if (**pp == '\0') |
| return 0; |
| break; |
| case '"': |
| switch (state) { |
| case inString: |
| ++*pp; |
| return tokStart; |
| case inAtom: |
| return tokStart; |
| case init: |
| tokStart = *pp; |
| state = inString; |
| break; |
| } |
| break; |
| default: |
| if (state == init) { |
| tokStart = *pp; |
| state = inAtom; |
| } |
| break; |
| } |
| ++*pp; |
| } |
| /* not reached */ |
| } |
| |
| /* key must be lowercase ASCII */ |
| |
| static int |
| matchkey(const char *start, const char *end, const char *key) |
| { |
| if (!start) |
| return 0; |
| for (; start != end; start++, key++) |
| if (*start != *key && *start != 'A' + (*key - 'a')) |
| return 0; |
| return *key == '\0'; |
| } |
| |
| void |
| getXMLCharset(const char *buf, char *charset) |
| { |
| const char *next, *p; |
| |
| charset[0] = '\0'; |
| next = buf; |
| p = getTok(&next); |
| if (matchkey(p, next, "text")) |
| strcpy(charset, "us-ascii"); |
| else if (!matchkey(p, next, "application")) |
| return; |
| p = getTok(&next); |
| if (!p || *p != '/') |
| return; |
| p = getTok(&next); |
| if (matchkey(p, next, "xml")) |
| isXml = 1; |
| p = getTok(&next); |
| while (p) { |
| if (*p == ';') { |
| p = getTok(&next); |
| if (matchkey(p, next, "charset")) { |
| p = getTok(&next); |
| if (p && *p == '=') { |
| p = getTok(&next); |
| if (p) { |
| char *s = charset; |
| if (*p == '"') { |
| while (++p != next - 1) { |
| if (*p == '\\') |
| ++p; |
| if (s == charset + CHARSET_MAX - 1) { |
| charset[0] = '\0'; |
| break; |
| } |
| *s++ = *p; |
| } |
| *s++ = '\0'; |
| } |
| else { |
| if (next - p > CHARSET_MAX - 1) |
| break; |
| while (p != next) |
| *s++ = *p++; |
| *s = 0; |
| break; |
| } |
| } |
| } |
| } |
| } |
| else |
| p = getTok(&next); |
| } |
| } |
| |
| int |
| main(int argc, char **argv) |
| { |
| char buf[CHARSET_MAX]; |
| getXMLCharset(argv[1], buf); |
| printf("charset = \"%s\"\n", buf); |
| return 0; |
| } |