Project import
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8c2f3df
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,102 @@
+#
+# Copyright (c) 2010-2011 Nest, Inc.
+# All rights reserved.
+#
+# This document is the property of Nest. It is considered
+# confidential and proprietary information.
+#
+# This document may not be reproduced or transmitted in any form,
+# in whole or in part, without the express written permission of
+# Nest.
+#
+# Description:
+# This Makefile is used to build lpeg, a parsing expression grammar for Lua
+#
+
+.NOTPARALLEL:
+
+BuildConfigSpecialized := No
+BuildProductSpecialized := No
+EnableShared = Yes
+
+include pre.mak
+
+PackageName := lpeg
+
+PackageExtension := tar.gz
+PackageSeparator := -
+
+PackageArchive := $(PackageName).$(PackageExtension)
+PackageSourceDir := $(PackageName)$(PackageSeparator)$(PackageVersion)
+
+PackageBuildMakefile = $(call GenerateBuildPaths,makefile)
+
+LuaDir := sw/tps/lua
+LuaIncDirs := $(call GenerateResultPaths,$(LuaDir),include)
+LuaLibDir := $(call GenerateResultPaths,$(LuaDir),lib)
+
+CleanPaths += $(PackageLicenseFile)
+
+all: $(PackageDefaultGoal)
+
+# Generate the package license contents.
+
+$(PackageSourceDir)/LICENSE: source
+
+$(PackageLicenseFile): $(PackageSourceDir)/LICENSE
+ $(copy-result)
+
+# Extract the source from the archive and apply patches, if any.
+
+$(PackageSourceDir): $(PackageArchive) $(PackagePatchPaths)
+ $(expand-and-patch-package)
+
+# Prepare the sources.
+
+.PHONY: source
+source: | ${PackageSourceDir}
+
+# Patch the sources, if necessary.
+
+.PHONY: patch
+patch: source
+
+# Generate the package build makefile.
+
+$(PackageBuildMakefile): | $(PackageSourceDir) $(BuildDirectory)
+ $(call create-links,$(CURDIR)/$(PackageSourceDir),$(BuildDirectory))
+
+# Configure the source for building.
+
+.PHONY: configure
+configure: patch | ${PackageBuildMakefile}
+
+# Build the source.
+#
+# We have to unset MAKEFLAGS since they confuse the package build otherwise.
+
+.PHONY: build
+build: configure | $(BuildDirectory)
+ $(Verbose)unset MAKEFLAGS && \
+ $(MAKE) $(JOBSFLAG) -C $(BuildDirectory) \
+ CC="$(CC) -fPIC" LD="$(LD)" RANLIB=$(RANLIB) STRIP=$(STRIP) \
+ INSTALL="$(INSTALL) $(INSTALLFLAGS)" \
+ LUADIR=$(LuaIncDirs) \
+ lpeg.so
+
+# Stage the build to a temporary installation area.
+#
+# We have to unset MAKEFLAGS since they confuse the package build otherwise.
+
+$(ResultDirectory)/lpeg.so: $(BuildDirectory)/lpeg.so | ${ResultDirectory}
+ $(copy-result)
+
+.PHONY: stage
+stage: build $(ResultDirectory)/lpeg.so | $(ResultDirectory)
+
+clean:
+ $(Verbose)$(RM) $(RMFLAGS) -r $(PackageSourceDir)
+ $(Verbose)$(RM) $(RMFLAGS) -r $(BuildDirectory)
+ $(Verbose)$(RM) $(RMFLAGS) -r $(ResultDirectory)
+
+include post.mak
diff --git a/lpeg-0.10.2/HISTORY b/lpeg-0.10.2/HISTORY
new file mode 100644
index 0000000..0b8b757
--- /dev/null
+++ b/lpeg-0.10.2/HISTORY
@@ -0,0 +1,77 @@
+HISTORY for LPeg 0.10
+
+* Changes from version 0.9 to 0.10
+ -------------------------------
+ + backtrack stack has configurable size
+ + better error messages
+ + Notation for non-terminals in 're' back to A instead o <A>
+ + experimental look-behind pattern
+ + support for external extensions
+ + works with Lua 5.2
+ + consumes less C stack
+
+ - "and" predicates do not keep captures
+
+* Changes from version 0.8 to 0.9
+ -------------------------------
+ + The accumulator capture was replaced by a fold capture;
+ programs that used the old 'lpeg.Ca' will need small changes.
+ + Some support for character classes from old C locales.
+ + A new named-group capture.
+
+* Changes from version 0.7 to 0.8
+ -------------------------------
+ + New "match-time" capture.
+ + New "argument capture" that allows passing arguments into the pattern.
+ + Better documentation for 're'.
+ + Several small improvements for 're'.
+ + The 're' module has an incompatibility with previous versions:
+ now, any use of a non-terminal must be enclosed in angle brackets
+ (like <B>).
+
+* Changes from version 0.6 to 0.7
+ -------------------------------
+ + Several improvements in module 're':
+ - better documentation;
+ - support for most captures (all but accumulator);
+ - limited repetitions p{n,m}.
+ + Small improvements in efficiency.
+ + Several small bugs corrected (special thanks to Hans Hagen
+ and Taco Hoekwater).
+
+* Changes from version 0.5 to 0.6
+ -------------------------------
+ + Support for non-numeric indices in grammars.
+ + Some bug fixes (thanks to the luatex team).
+ + Some new optimizations; (thanks to Mike Pall).
+ + A new page layout (thanks to Andre Carregal).
+ + Minimal documentation for module 're'.
+
+* Changes from version 0.4 to 0.5
+ -------------------------------
+ + Several optimizations.
+ + lpeg.P now accepts booleans.
+ + Some new examples.
+ + A proper license.
+ + Several small improvements.
+
+* Changes from version 0.3 to 0.4
+ -------------------------------
+ + Static check for loops in repetitions and grammars.
+ + Removed label option in captures.
+ + The implementation of captures uses less memory.
+
+* Changes from version 0.2 to 0.3
+ -------------------------------
+ + User-defined patterns in Lua.
+ + Several new captures.
+
+* Changes from version 0.1 to 0.2
+ -------------------------------
+ + Several small corrections.
+ + Handles embedded zeros like any other character.
+ + Capture "name" can be any Lua value.
+ + Unlimited number of captures.
+ + Match gets an optional initial position.
+
+(end of HISTORY)
diff --git a/lpeg-0.10.2/lpeg-128.gif b/lpeg-0.10.2/lpeg-128.gif
new file mode 100644
index 0000000..bbf5e78
--- /dev/null
+++ b/lpeg-0.10.2/lpeg-128.gif
Binary files differ
diff --git a/lpeg-0.10.2/lpeg.c b/lpeg-0.10.2/lpeg.c
new file mode 100644
index 0000000..d46d463
--- /dev/null
+++ b/lpeg-0.10.2/lpeg.c
@@ -0,0 +1,2405 @@
+/*
+** $Id: lpeg.c,v 1.114 2011/02/16 15:02:20 roberto Exp $
+** LPeg - PEG pattern matching for Lua
+** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
+** written by Roberto Ierusalimschy
+*/
+
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "lpeg.h"
+
+
+#define VERSION "0.10"
+#define PATTERN_T "lpeg-pattern"
+#define MAXSTACKIDX "lpeg-maxstack"
+
+
+/*
+** compatibility with Lua 5.2
+*/
+#if (LUA_VERSION_NUM == 502)
+
+#undef lua_equal
+#define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ)
+
+#undef lua_getfenv
+#define lua_getfenv lua_getuservalue
+#undef lua_setfenv
+#define lua_setfenv lua_setuservalue
+
+#undef lua_objlen
+#define lua_objlen lua_rawlen
+
+#undef luaL_register
+#define luaL_register(L,n,f) \
+ { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); }
+
+#endif
+
+
+
+/* initial size for call/backtrack stack */
+#define INITBACK 100
+
+/* default maximum size for call/backtrack stack */
+#define MAXBACK INITBACK
+
+/* size for call/backtrack stack for verifier */
+#define MAXBACKVER 200
+
+/* initial size for capture's list */
+#define INITCAPSIZE 32
+
+
+/* index, on Lua stack, for subject */
+#define SUBJIDX 2
+
+/* number of fixed arguments to 'match' (before capture arguments) */
+#define FIXEDARGS 3
+
+/* index, on Lua stack, for substitution value cache */
+#define subscache(cs) ((cs)->ptop + 1)
+
+/* index, on Lua stack, for capture list */
+#define caplistidx(ptop) ((ptop) + 2)
+
+/* index, on Lua stack, for pattern's fenv */
+#define penvidx(ptop) ((ptop) + 3)
+
+/* index, on Lua stack, for backtracking stack */
+#define stackidx(ptop) ((ptop) + 4)
+
+
+
+typedef unsigned char byte;
+
+
+#define CHARSETSIZE ((UCHAR_MAX/CHAR_BIT) + 1)
+
+
+typedef byte Charset[CHARSETSIZE];
+
+
+/* Virtual Machine's instructions */
+typedef enum Opcode {
+ IAny, IChar, ISet, ISpan,
+ IBack,
+ IRet, IEnd,
+ IChoice, IJmp, ICall, IOpenCall,
+ ICommit, IPartialCommit, IBackCommit, IFailTwice, IFail, IGiveup,
+ IFunc,
+ IFullCapture, IEmptyCapture, IEmptyCaptureIdx,
+ IOpenCapture, ICloseCapture, ICloseRunTime
+} Opcode;
+
+
+#define ISJMP 0x1
+#define ISCHECK 0x2
+#define ISFIXCHECK 0x4
+#define ISNOFAIL 0x8
+#define ISCAPTURE 0x10
+#define ISMOVABLE 0x20
+#define ISFENVOFF 0x40
+
+static const int opproperties[] = {
+ /* IAny */ ISCHECK | ISFIXCHECK | ISJMP,
+ /* IChar */ ISCHECK | ISFIXCHECK | ISJMP,
+ /* ISet */ ISCHECK | ISFIXCHECK | ISJMP,
+ /* ISpan */ ISNOFAIL,
+ /* IBack */ 0,
+ /* IRet */ 0,
+ /* IEnd */ 0,
+ /* IChoice */ ISJMP,
+ /* IJmp */ ISJMP | ISNOFAIL,
+ /* ICall */ ISJMP,
+ /* IOpenCall */ ISFENVOFF,
+ /* ICommit */ ISJMP,
+ /* IPartialCommit */ ISJMP,
+ /* IBackCommit */ ISJMP,
+ /* IFailTwice */ 0,
+ /* IFail */ 0,
+ /* IGiveup */ 0,
+ /* IFunc */ ISCHECK | ISJMP,
+ /* IFullCapture */ ISCAPTURE | ISNOFAIL | ISFENVOFF,
+ /* IEmptyCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE,
+ /* IEmptyCaptureIdx */ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF,
+ /* IOpenCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF,
+ /* ICloseCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF,
+ /* ICloseRunTime */ ISCAPTURE | ISFENVOFF
+};
+
+
+typedef union Instruction {
+ struct Inst {
+ byte code;
+ byte aux;
+ short offset;
+ } i;
+ PattFunc f;
+ int iv;
+ byte buff[1];
+} Instruction;
+
+static const Instruction giveup = {{IGiveup, 0, 0}};
+
+#define getkind(op) ((op)->i.aux & 0xF)
+#define getoff(op) (((op)->i.aux >> 4) & 0xF)
+
+#define dest(p,x) ((x) + ((p)+(x))->i.offset)
+
+#define MAXOFF 0xF
+#define MAXAUX 0xFF
+
+/* maximum size (in elements) for a pattern */
+#define MAXPATTSIZE (SHRT_MAX - 10)
+
+
+#define isprop(op,p) (opproperties[(op)->i.code] & (p))
+#define isjmp(op) (isprop(op, ISJMP) && (op)->i.offset != 0)
+#define iscapture(op) isprop(op, ISCAPTURE)
+#define ischeck(op) (isprop(op, ISCHECK) && (op)->i.offset == 0)
+#define isfixcheck(op) (isprop(op, ISFIXCHECK) && (op)->i.offset == 0)
+#define istest(op) (isprop(op, ISCHECK) && (op)->i.offset != 0)
+#define isnofail(op) isprop(op, ISNOFAIL)
+#define ismovable(op) isprop(op, ISMOVABLE)
+#define isfenvoff(op) isprop(op, ISFENVOFF)
+
+
+/* kinds of captures */
+typedef enum CapKind {
+ Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction,
+ Cquery, Cstring, Csubst, Cfold, Cruntime, Cgroup
+} CapKind;
+
+#define iscapnosize(k) ((k) == Cposition || (k) == Cconst)
+
+
+typedef struct Capture {
+ const char *s; /* position */
+ short idx;
+ byte kind;
+ byte siz;
+} Capture;
+
+
+/* size (in elements) for an instruction plus extra l bytes */
+#define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1)
+
+
+/* size (in elements) for a ISet instruction */
+#define CHARSETINSTSIZE instsize(CHARSETSIZE)
+
+/* size (in elements) for a IFunc instruction */
+#define funcinstsize(p) ((p)->i.aux + 2)
+
+
+#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) b; }
+
+
+#define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))
+#define setchar(st,c) ((st)[(c) >> 3] |= (1 << ((c) & 7)))
+
+
+static int target (Instruction *p, int i);
+
+
+static int sizei (const Instruction *i) {
+ switch((Opcode)i->i.code) {
+ case ISet: case ISpan: return CHARSETINSTSIZE;
+ case IFunc: return funcinstsize(i);
+ default: return 1;
+ }
+}
+
+
+static const char *val2str (lua_State *L, int idx) {
+ const char *k = lua_tostring(L, idx);
+ if (k != NULL)
+ return lua_pushfstring(L, "rule '%s'", k);
+ else
+ return lua_pushfstring(L, "rule <a %s>", luaL_typename(L, idx));
+}
+
+
+static int getposition (lua_State *L, int t, int i) {
+ int res;
+ lua_getfenv(L, -1);
+ lua_rawgeti(L, -1, i); /* get key from pattern's environment */
+ lua_gettable(L, t); /* get position from positions table */
+ res = lua_tointeger(L, -1);
+ if (res == 0) { /* key has no registered position? */
+ lua_rawgeti(L, -2, i); /* get key again */
+ return luaL_error(L, "%s is not defined in given grammar", val2str(L, -1));
+ }
+ lua_pop(L, 2); /* remove environment and position */
+ return res;
+}
+
+
+/*
+** {======================================================
+** Printing patterns (for debugging)
+** =======================================================
+*/
+
+
+static void printcharset (const Charset st) {
+ int i;
+ printf("[");
+ for (i = 0; i <= UCHAR_MAX; i++) {
+ int first = i;
+ while (testchar(st, i) && i <= UCHAR_MAX) i++;
+ if (i - 1 == first) /* unary range? */
+ printf("(%02x)", first);
+ else if (i - 1 > first) /* non-empty range? */
+ printf("(%02x-%02x)", first, i - 1);
+ }
+ printf("]");
+}
+
+
+static void printcapkind (int kind) {
+ const char *const modes[] = {
+ "close", "position", "constant", "backref",
+ "argument", "simple", "table", "function",
+ "query", "string", "substitution", "fold",
+ "runtime", "group"};
+ printf("%s", modes[kind]);
+}
+
+
+static void printjmp (const Instruction *op, const Instruction *p) {
+ printf("-> ");
+ if (p->i.offset == 0) printf("FAIL");
+ else printf("%d", (int)(dest(0, p) - op));
+}
+
+
+static void printinst (const Instruction *op, const Instruction *p) {
+ const char *const names[] = {
+ "any", "char", "set", "span", "back",
+ "ret", "end",
+ "choice", "jmp", "call", "open_call",
+ "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
+ "func",
+ "fullcapture", "emptycapture", "emptycaptureidx", "opencapture",
+ "closecapture", "closeruntime"
+ };
+ printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
+ switch ((Opcode)p->i.code) {
+ case IChar: {
+ printf("'%c'", p->i.aux);
+ printjmp(op, p);
+ break;
+ }
+ case IAny: {
+ printf("* %d", p->i.aux);
+ printjmp(op, p);
+ break;
+ }
+ case IFullCapture: case IOpenCapture:
+ case IEmptyCapture: case IEmptyCaptureIdx:
+ case ICloseCapture: case ICloseRunTime: {
+ printcapkind(getkind(p));
+ printf("(n = %d) (off = %d)", getoff(p), p->i.offset);
+ break;
+ }
+ case ISet: {
+ printcharset((p+1)->buff);
+ printjmp(op, p);
+ break;
+ }
+ case ISpan: {
+ printcharset((p+1)->buff);
+ break;
+ }
+ case IOpenCall: {
+ printf("-> %d", p->i.offset);
+ break;
+ }
+ case IChoice: {
+ printjmp(op, p);
+ printf(" (%d)", p->i.aux);
+ break;
+ }
+ case IJmp: case ICall: case ICommit:
+ case IPartialCommit: case IBackCommit: {
+ printjmp(op, p);
+ break;
+ }
+ default: break;
+ }
+ printf("\n");
+}
+
+
+static void printpatt (Instruction *p) {
+ Instruction *op = p;
+ for (;;) {
+ printinst(op, p);
+ if ((Opcode)p->i.code == IEnd) break;
+ p += sizei(p);
+ }
+}
+
+
+#if 0
+static void printcap (Capture *cap) {
+ printcapkind(cap->kind);
+ printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s);
+}
+
+
+static void printcaplist (Capture *cap) {
+ for (; cap->s; cap++) printcap(cap);
+}
+#endif
+
+/* }====================================================== */
+
+
+/*
+** {======================================================
+** Virtual Machine
+** =======================================================
+*/
+
+
+typedef struct Stack {
+ const char *s;
+ const Instruction *p;
+ int caplevel;
+} Stack;
+
+
+#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
+
+
+static int runtimecap (lua_State *L, Capture *close, Capture *ocap,
+ const char *o, const char *s, int ptop);
+
+
+static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
+ Capture *newc;
+ if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
+ luaL_error(L, "too many captures");
+ newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
+ memcpy(newc, cap, captop * sizeof(Capture));
+ lua_replace(L, caplistidx(ptop));
+ return newc;
+}
+
+
+static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
+ Stack *stack = getstackbase(L, ptop);
+ Stack *newstack;
+ int n = *stacklimit - stack;
+ int max, newn;
+ lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
+ max = lua_tointeger(L, -1);
+ lua_pop(L, 1);
+ if (n >= max)
+ luaL_error(L, "too many pending calls/choices");
+ newn = 2*n; if (newn > max) newn = max;
+ newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
+ memcpy(newstack, stack, n * sizeof(Stack));
+ lua_replace(L, stackidx(ptop));
+ *stacklimit = newstack + newn;
+ return newstack + n;
+
+}
+
+
+static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
+ int i;
+ assert(base[0].kind == Cruntime && base[0].siz == 0);
+ base[0].idx = fd; /* first returned capture */
+ for (i = 1; i < n; i++) { /* add extra captures */
+ base[i].siz = 1; /* mark it as closed */
+ base[i].s = s;
+ base[i].kind = Cruntime;
+ base[i].idx = fd + i; /* stack index */
+ }
+ base[n].kind = Cclose; /* add closing entry */
+ base[n].siz = 1;
+ base[n].s = s;
+}
+
+
+#define condfailed(p) { int f = p->i.offset; if (f) p+=f; else goto fail; }
+
+static const char *match (lua_State *L,
+ const char *o, const char *s, const char *e,
+ Instruction *op, Capture *capture, int ptop) {
+ Stack stackbase[INITBACK];
+ Stack *stacklimit = stackbase + INITBACK;
+ Stack *stack = stackbase; /* point to first empty slot in stack */
+ int capsize = INITCAPSIZE;
+ int captop = 0; /* point to first empty slot in captures */
+ const Instruction *p = op;
+ stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
+ lua_pushlightuserdata(L, stackbase);
+ for (;;) {
+#if defined(DEBUG)
+ printf("s: |%s| stck: %d c: %d ",
+ s, stack - getstackbase(L, ptop), captop);
+ printinst(op, p);
+#endif
+ switch ((Opcode)p->i.code) {
+ case IEnd: {
+ assert(stack == getstackbase(L, ptop) + 1);
+ capture[captop].kind = Cclose;
+ capture[captop].s = NULL;
+ return s;
+ }
+ case IGiveup: {
+ assert(stack == getstackbase(L, ptop));
+ return NULL;
+ }
+ case IRet: {
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
+ p = (--stack)->p;
+ continue;
+ }
+ case IAny: {
+ int n = p->i.aux;
+ if (n <= e - s) { p++; s += n; }
+ else condfailed(p);
+ continue;
+ }
+ case IChar: {
+ if ((byte)*s == p->i.aux && s < e) { p++; s++; }
+ else condfailed(p);
+ continue;
+ }
+ case ISet: {
+ int c = (byte)*s;
+ if (testchar((p+1)->buff, c) && s < e)
+ { p += CHARSETINSTSIZE; s++; }
+ else condfailed(p);
+ continue;
+ }
+ case IBack: {
+ int n = p->i.aux;
+ if (n > s - o) goto fail;
+ s -= n; p++;
+ continue;
+ }
+ case ISpan: {
+ for (; s < e; s++) {
+ int c = (byte)*s;
+ if (!testchar((p+1)->buff, c)) break;
+ }
+ p += CHARSETINSTSIZE;
+ continue;
+ }
+ case IFunc: {
+ const char *r = (p+1)->f(s, e, o, (p+2)->buff);
+ if (r != NULL) { s = r; p += funcinstsize(p); }
+ else condfailed(p);
+ continue;
+ }
+ case IJmp: {
+ p += p->i.offset;
+ continue;
+ }
+ case IChoice: {
+ if (stack == stacklimit)
+ stack = doublestack(L, &stacklimit, ptop);
+ stack->p = dest(0, p);
+ stack->s = s - p->i.aux;
+ stack->caplevel = captop;
+ stack++;
+ p++;
+ continue;
+ }
+ case ICall: {
+ if (stack == stacklimit)
+ stack = doublestack(L, &stacklimit, ptop);
+ stack->s = NULL;
+ stack->p = p + 1; /* save return address */
+ stack++;
+ p += p->i.offset;
+ continue;
+ }
+ case ICommit: {
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
+ stack--;
+ p += p->i.offset;
+ continue;
+ }
+ case IPartialCommit: {
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
+ (stack - 1)->s = s;
+ (stack - 1)->caplevel = captop;
+ p += p->i.offset;
+ continue;
+ }
+ case IBackCommit: {
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
+ s = (--stack)->s;
+ captop = stack->caplevel;
+ p += p->i.offset;
+ continue;
+ }
+ case IFailTwice:
+ assert(stack > getstackbase(L, ptop));
+ stack--;
+ /* go through */
+ case IFail:
+ fail: { /* pattern failed: try to backtrack */
+ do { /* remove pending calls */
+ assert(stack > getstackbase(L, ptop));
+ s = (--stack)->s;
+ } while (s == NULL);
+ captop = stack->caplevel;
+ p = stack->p;
+ continue;
+ }
+ case ICloseRunTime: {
+ int fr = lua_gettop(L) + 1; /* stack index of first result */
+ int ncap = runtimecap(L, capture + captop, capture, o, s, ptop);
+ lua_Integer res = lua_tointeger(L, fr) - 1; /* offset */
+ int n = lua_gettop(L) - fr; /* number of new captures */
+ if (res == -1) { /* may not be a number */
+ if (!lua_toboolean(L, fr)) { /* false value? */
+ lua_settop(L, fr - 1); /* remove results */
+ goto fail; /* and fail */
+ }
+ else if (lua_isboolean(L, fr)) /* true? */
+ res = s - o; /* keep current position */
+ }
+ if (res < s - o || res > e - o)
+ luaL_error(L, "invalid position returned by match-time capture");
+ s = o + res; /* update current position */
+ captop -= ncap; /* remove nested captures */
+ lua_remove(L, fr); /* remove first result (offset) */
+ if (n > 0) { /* captures? */
+ if ((captop += n + 1) >= capsize) {
+ capture = doublecap(L, capture, captop, ptop);
+ capsize = 2 * captop;
+ }
+ adddyncaptures(s, capture + captop - n - 1, n, fr);
+ }
+ p++;
+ continue;
+ }
+ case ICloseCapture: {
+ const char *s1 = s - getoff(p);
+ assert(captop > 0);
+ if (capture[captop - 1].siz == 0 &&
+ s1 - capture[captop - 1].s < UCHAR_MAX) {
+ capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
+ p++;
+ continue;
+ }
+ else {
+ capture[captop].siz = 1; /* mark entry as closed */
+ goto capture;
+ }
+ }
+ case IEmptyCapture: case IEmptyCaptureIdx:
+ capture[captop].siz = 1; /* mark entry as closed */
+ goto capture;
+ case IOpenCapture:
+ capture[captop].siz = 0; /* mark entry as open */
+ goto capture;
+ case IFullCapture:
+ capture[captop].siz = getoff(p) + 1; /* save capture size */
+ capture: {
+ capture[captop].s = s - getoff(p);
+ capture[captop].idx = p->i.offset;
+ capture[captop].kind = getkind(p);
+ if (++captop >= capsize) {
+ capture = doublecap(L, capture, captop, ptop);
+ capsize = 2 * captop;
+ }
+ p++;
+ continue;
+ }
+ case IOpenCall: {
+ lua_rawgeti(L, penvidx(ptop), p->i.offset);
+ luaL_error(L, "reference to %s outside a grammar", val2str(L, -1));
+ }
+ default: assert(0); return NULL;
+ }
+ }
+}
+
+/* }====================================================== */
+
+
+/*
+** {======================================================
+** Verifier
+** =======================================================
+*/
+
+
+/*
+** check whether pattern may go from 'p' to 'e' without consuming any
+** input. Raises an error if it detects a left recursion. 'op' points
+** the beginning of the pattern. If pattern belongs to a grammar,
+** 'rule' is the stack index where is its corresponding key (only for
+** error messages) and 'posttable' is the stack index with a table
+** mapping rule keys to the position of their code in the pattern.
+*/
+static int verify (lua_State *L, Instruction *op, const Instruction *p,
+ Instruction *e, int postable, int rule) {
+ static const char dummy[] = "";
+ Stack back[MAXBACKVER];
+ int backtop = 0; /* point to first empty slot in back */
+ while (p != e) {
+ switch ((Opcode)p->i.code) {
+ case IRet: {
+ p = back[--backtop].p;
+ continue;
+ }
+ case IChoice: {
+ if (backtop >= MAXBACKVER)
+ return luaL_error(L, "too many pending calls/choices");
+ back[backtop].p = dest(0, p);
+ back[backtop++].s = dummy;
+ p++;
+ continue;
+ }
+ case ICall: {
+ assert((p + 1)->i.code != IRet); /* no tail call */
+ if (backtop >= MAXBACKVER)
+ return luaL_error(L, "too many pending calls/choices");
+ back[backtop].s = NULL;
+ back[backtop++].p = p + 1;
+ goto dojmp;
+ }
+ case IOpenCall: {
+ int i;
+ if (postable == 0) /* grammar still not fixed? */
+ goto fail; /* to be verified later */
+ for (i = 0; i < backtop; i++) {
+ if (back[i].s == NULL && back[i].p == p + 1)
+ return luaL_error(L, "%s is left recursive", val2str(L, rule));
+ }
+ if (backtop >= MAXBACKVER)
+ return luaL_error(L, "too many pending calls/choices");
+ back[backtop].s = NULL;
+ back[backtop++].p = p + 1;
+ p = op + getposition(L, postable, p->i.offset);
+ continue;
+ }
+ case IBackCommit:
+ case ICommit: {
+ assert(backtop > 0 && p->i.offset > 0);
+ backtop--;
+ goto dojmp;
+ }
+ case IPartialCommit: {
+ assert(backtop > 0);
+ if (p->i.offset > 0) goto dojmp; /* forward jump */
+ else { /* loop will be detected when checking corresponding rule */
+ assert(postable != 0);
+ backtop--;
+ p++; /* just go on now */
+ continue;
+ }
+ }
+ case IBack: {
+ if (p->i.aux == 1 && isfixcheck(p + 1)) { /* char test? */
+ p++; /* skip back instruction */
+ p += sizei(p); /* skip char test */
+ }
+ else { /* standard lookbehind code */
+ assert((Opcode)(p - 1)->i.code == IChoice); /* look behind */
+ backtop--;
+ p += (p - 1)->i.offset;
+ assert((Opcode)(p - 1)->i.code == IFail); /* look behind */
+ }
+ continue;
+ }
+ case IAny:
+ case IChar:
+ case ISet: {
+ const Instruction *next = p + sizei(p);
+ if ((Opcode)next->i.code == IBack)
+ p = next + 1; /* continue after the back instruction */
+ else if (p->i.offset == 0) goto fail;
+ else /* jump */
+ p += p->i.offset;
+ continue;
+ }
+ case IJmp:
+ dojmp: {
+ p += p->i.offset;
+ continue;
+ }
+ case IFailTwice: /* 'not' predicate */
+ goto fail; /* body could have failed; try to backtrack it */
+ case IFail: {
+ if (p > op && (p - 1)->i.code == IBackCommit) { /* 'and' predicate? */
+ p++; /* pretend it succeeded and go ahead */
+ continue;
+ }
+ /* else failed: go through */
+ }
+ fail: { /* pattern failed: try to backtrack */
+ do {
+ if (backtop-- == 0)
+ return 1; /* no more backtracking */
+ } while (back[backtop].s == NULL);
+ p = back[backtop].p;
+ continue;
+ }
+ case ISpan:
+ case IOpenCapture: case ICloseCapture:
+ case IEmptyCapture: case IEmptyCaptureIdx:
+ case IFullCapture: {
+ p += sizei(p);
+ continue;
+ }
+ case ICloseRunTime: {
+ goto fail; /* be liberal in this case */
+ }
+ case IFunc: {
+ const char *r = (p+1)->f(dummy, dummy, dummy, (p+2)->buff);
+ if (r != NULL) { p += funcinstsize(p); }
+ else condfailed(p);
+ continue;
+ }
+ case IEnd: /* cannot happen (should stop before it) */
+ default: assert(0); return 0;
+ }
+ }
+ assert(backtop == 0);
+ return 0;
+}
+
+
+static void checkrule (lua_State *L, Instruction *op, int from, int to,
+ int postable, int rule) {
+ int i;
+ int lastopen = 0; /* more recent OpenCall seen in the code */
+ for (i = from; i < to; i += sizei(op + i)) {
+ if (op[i].i.code == IPartialCommit && op[i].i.offset < 0) { /* loop? */
+ int start = dest(op, i);
+ assert(op[start - 1].i.code == IChoice &&
+ dest(op, start - 1) == target(op, i + 1));
+ if (start <= lastopen) { /* loop does contain an open call? */
+ if (!verify(L, op, op + start, op + i, postable, rule)) /* check body */
+ luaL_error(L, "possible infinite loop in %s", val2str(L, rule));
+ }
+ }
+ else if (op[i].i.code == IOpenCall)
+ lastopen = i;
+ }
+ assert(op[i - 1].i.code == IRet);
+ verify(L, op, op + from, op + to - 1, postable, rule);
+}
+
+
+
+
+/* }====================================================== */
+
+
+/*
+** {======================================================
+** Building Patterns
+** =======================================================
+*/
+
+enum charsetanswer { NOINFO, ISCHARSET, VALIDSTARTS };
+
+typedef struct CharsetTag {
+ enum charsetanswer tag;
+ Charset cs;
+} CharsetTag;
+
+
+static Instruction *getpatt (lua_State *L, int idx, int *size);
+
+
+static void check2test (Instruction *p, int n) {
+ assert(ischeck(p) && n != 0);
+ p->i.offset = n;
+}
+
+
+/*
+** invert array slice p[0]-p[e] (both inclusive)
+*/
+static void invert (Instruction *p, int e) {
+ int i;
+ for (i = 0; i < e; i++, e--) {
+ Instruction temp = p[i];
+ p[i] = p[e];
+ p[e] = temp;
+ }
+}
+
+
+/*
+** rotate array slice p[0]-p[e] (both inclusive) 'n' steps
+** to the 'left'
+*/
+static void rotate (Instruction *p, int e, int n) {
+ invert(p, n - 1);
+ invert(p + n, e - n);
+ invert(p, e);
+}
+
+
+#define op_step(p) ((p)->i.code == IAny ? (p)->i.aux : 1)
+
+
+static int skipchecks (Instruction *p, int up, int *pn) {
+ int i, n = 0;
+ for (i = 0; isfixcheck(p + i); i += sizei(p + i)) {
+ int st = op_step(p + i);
+ if (n + st > MAXOFF - up) break;
+ n += st;
+ }
+ *pn = n;
+ return i;
+}
+
+
+#define ismovablecap(op) (ismovable(op) && getoff(op) < MAXOFF)
+
+static void optimizecaptures (Instruction *p) {
+ int i;
+ int limit = 0;
+ for (i = 0; p[i].i.code != IEnd; i += sizei(p + i)) {
+ if (isjmp(p + i) && dest(p, i) >= limit)
+ limit = dest(p, i) + 1; /* do not optimize jump targets */
+ else if (i >= limit && ismovablecap(p + i) && isfixcheck(p + i + 1)) {
+ int end, n, j; /* found a border capture|check */
+ int maxoff = getoff(p + i);
+ int start = i;
+ /* find first capture in the group */
+ while (start > limit && ismovablecap(p + start - 1)) {
+ start--;
+ if (getoff(p + start) > maxoff) maxoff = getoff(p + start);
+ }
+ end = skipchecks(p + i + 1, maxoff, &n) + i; /* find last check */
+ if (n == 0) continue; /* first check is too big to move across */
+ assert(n <= MAXOFF && start <= i && i < end);
+ for (j = start; j <= i; j++)
+ p[j].i.aux += (n << 4); /* correct offset of captures to be moved */
+ rotate(p + start, end - start, i - start + 1); /* move them up */
+ i = end;
+ assert(isfixcheck(p + start) && iscapture(p + i));
+ }
+ }
+}
+
+
+static int target (Instruction *p, int i) {
+ while (p[i].i.code == IJmp) i += p[i].i.offset;
+ return i;
+}
+
+
+static void optimizejumps (Instruction *p) {
+ int i;
+ for (i = 0; p[i].i.code != IEnd; i += sizei(p + i)) {
+ if (isjmp(p + i))
+ p[i].i.offset = target(p, dest(p, i)) - i;
+ }
+}
+
+
+static void optimizechoice (Instruction *p) {
+ assert(p->i.code == IChoice);
+ if (isfixcheck(p + 1)) {
+ int lc = sizei(p + 1);
+ rotate(p, lc, 1);
+ assert(isfixcheck(p) && (p + lc)->i.code == IChoice);
+ (p + lc)->i.aux = op_step(p);
+ check2test(p, (p + lc)->i.offset);
+ (p + lc)->i.offset -= lc;
+ }
+}
+
+
+/*
+** A 'headfail' pattern is a pattern that can only fails in its first
+** instruction, which must be a check.
+*/
+static int isheadfail (Instruction *p) {
+ if (!ischeck(p)) return 0;
+ /* check that other operations cannot fail */
+ for (p += sizei(p); p->i.code != IEnd; p += sizei(p))
+ if (!isnofail(p)) return 0;
+ return 1;
+}
+
+
+#define checkpattern(L, idx) ((Instruction *)luaL_checkudata(L, idx, PATTERN_T))
+
+
+/*
+** Return the number of elements in the ktable of a pattern.
+** in Lua 5.2, default "environment" for patterns is nil, not
+** a table. Treat it as an empty table.
+*/
+static int ktablelen (lua_State *L, int idx) {
+ if (!lua_istable(L, idx)) return 0;
+ else return lua_objlen(L, idx);
+}
+
+
+/*
+** join the elements of the ktable from pattern 'p1' into the ktable of
+** the pattern at the top of the stack ('p'). If 'p1' has no elements,
+** 'p' keeps its original ktable. If 'p' has no elements, it shares
+** 'p1' ktable. Otherwise, this function creates a new ktable for 'p'.
+** Return the offset of original 'p' elements in the new ktable.
+*/
+static int jointable (lua_State *L, int p1) {
+ int n, n1, i;
+ lua_getfenv(L, p1);
+ n1 = ktablelen(L, -1); /* number of elements in p1's env */
+ lua_getfenv(L, -2);
+ if (n1 == 0 || lua_equal(L, -2, -1)) {
+ lua_pop(L, 2);
+ return 0; /* no need to change anything */
+ }
+ n = ktablelen(L, -1); /* number of elements in p's env */
+ if (n == 0) {
+ lua_pop(L, 1); /* removes p env */
+ lua_setfenv(L, -2); /* p now shares p1's env */
+ return 0; /* no need to correct anything */
+ }
+ lua_createtable(L, n + n1, 0);
+ /* stack: p; p1 env; p env; new p env */
+ for (i = 1; i <= n; i++) {
+ lua_rawgeti(L, -2, i);
+ lua_rawseti(L, -2, i);
+ }
+ for (i = 1; i <= n1; i++) {
+ lua_rawgeti(L, -3, i);
+ lua_rawseti(L, -2, n + i);
+ }
+ lua_setfenv(L, -4); /* new table becomes p env */
+ lua_pop(L, 2); /* remove p1 env and old p env */
+ return n;
+}
+
+
+#define copypatt(p1,p2,sz) memcpy(p1, p2, (sz) * sizeof(Instruction));
+
+#define pattsize(L,idx) (lua_objlen(L, idx)/sizeof(Instruction) - 1)
+
+
+static int addpatt (lua_State *L, Instruction *p, int p1idx) {
+ Instruction *p1 = (Instruction *)lua_touserdata(L, p1idx);
+ int sz = pattsize(L, p1idx);
+ int corr = jointable(L, p1idx);
+ copypatt(p, p1, sz + 1);
+ if (corr != 0) {
+ Instruction *px;
+ for (px = p; px < p + sz; px += sizei(px)) {
+ if (isfenvoff(px) && px->i.offset != 0)
+ px->i.offset += corr;
+ }
+ }
+ return sz;
+}
+
+
+static void setinstaux (Instruction *i, Opcode op, int offset, int aux) {
+ assert(aux <= MAXAUX);
+ i->i.code = op;
+ i->i.offset = offset;
+ i->i.aux = aux;
+}
+
+#define setinst(i,op,off) setinstaux(i,op,off,0)
+
+#define setinstcap(i,op,idx,k,n) setinstaux(i,op,idx,((k) | ((n) << 4)))
+
+
+/*
+** create a new ktable for pattern at the stack top, mapping
+** '1' to the value at stack position 'vidx'.
+*/
+static int value2fenv (lua_State *L, int vidx) {
+ lua_createtable(L, 1, 0);
+ lua_pushvalue(L, vidx);
+ lua_rawseti(L, -2, 1);
+ lua_setfenv(L, -2);
+ return 1;
+}
+
+
+static Instruction *newpatt (lua_State *L, size_t n) {
+ Instruction *p;
+ if (n >= MAXPATTSIZE - 1)
+ luaL_error(L, "pattern too big");
+ p = (Instruction *)lua_newuserdata(L, (n + 1) * sizeof(Instruction));
+ luaL_getmetatable(L, PATTERN_T);
+ lua_setmetatable(L, -2);
+ setinst(p + n, IEnd, 0);
+ return p;
+}
+
+
+static void fillcharset (Instruction *p, Charset cs) {
+ switch (p[0].i.code) {
+ case ISet: {
+ loopset(i, cs[i] = p[1].buff[i]);
+ break;
+ }
+ case IChar: {
+ loopset(i, cs[i] = 0);
+ setchar(cs, p[0].i.aux);
+ break;
+ }
+ default: { /* any char may start unhandled instructions */
+ loopset(i, cs[i] = 0xff);
+ break;
+ }
+ }
+}
+
+
+/*
+** Function 'tocharset' gets information about which chars may be a
+** valid start for a pattern.
+*/
+
+static enum charsetanswer tocharset (Instruction *p, CharsetTag *c) {
+ if (isfixcheck(p)) {
+ fillcharset(p, c->cs);
+ if ((p + sizei(p))->i.code == IEnd && op_step(p) == 1)
+ c->tag = ISCHARSET;
+ else
+ c->tag = VALIDSTARTS;
+ }
+ else
+ c->tag = NOINFO;
+ return c->tag;
+}
+
+
+static int exclusiveset (Charset c1, Charset c2) {
+ /* non-empty intersection? */
+ loopset(i, {if ((c1[i] & c2[i]) != 0) return 0;});
+ return 1; /* no intersection */
+}
+
+
+static int exclusive (CharsetTag *c1, CharsetTag *c2) {
+ if (c1->tag == NOINFO || c2->tag == NOINFO)
+ return 0; /* one of them is not filled */
+ else return exclusiveset(c1->cs, c2->cs);
+}
+
+
+static Instruction *newcharset (lua_State *L) {
+ Instruction *p = newpatt(L, CHARSETINSTSIZE);
+ p[0].i.code = ISet;
+ p[0].i.offset = 0;
+ loopset(i, p[1].buff[i] = 0);
+ return p;
+}
+
+
+static int set_l (lua_State *L) {
+ size_t l;
+ const char *s = luaL_checklstring(L, 1, &l);
+ if (l == 1)
+ getpatt(L, 1, NULL); /* a unit set is equivalent to a literal */
+ else {
+ Instruction *p = newcharset(L);
+ while (l--) {
+ setchar(p[1].buff, (byte)(*s));
+ s++;
+ }
+ }
+ return 1;
+}
+
+
+static int range_l (lua_State *L) {
+ int arg;
+ int top = lua_gettop(L);
+ Instruction *p = newcharset(L);
+ for (arg = 1; arg <= top; arg++) {
+ int c;
+ size_t l;
+ const char *r = luaL_checklstring(L, arg, &l);
+ luaL_argcheck(L, l == 2, arg, "range must have two characters");
+ for (c = (byte)r[0]; c <= (byte)r[1]; c++)
+ setchar(p[1].buff, c);
+ }
+ return 1;
+}
+
+
+static int nter_l (lua_State *L) {
+ Instruction *p;
+ luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected");
+ p = newpatt(L, 1);
+ setinst(p, IOpenCall, value2fenv(L, 1));
+ return 1;
+}
+
+
+
+static int testpattern (lua_State *L, int idx) {
+ if (lua_touserdata(L, idx)) { /* value is a userdata? */
+ if (lua_getmetatable(L, idx)) { /* does it have a metatable? */
+ luaL_getmetatable(L, PATTERN_T);
+ if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */
+ lua_pop(L, 2); /* remove both metatables */
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+
+static Instruction *fix_l (lua_State *L, int t) {
+ Instruction *p;
+ int i;
+ int totalsize = 2; /* include initial call and jump */
+ int n = 0; /* number of rules */
+ int base = lua_gettop(L);
+ lua_newtable(L); /* to store relative positions of each rule */
+ lua_pushinteger(L, 1); /* default initial rule */
+ /* collect patterns and compute sizes */
+ lua_pushnil(L);
+ while (lua_next(L, t) != 0) {
+ int l;
+ if (lua_tonumber(L, -2) == 1 && lua_isstring(L, -1)) {
+ lua_replace(L, base + 2); /* use this value as initial rule */
+ continue;
+ }
+ if (!testpattern(L, -1))
+ luaL_error(L, "%s is not a pattern", val2str(L, -2));
+ l = pattsize(L, -1) + 1; /* space for pattern + ret */
+ if (totalsize >= MAXPATTSIZE - l)
+ luaL_error(L, "grammar too large");
+ luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules");
+ lua_insert(L, -2); /* put key on top */
+ lua_pushvalue(L, -1); /* duplicate key (for lua_next) */
+ lua_pushvalue(L, -1); /* duplicate key (to index positions table)) */
+ lua_pushinteger(L, totalsize); /* position for this rule */
+ lua_settable(L, base + 1); /* store key=>position in positions table */
+ totalsize += l;
+ n++;
+ }
+ luaL_argcheck(L, n > 0, t, "empty grammar");
+ p = newpatt(L, totalsize); /* create new pattern */
+ p++; /* save space for call */
+ setinst(p++, IJmp, totalsize - 1); /* after call, jumps to the end */
+ for (i = 1; i <= n; i++) { /* copy all rules into new pattern */
+ p += addpatt(L, p, base + 1 + i*2);
+ setinst(p++, IRet, 0);
+ }
+ p -= totalsize; /* back to first position */
+ totalsize = 2; /* go through each rule's position */
+ for (i = 1; i <= n; i++) { /* check all rules */
+ int l = pattsize(L, base + 1 + i*2) + 1;
+ checkrule(L, p, totalsize, totalsize + l, base + 1, base + 2 + i*2);
+ totalsize += l;
+ }
+ lua_pushvalue(L, base + 2); /* get initial rule */
+ lua_gettable(L, base + 1); /* get its position in postions table */
+ i = lua_tonumber(L, -1); /* convert to number */
+ lua_pop(L, 1);
+ if (i == 0) /* is it defined? */
+ luaL_error(L, "initial rule not defined in given grammar");
+ setinst(p, ICall, i); /* first instruction calls initial rule */
+ /* correct calls */
+ for (i = 0; i < totalsize; i += sizei(p + i)) {
+ if (p[i].i.code == IOpenCall) {
+ int pos = getposition(L, base + 1, p[i].i.offset);
+ p[i].i.code = (p[target(p, i + 1)].i.code == IRet) ? IJmp : ICall;
+ p[i].i.offset = pos - i;
+ }
+ }
+ optimizejumps(p);
+ lua_replace(L, t); /* put new pattern in old's position */
+ lua_settop(L, base); /* remove rules and positions table */
+ return p;
+}
+
+
+static Instruction *any (lua_State *L, int n, int extra, int *offsetp) {
+ int offset = offsetp ? *offsetp : 0;
+ Instruction *p = newpatt(L, (n - 1)/UCHAR_MAX + extra + 1);
+ Instruction *p1 = p + offset;
+ for (; n > UCHAR_MAX; n -= UCHAR_MAX)
+ setinstaux(p1++, IAny, 0, UCHAR_MAX);
+ setinstaux(p1++, IAny, 0, n);
+ if (offsetp) *offsetp = p1 - p;
+ return p;
+}
+
+
+static Instruction *getpatt (lua_State *L, int idx, int *size) {
+ Instruction *p;
+ switch (lua_type(L, idx)) {
+ case LUA_TSTRING: {
+ size_t i, len;
+ const char *s = lua_tolstring(L, idx, &len);
+ p = newpatt(L, len);
+ for (i = 0; i < len; i++)
+ setinstaux(p + i, IChar, 0, (byte)s[i]);
+ lua_replace(L, idx);
+ break;
+ }
+ case LUA_TNUMBER: {
+ int n = lua_tointeger(L, idx);
+ if (n == 0) /* empty pattern? */
+ p = newpatt(L, 0);
+ else if (n > 0)
+ p = any(L, n, 0, NULL);
+ else if (-n <= UCHAR_MAX) {
+ p = newpatt(L, 2);
+ setinstaux(p, IAny, 2, -n);
+ setinst(p + 1, IFail, 0);
+ }
+ else {
+ int offset = 2; /* space for ITestAny & IChoice */
+ p = any(L, -n - UCHAR_MAX, 3, &offset);
+ setinstaux(p, IAny, offset + 1, UCHAR_MAX);
+ setinstaux(p + 1, IChoice, offset, UCHAR_MAX);
+ setinst(p + offset, IFailTwice, 0);
+ }
+ lua_replace(L, idx);
+ break;
+ }
+ case LUA_TBOOLEAN: {
+ if (lua_toboolean(L, idx)) /* true? */
+ p = newpatt(L, 0); /* empty pattern (always succeeds) */
+ else {
+ p = newpatt(L, 1);
+ setinst(p, IFail, 0);
+ }
+ lua_replace(L, idx);
+ break;
+ }
+ case LUA_TTABLE: {
+ p = fix_l(L, idx);
+ break;
+ }
+ case LUA_TFUNCTION: {
+ p = newpatt(L, 2);
+ setinstcap(p, IOpenCapture, value2fenv(L, idx), Cruntime, 0);
+ setinstcap(p + 1, ICloseRunTime, 0, Cclose, 0);
+ lua_replace(L, idx);
+ break;
+ }
+ default: {
+ p = checkpattern(L, idx);
+ break;
+ }
+ }
+ if (size) *size = pattsize(L, idx);
+ return p;
+}
+
+
+static int getpattl (lua_State *L, int idx) {
+ int size;
+ getpatt(L, idx, &size);
+ return size;
+}
+
+
+static int pattern_l (lua_State *L) {
+ lua_settop(L, 1);
+ getpatt(L, 1, NULL);
+ return 1;
+}
+
+
+#define isany(p) ((p)->i.code == IAny && ((p) + 1)->i.code == IEnd)
+#define isfail(p) ((p)->i.code == IFail)
+#define issucc(p) ((p)->i.code == IEnd)
+
+static int concat_l (lua_State *L) {
+ /* p1; p2; */
+ int l1, l2;
+ Instruction *p1 = getpatt(L, 1, &l1);
+ Instruction *p2 = getpatt(L, 2, &l2);
+ if (isfail(p1) || issucc(p2))
+ lua_pushvalue(L, 1); /* fail * x == fail; x * true == x */
+ else if (isfail(p2) || issucc(p1))
+ lua_pushvalue(L, 2); /* x * fail == fail; true * x == x */
+ else if (isany(p1) && isany(p2))
+ any(L, p1->i.aux + p2->i.aux, 0, NULL);
+ else {
+ Instruction *op = newpatt(L, l1 + l2);
+ Instruction *p = op + addpatt(L, op, 1);
+ addpatt(L, p, 2);
+ optimizecaptures(op);
+ }
+ return 1;
+}
+
+
+static int diff_l (lua_State *L) {
+ int l1, l2;
+ Instruction *p1 = getpatt(L, 1, &l1);
+ Instruction *p2 = getpatt(L, 2, &l2);
+ CharsetTag st1, st2;
+ if (tocharset(p1, &st1) == ISCHARSET && tocharset(p2, &st2) == ISCHARSET) {
+ Instruction *p = newcharset(L);
+ loopset(i, p[1].buff[i] = st1.cs[i] & ~st2.cs[i]);
+ }
+ else if (isheadfail(p2)) {
+ Instruction *p = newpatt(L, l2 + 1 + l1);
+ p += addpatt(L, p, 2);
+ check2test(p - l2, l2 + 1);
+ setinst(p++, IFail, 0);
+ addpatt(L, p, 1);
+ }
+ else { /* !e2 . e1 */
+ /* !e -> choice L1; e; failtwice; L1: ... */
+ Instruction *p = newpatt(L, 1 + l2 + 1 + l1);
+ Instruction *pi = p;
+ setinst(p++, IChoice, 1 + l2 + 1);
+ p += addpatt(L, p, 2);
+ setinst(p++, IFailTwice, 0);
+ addpatt(L, p, 1);
+ optimizechoice(pi);
+ }
+ return 1;
+}
+
+
+static int unm_l (lua_State *L) {
+ Instruction *p = getpatt(L, 1, NULL);
+ if (isfail(p)) { /* -false? */
+ newpatt(L, 0); /* true */
+ return 1;
+ }
+ else if (issucc(p)) { /* -true? */
+ Instruction *p1 = newpatt(L, 1); /* false */
+ setinst(p1, IFail, 0);
+ return 1;
+ }
+ else { /* -A == '' - A */
+ lua_pushliteral(L, "");
+ lua_insert(L, 1);
+ return diff_l(L);
+ }
+}
+
+
+static int pattand_l (lua_State *L) {
+ int l1;
+ CharsetTag st1;
+ Instruction *p1 = getpatt(L, 1, &l1);
+ if (isfail(p1) || issucc(p1))
+ lua_pushvalue(L, 1); /* &fail == fail; &true == true */
+ else if (tocharset(p1, &st1) == ISCHARSET) {
+ Instruction *p = newpatt(L, l1 + 1);
+ copypatt(p, p1, l1); p += l1;
+ setinstaux(p, IBack, 0, 1);
+ }
+ else { /* Choice L1; p1; BackCommit L2; L1: Fail; L2: */
+ Instruction *p = newpatt(L, 1 + l1 + 2);
+ setinst(p++, IChoice, 1 + l1 + 1);
+ p += addpatt(L, p, 1);
+ setinst(p++, IBackCommit, 2);
+ setinst(p, IFail, 0);
+ }
+ return 1;
+}
+
+
+static int nocalls (const Instruction *p) {
+ for (; (Opcode)p->i.code != IEnd; p += sizei(p))
+ if ((Opcode)p->i.code == IOpenCall) return 0;
+ return 1;
+}
+
+
+static int pattbehind (lua_State *L) {
+ int l1;
+ CharsetTag st1;
+ Instruction *p1 = getpatt(L, 1, &l1);
+ int n = luaL_optint(L, 2, 1);
+ luaL_argcheck(L, n <= MAXAUX, 2, "lookbehind delta too large");
+ if (!nocalls(p1))
+ luaL_error(L, "lookbehind pattern cannot contain non terminals");
+ if (isfail(p1) || issucc(p1))
+ lua_pushvalue(L, 1); /* <fail == fail; <true == true */
+ else if (n == 1 && tocharset(p1, &st1) == ISCHARSET) {
+ Instruction *p = newpatt(L, 1 + l1);
+ setinstaux(p, IBack, 0, 1); p++;
+ copypatt(p, p1, l1);
+ }
+ else { /* Choice L1; Back; p1; BackCommit L2; L1: fail; L2: */
+ Instruction *p = newpatt(L, 2 + l1 + 2);
+ setinst(p++, IChoice, 2 + l1 + 1);
+ setinstaux(p++, IBack, 0, n);
+ p += addpatt(L, p, 1);
+ setinst(p++, IBackCommit, 2);
+ setinst(p, IFail, 0);
+ }
+ return 1;
+}
+
+
+
+static int firstpart (Instruction *p, int l) {
+ if (istest(p)) {
+ int e = p[0].i.offset - 1;
+ if ((p[e].i.code == IJmp || p[e].i.code == ICommit) &&
+ e + p[e].i.offset == l)
+ return e + 1;
+ }
+ else if (p[0].i.code == IChoice) {
+ int e = p[0].i.offset - 1;
+ if (p[e].i.code == ICommit && e + p[e].i.offset == l)
+ return e + 1;
+ }
+ return 0;
+}
+
+
+static Instruction *auxnew (lua_State *L, Instruction **op, int *size,
+ int extra) {
+ *op = newpatt(L, *size + extra);
+ jointable(L, 1);
+ *size += extra;
+ return *op + *size - extra;
+}
+
+
+static int nofail (Instruction *p, int l) {
+ int i;
+ for (i = 0; i < l; i += sizei(p + i)) {
+ if (!isnofail(p + i)) return 0;
+ }
+ return 1;
+}
+
+
+static int interfere (Instruction *p1, int l1, CharsetTag *st2) {
+ if (nofail(p1, l1)) /* p1 cannot fail? */
+ return 0; /* nothing can intefere with it */
+ if (st2->tag == NOINFO) return 1;
+ assert(p1->i.offset != 0);
+ switch (p1->i.code) {
+ case IChar: return testchar(st2->cs, p1->i.aux);
+ case ISet: return !exclusiveset(st2->cs, (p1 + 1)->buff);
+ default: assert(p1->i.code == IAny); return 1;
+ }
+}
+
+
+static Instruction *basicUnion (lua_State *L, Instruction *p1, int l1,
+ int l2, int *size, CharsetTag *st2) {
+ Instruction *op;
+ CharsetTag st1;
+ tocharset(p1, &st1);
+ if (st1.tag == ISCHARSET && st2->tag == ISCHARSET) {
+ Instruction *p = auxnew(L, &op, size, CHARSETINSTSIZE);
+ setinst(p, ISet, 0);
+ loopset(i, p[1].buff[i] = st1.cs[i] | st2->cs[i]);
+ }
+ else if (exclusive(&st1, st2) || isheadfail(p1)) {
+ Instruction *p = auxnew(L, &op, size, l1 + 1 + l2);
+ copypatt(p, p1, l1);
+ check2test(p, l1 + 1);
+ p += l1;
+ setinst(p++, IJmp, l2 + 1);
+ addpatt(L, p, 2);
+ }
+ else {
+ /* choice L1; e1; commit L2; L1: e2; L2: ... */
+ Instruction *p = auxnew(L, &op, size, 1 + l1 + 1 + l2);
+ setinst(p++, IChoice, 1 + l1 + 1);
+ copypatt(p, p1, l1); p += l1;
+ setinst(p++, ICommit, 1 + l2);
+ addpatt(L, p, 2);
+ optimizechoice(p - (1 + l1 + 1));
+ }
+ return op;
+}
+
+
+static Instruction *separateparts (lua_State *L, Instruction *p1, int l1,
+ int l2, int *size, CharsetTag *st2) {
+ int sp = firstpart(p1, l1);
+ if (sp == 0) /* first part is entire p1? */
+ return basicUnion(L, p1, l1, l2, size, st2);
+ else if ((p1 + sp - 1)->i.code == ICommit || !interfere(p1, sp, st2)) {
+ Instruction *p;
+ int init = *size;
+ int end = init + sp;
+ *size = end;
+ p = separateparts(L, p1 + sp, l1 - sp, l2, size, st2);
+ copypatt(p + init, p1, sp);
+ (p + end - 1)->i.offset = *size - (end - 1);
+ return p;
+ }
+ else { /* must change back to non-optimized choice */
+ Instruction *p;
+ int init = *size;
+ int end = init + sp + 1; /* needs one extra instruction (choice) */
+ int sizefirst = sizei(p1); /* size of p1's first instruction (the test) */
+ *size = end;
+ p = separateparts(L, p1 + sp, l1 - sp, l2, size, st2);
+ copypatt(p + init, p1, sizefirst); /* copy the test */
+ (p + init)->i.offset++; /* correct jump (because of new instruction) */
+ init += sizefirst;
+ setinstaux(p + init, IChoice, sp - sizefirst + 1, 1); init++;
+ copypatt(p + init, p1 + sizefirst, sp - sizefirst - 1);
+ init += sp - sizefirst - 1;
+ setinst(p + init, ICommit, *size - (end - 1));
+ return p;
+ }
+}
+
+
+static int union_l (lua_State *L) {
+ int l1, l2;
+ int size = 0;
+ Instruction *p1 = getpatt(L, 1, &l1);
+ Instruction *p2 = getpatt(L, 2, &l2);
+ CharsetTag st2;
+ if (isfail(p1)) /* check for simple identities */
+ lua_pushvalue(L, 2); /* fail / a == a */
+ else if (isfail(p2) || issucc(p1))
+ lua_pushvalue(L, 1); /* a / fail == a; true / a == true */
+ else {
+ tocharset(p2, &st2);
+ separateparts(L, p1, l1, l2, &size, &st2);
+ }
+ return 1;
+}
+
+
+static int repeatcharset (lua_State *L, Charset cs, int l1, int n) {
+ /* e; ...; e; span; */
+ int i;
+ Instruction *p = newpatt(L, n*l1 + CHARSETINSTSIZE);
+ for (i = 0; i < n; i++) {
+ p += addpatt(L, p, 1);
+ }
+ setinst(p, ISpan, 0);
+ loopset(k, p[1].buff[k] = cs[k]);
+ return 1;
+}
+
+
+static Instruction *repeatheadfail (lua_State *L, int l1, int n) {
+ /* e; ...; e; L2: e'(L1); jump L2; L1: ... */
+ int i;
+ Instruction *p = newpatt(L, (n + 1)*l1 + 1);
+ Instruction *op = p;
+ for (i = 0; i < n; i++) {
+ p += addpatt(L, p, 1);
+ }
+ p += addpatt(L, p, 1);
+ check2test(p - l1, l1 + 1);
+ setinst(p, IJmp, -l1);
+ return op;
+}
+
+
+static Instruction *repeats (lua_State *L, Instruction *p1, int l1, int n) {
+ /* e; ...; e; choice L1; L2: e; partialcommit L2; L1: ... */
+ int i;
+ Instruction *op = newpatt(L, (n + 1)*l1 + 2);
+ Instruction *p = op;
+ if (!verify(L, p1, p1, p1 + l1, 0, 0))
+ luaL_error(L, "loop body may accept empty string");
+ for (i = 0; i < n; i++) {
+ p += addpatt(L, p, 1);
+ }
+ setinst(p++, IChoice, 1 + l1 + 1);
+ p += addpatt(L, p, 1);
+ setinst(p, IPartialCommit, -l1);
+ return op;
+}
+
+
+static void optionalheadfail (lua_State *L, int l1, int n) {
+ Instruction *op = newpatt(L, n * l1);
+ Instruction *p = op;
+ int i;
+ for (i = 0; i < n; i++) {
+ p += addpatt(L, p, 1);
+ check2test(p - l1, (n - i)*l1);
+ }
+}
+
+
+static void optionals (lua_State *L, int l1, int n) {
+ /* choice L1; e; partialcommit L2; L2: ... e; L1: commit L3; L3: ... */
+ int i;
+ Instruction *op = newpatt(L, n*(l1 + 1) + 1);
+ Instruction *p = op;
+ setinst(p++, IChoice, 1 + n*(l1 + 1));
+ for (i = 0; i < n; i++) {
+ p += addpatt(L, p, 1);
+ setinst(p++, IPartialCommit, 1);
+ }
+ setinst(p - 1, ICommit, 1); /* correct last commit */
+ optimizechoice(op);
+}
+
+
+static int star_l (lua_State *L) {
+ int l1;
+ int n = luaL_checkint(L, 2);
+ Instruction *p1 = getpatt(L, 1, &l1);
+ if (n >= 0) {
+ CharsetTag st;
+ Instruction *op;
+ if (tocharset(p1, &st) == ISCHARSET)
+ return repeatcharset(L, st.cs, l1, n);
+ if (isheadfail(p1))
+ op = repeatheadfail(L, l1, n);
+ else
+ op = repeats(L, p1, l1, n);
+ optimizecaptures(op);
+ optimizejumps(op);
+ }
+ else {
+ if (isheadfail(p1))
+ optionalheadfail(L, l1, -n);
+ else
+ optionals(L, l1, -n);
+ }
+ return 1;
+}
+
+
+static int getlabel (lua_State *L, int labelidx) {
+ if (labelidx == 0) return 0;
+ else return value2fenv(L, labelidx);
+}
+
+
+static int capture_aux (lua_State *L, int kind, int labelidx) {
+ int l1, n;
+ Instruction *p1 = getpatt(L, 1, &l1);
+ int lc = skipchecks(p1, 0, &n);
+ if (lc == l1) { /* got whole pattern? */
+ /* may use a IFullCapture instruction at its end */
+ Instruction *p = newpatt(L, l1 + 1);
+ int label = getlabel(L, labelidx);
+ p += addpatt(L, p, 1);
+ setinstcap(p, IFullCapture, label, kind, n);
+ }
+ else { /* must use open-close pair */
+ Instruction *op = newpatt(L, 1 + l1 + 1);
+ Instruction *p = op;
+ setinstcap(p++, IOpenCapture, getlabel(L, labelidx), kind, 0);
+ p += addpatt(L, p, 1);
+ setinstcap(p, ICloseCapture, 0, Cclose, 0);
+ optimizecaptures(op);
+ }
+ return 1;
+}
+
+
+static int capture_l (lua_State *L) { return capture_aux(L, Csimple, 0); }
+static int tcapture_l (lua_State *L) { return capture_aux(L, Ctable, 0); }
+static int capsubst_l (lua_State *L) { return capture_aux(L, Csubst, 0); }
+
+static int rcapture_l (lua_State *L) {
+ switch (lua_type(L, 2)) {
+ case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2);
+ case LUA_TTABLE: return capture_aux(L, Cquery, 2);
+ case LUA_TSTRING: return capture_aux(L, Cstring, 2);
+ default: return luaL_argerror(L, 2, "invalid replacement value");
+ }
+}
+
+
+static int fold_l (lua_State *L) {
+ luaL_checktype(L, 2, LUA_TFUNCTION);
+ return capture_aux(L, Cfold, 2);
+}
+
+
+static int group_l (lua_State *L) {
+ if (lua_isnoneornil(L, 2))
+ return capture_aux(L, Cgroup, 0);
+ else {
+ luaL_checkstring(L, 2);
+ return capture_aux(L, Cgroup, 2);
+ }
+}
+
+
+static int position_l (lua_State *L) {
+ Instruction *p = newpatt(L, 1);
+ setinstcap(p, IEmptyCapture, 0, Cposition, 0);
+ return 1;
+}
+
+
+static int backref_l (lua_State *L) {
+ Instruction *p = newpatt(L, 1);
+ int n = getlabel(L, 1);
+ setinstcap(p, IEmptyCaptureIdx, n, Cbackref, 0);
+ return 1;
+}
+
+
+static int argcap_l (lua_State *L) {
+ int n = luaL_checkint(L, 1);
+ Instruction *p = newpatt(L, 1);
+ luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index");
+ setinstcap(p, IEmptyCapture, n, Carg, 0);
+ return 1;
+}
+
+
+static int matchtime_l (lua_State *L) {
+ int l1 = getpattl(L, 1);
+ Instruction *op = newpatt(L, 1 + l1 + 1);
+ Instruction *p = op;
+ luaL_checktype(L, 2, LUA_TFUNCTION);
+ setinstcap(p++, IOpenCapture, value2fenv(L, 2), Cruntime, 0);
+ p += addpatt(L, p, 1);
+ setinstcap(p, ICloseRunTime, 0, Cclose, 0);
+ optimizecaptures(op);
+ return 1;
+}
+
+
+static int capconst_l (lua_State *L) {
+ int i, j;
+ int n = lua_gettop(L);
+ Instruction *p = newpatt(L, n > 1 ? n + 2 : n);
+ lua_createtable(L, n, 0); /* new environment for the new pattern */
+ if (n > 1) setinstcap(p++, IOpenCapture, 0, Cgroup, 0);
+ for (i = j = 1; i <= n; i++) {
+ if (lua_isnil(L, i))
+ setinstcap(p++, IEmptyCaptureIdx, 0, Cconst, 0);
+ else {
+ setinstcap(p++, IEmptyCaptureIdx, j, Cconst, 0);
+ lua_pushvalue(L, i);
+ lua_rawseti(L, -2, j++);
+ }
+ }
+ if (n > 1) setinstcap(p++, ICloseCapture, 0, Cclose, 0);
+ lua_setfenv(L, -2); /* set environment */
+ return 1;
+}
+
+
+/* }====================================================== */
+
+
+/*
+** {======================================================
+** User-Defined Patterns
+** =======================================================
+*/
+
+static void l_newpf (lua_State *L, PattFunc f, const void *ud, size_t l) {
+ int n = instsize(l) + 1;
+ Instruction *p = newpatt(L, n);
+ if (n > MAXAUX) luaL_error(L, "pattern data too long");
+ p[0].i.code = IFunc;
+ p[0].i.aux = n - 2;
+ p[0].i.offset = 0;
+ p[1].f = f;
+ memcpy(p[2].buff, ud, l);
+}
+
+/* }====================================================== */
+
+
+/*
+** {======================================================
+** Captures
+** =======================================================
+*/
+
+
+typedef struct CapState {
+ Capture *cap; /* current capture */
+ Capture *ocap; /* (original) capture list */
+ lua_State *L;
+ int ptop; /* index of last argument to 'match' */
+ const char *s; /* original string */
+ int valuecached; /* value stored in cache slot */
+} CapState;
+
+
+#define captype(cap) ((cap)->kind)
+
+#define isclosecap(cap) (captype(cap) == Cclose)
+
+#define closeaddr(c) ((c)->s + (c)->siz - 1)
+
+#define isfullcap(cap) ((cap)->siz != 0)
+
+#define getfromenv(cs,v) lua_rawgeti((cs)->L, penvidx((cs)->ptop), v)
+#define pushluaval(cs) getfromenv(cs, (cs)->cap->idx)
+
+#define pushsubject(cs, c) lua_pushlstring((cs)->L, (c)->s, (c)->siz - 1)
+
+
+#define updatecache(cs,v) { if ((v) != (cs)->valuecached) updatecache_(cs,v); }
+
+
+static void updatecache_ (CapState *cs, int v) {
+ getfromenv(cs, v);
+ lua_replace(cs->L, subscache(cs));
+ cs->valuecached = v;
+}
+
+
+static int pushcapture (CapState *cs);
+
+
+static Capture *findopen (Capture *cap) {
+ int n = 0;
+ for (;;) {
+ cap--;
+ if (isclosecap(cap)) n++;
+ else if (!isfullcap(cap))
+ if (n-- == 0) return cap;
+ }
+}
+
+
+static Capture *nextcap (Capture *cap) {
+ if (isfullcap(cap)) return cap + 1;
+ else {
+ int n = 0;
+ for (;;) {
+ cap++;
+ if (isclosecap(cap)) {
+ if (n-- == 0) return cap + 1;
+ }
+ else if (!isfullcap(cap)) n++;
+ }
+ }
+}
+
+
+static int pushallvalues (CapState *cs, int addextra) {
+ Capture *co = cs->cap;
+ int n = 0;
+ if (isfullcap(cs->cap++)) {
+ pushsubject(cs, co); /* push whole match */
+ return 1;
+ }
+ while (!isclosecap(cs->cap))
+ n += pushcapture(cs);
+ if (addextra || n == 0) { /* need extra? */
+ lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */
+ n++;
+ }
+ cs->cap++; /* skip close entry */
+ return n;
+}
+
+
+static Capture *findback (CapState *cs, Capture *cap) {
+ lua_State *L = cs->L;
+ for (;;) {
+ if (cap == cs->ocap) { /* not found */
+ const char *s = lua_tostring(L, -1);
+ if (s == NULL) s = lua_pushfstring(L, "(a %s)", luaL_typename(L, -1));
+ luaL_error(L, "back reference '%s' not found", s);
+ }
+ cap--;
+ if (isclosecap(cap))
+ cap = findopen(cap);
+ else if (!isfullcap(cap))
+ continue; /* opening an enclosing capture: skip and get previous */
+ if (captype(cap) == Cgroup) {
+ getfromenv(cs, cap->idx); /* get group name */
+ if (lua_equal(L, -2, -1)) { /* right group? */
+ lua_pop(L, 2); /* remove reference name and group name */
+ return cap;
+ }
+ else lua_pop(L, 1); /* remove group name */
+ }
+ }
+}
+
+
+static int backrefcap (CapState *cs) {
+ int n;
+ Capture *curr = cs->cap;
+ pushluaval(cs); /* reference name */
+ cs->cap = findback(cs, curr);
+ n = pushallvalues(cs, 0);
+ cs->cap = curr + 1;
+ return n;
+}
+
+
+static int tablecap (CapState *cs) {
+ lua_State *L = cs->L;
+ int n = 0;
+ lua_newtable(L);
+ if (isfullcap(cs->cap++))
+ return 1; /* table is empty */
+ while (!isclosecap(cs->cap)) {
+ if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
+ int k;
+ pushluaval(cs); /* push group name */
+ k = pushallvalues(cs, 0);
+ if (k == 0) { /* no value? */
+ lua_pop(L, 1); /* remove group name */
+ continue; /* and go on */
+ }
+ else if (k > 1)
+ lua_pop(L, k - 1); /* keep just one value */
+ lua_settable(L, -3);
+ }
+ else {
+ int i;
+ int k = pushcapture(cs);
+ for (i = k; i > 0; i--)
+ lua_rawseti(L, -(i + 1), n + i);
+ n += k;
+ }
+ }
+ cs->cap++; /* skip close entry */
+ return 1;
+}
+
+
+static int querycap (CapState *cs) {
+ int idx = cs->cap->idx;
+ int n = pushallvalues(cs, 0);
+ if (n > 1) /* extra captures? */
+ lua_pop(cs->L, n - 1); /* throw them away */
+ updatecache(cs, idx);
+ lua_gettable(cs->L, subscache(cs));
+ if (!lua_isnil(cs->L, -1))
+ return 1;
+ else {
+ lua_pop(cs->L, 1); /* remove value */
+ return 0;
+ }
+}
+
+
+static int foldcap (CapState *cs) {
+ int n;
+ lua_State *L = cs->L;
+ int idx = cs->cap->idx;
+ if (isfullcap(cs->cap++) || isclosecap(cs->cap) || (n = pushcapture(cs)) == 0)
+ return luaL_error(L, "no initial value for fold capture");
+ if (n > 1)
+ lua_pop(L, n - 1); /* leave only one result */
+ while (!isclosecap(cs->cap)) {
+ updatecache(cs, idx);
+ lua_pushvalue(L, subscache(cs)); /* get folding function */
+ lua_insert(L, -2); /* put it before accumulator */
+ n = pushcapture(cs); /* get other captures */
+ lua_call(L, n + 1, 1); /* call folding function */
+ }
+ cs->cap++; /* skip close entry */
+ return 1;
+}
+
+
+static int functioncap (CapState *cs) {
+ int n;
+ int top = lua_gettop(cs->L);
+ pushluaval(cs);
+ n = pushallvalues(cs, 0);
+ lua_call(cs->L, n, LUA_MULTRET);
+ return lua_gettop(cs->L) - top;
+}
+
+
+static int runtimecap (lua_State *L, Capture *close, Capture *ocap,
+ const char *o, const char *s, int ptop) {
+ CapState cs;
+ int n;
+ Capture *open = findopen(close);
+ assert(captype(open) == Cruntime);
+ close->kind = Cclose;
+ close->s = s;
+ cs.ocap = ocap; cs.cap = open; cs.L = L;
+ cs.s = o; cs.valuecached = 0; cs.ptop = ptop;
+ luaL_checkstack(L, 4, "too many runtime captures");
+ pushluaval(&cs);
+ lua_pushvalue(L, SUBJIDX); /* push original subject */
+ lua_pushinteger(L, s - o + 1); /* current position */
+ n = pushallvalues(&cs, 0);
+ lua_call(L, n + 2, LUA_MULTRET);
+ return close - open;
+}
+
+
+
+typedef struct StrAux {
+ int isstring;
+ union {
+ Capture *cp;
+ struct {
+ const char *s;
+ const char *e;
+ } s;
+ } u;
+} StrAux;
+
+#define MAXSTRCAPS 10
+
+static int getstrcaps (CapState *cs, StrAux *cps, int n) {
+ int k = n++;
+ cps[k].isstring = 1;
+ cps[k].u.s.s = cs->cap->s;
+ if (!isfullcap(cs->cap++)) {
+ while (!isclosecap(cs->cap)) {
+ if (n >= MAXSTRCAPS) /* too many captures? */
+ cs->cap = nextcap(cs->cap); /* skip it */
+ else if (captype(cs->cap) == Csimple)
+ n = getstrcaps(cs, cps, n);
+ else {
+ cps[n].isstring = 0;
+ cps[n].u.cp = cs->cap;
+ cs->cap = nextcap(cs->cap);
+ n++;
+ }
+ }
+ cs->cap++; /* skip close */
+ }
+ cps[k].u.s.e = closeaddr(cs->cap - 1);
+ return n;
+}
+
+
+/*
+** add next capture (which should be a string) to buffer
+*/
+static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
+
+
+static void stringcap (luaL_Buffer *b, CapState *cs) {
+ StrAux cps[MAXSTRCAPS];
+ int n;
+ size_t len, i;
+ const char *c;
+ updatecache(cs, cs->cap->idx);
+ c = lua_tolstring(cs->L, subscache(cs), &len);
+ n = getstrcaps(cs, cps, 0) - 1;
+ for (i = 0; i < len; i++) {
+ if (c[i] != '%' || c[++i] < '0' || c[i] > '9')
+ luaL_addchar(b, c[i]);
+ else {
+ int l = c[i] - '0';
+ if (l > n)
+ luaL_error(cs->L, "invalid capture index (%d)", l);
+ else if (cps[l].isstring)
+ luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s);
+ else {
+ Capture *curr = cs->cap;
+ cs->cap = cps[l].u.cp;
+ if (addonestring(b, cs, "capture") == 0)
+ luaL_error(cs->L, "no values in capture index %d", l);
+ cs->cap = curr;
+ }
+ }
+ }
+}
+
+
+static void substcap (luaL_Buffer *b, CapState *cs) {
+ const char *curr = cs->cap->s;
+ if (isfullcap(cs->cap)) /* no nested captures? */
+ luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
+ else {
+ cs->cap++;
+ while (!isclosecap(cs->cap)) {
+ const char *next = cs->cap->s;
+ luaL_addlstring(b, curr, next - curr); /* add text up to capture */
+ if (addonestring(b, cs, "replacement") == 0) /* no capture value? */
+ curr = next; /* keep original text in final result */
+ else
+ curr = closeaddr(cs->cap - 1); /* continue after match */
+ }
+ luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */
+ }
+ cs->cap++; /* go to next capture */
+}
+
+
+static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
+ switch (captype(cs->cap)) {
+ case Cstring:
+ stringcap(b, cs); /* add capture directly to buffer */
+ return 1;
+ case Csubst:
+ substcap(b, cs); /* add capture directly to buffer */
+ return 1;
+ default: {
+ lua_State *L = cs->L;
+ int n = pushcapture(cs);
+ if (n > 0) {
+ if (n > 1) lua_pop(L, n - 1); /* only one result */
+ if (!lua_isstring(L, -1))
+ luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
+ luaL_addvalue(b);
+ }
+ return n;
+ }
+ }
+}
+
+
+static int pushcapture (CapState *cs) {
+ luaL_checkstack(cs->L, 4, "too many captures");
+ switch (captype(cs->cap)) {
+ case Cposition: {
+ lua_pushinteger(cs->L, cs->cap->s - cs->s + 1);
+ cs->cap++;
+ return 1;
+ }
+ case Cconst: {
+ pushluaval(cs);
+ cs->cap++;
+ return 1;
+ }
+ case Carg: {
+ int arg = (cs->cap++)->idx;
+ if (arg + FIXEDARGS > cs->ptop)
+ return luaL_error(cs->L, "reference to absent argument #%d", arg);
+ lua_pushvalue(cs->L, arg + FIXEDARGS);
+ return 1;
+ }
+ case Csimple: {
+ int k = pushallvalues(cs, 1);
+ if (k > 1)
+ lua_insert(cs->L, -k); /* whole match is first result */
+ return k;
+ }
+ case Cruntime: {
+ int n = 0;
+ while (!isclosecap(cs->cap++)) {
+ luaL_checkstack(cs->L, 4, "too many captures");
+ lua_pushvalue(cs->L, (cs->cap - 1)->idx);
+ n++;
+ }
+ return n;
+ }
+ case Cstring: {
+ luaL_Buffer b;
+ luaL_buffinit(cs->L, &b);
+ stringcap(&b, cs);
+ luaL_pushresult(&b);
+ return 1;
+ }
+ case Csubst: {
+ luaL_Buffer b;
+ luaL_buffinit(cs->L, &b);
+ substcap(&b, cs);
+ luaL_pushresult(&b);
+ return 1;
+ }
+ case Cgroup: {
+ if (cs->cap->idx == 0) /* anonymous group? */
+ return pushallvalues(cs, 0); /* add all nested values */
+ else { /* named group: add no values */
+ cs->cap = nextcap(cs->cap); /* skip capture */
+ return 0;
+ }
+ }
+ case Cbackref: return backrefcap(cs);
+ case Ctable: return tablecap(cs);
+ case Cfunction: return functioncap(cs);
+ case Cquery: return querycap(cs);
+ case Cfold: return foldcap(cs);
+ default: assert(0); return 0;
+ }
+}
+
+
+static int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
+ Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
+ int n = 0;
+ if (!isclosecap(capture)) { /* is there any capture? */
+ CapState cs;
+ cs.ocap = cs.cap = capture; cs.L = L;
+ cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
+ do { /* collect their values */
+ n += pushcapture(&cs);
+ } while (!isclosecap(cs.cap));
+ }
+ if (n == 0) { /* no capture values? */
+ lua_pushinteger(L, r - s + 1); /* return only end position */
+ n = 1;
+ }
+ return n;
+}
+
+/* }====================================================== */
+
+
+static int version_l (lua_State *L) {
+ lua_pushstring(L, VERSION);
+ return 1;
+}
+
+
+static int type_l (lua_State *L) {
+ if (testpattern(L, 1))
+ lua_pushliteral(L, "pattern");
+ else
+ lua_pushnil(L);
+ return 1;
+}
+
+
+static void createcat (lua_State *L, const char *catname, int (catf) (int)) {
+ Instruction *p = newcharset(L);
+ int i;
+ for (i = 0; i < CHAR_MAX; i++)
+ if (catf(i)) setchar(p[1].buff, i);
+ lua_setfield(L, -2, catname);
+}
+
+
+static int locale_l (lua_State *L) {
+ if (lua_isnoneornil(L, 1)) {
+ lua_settop(L, 0);
+ lua_createtable(L, 0, 12);
+ }
+ else {
+ luaL_checktype(L, 1, LUA_TTABLE);
+ lua_settop(L, 1);
+ }
+ createcat(L, "alnum", isalnum);
+ createcat(L, "alpha", isalpha);
+ createcat(L, "cntrl", iscntrl);
+ createcat(L, "digit", isdigit);
+ createcat(L, "graph", isgraph);
+ createcat(L, "lower", islower);
+ createcat(L, "print", isprint);
+ createcat(L, "punct", ispunct);
+ createcat(L, "space", isspace);
+ createcat(L, "upper", isupper);
+ createcat(L, "xdigit", isxdigit);
+ return 1;
+}
+
+
+static int setmax (lua_State *L) {
+ luaL_optinteger(L, 1, -1);
+ lua_settop(L, 1);
+ lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
+ return 0;
+}
+
+
+static int printpat_l (lua_State *L) {
+ Instruction *p = getpatt(L, 1, NULL);
+ int n, i;
+ lua_getfenv(L, 1);
+ n = ktablelen(L, -1);
+ printf("[");
+ for (i = 1; i <= n; i++) {
+ printf("%d = ", i);
+ lua_rawgeti(L, -1, i);
+ if (lua_isstring(L, -1))
+ printf("%s ", lua_tostring(L, -1));
+ else
+ printf("%s ", lua_typename(L, lua_type(L, -1)));
+ lua_pop(L, 1);
+ }
+ printf("]\n");
+ printpatt(p);
+ return 0;
+}
+
+
+static int matchl (lua_State *L) {
+ Capture capture[INITCAPSIZE];
+ const char *r;
+ size_t l;
+ Instruction *p = getpatt(L, 1, NULL);
+ const char *s = luaL_checklstring(L, SUBJIDX, &l);
+ int ptop = lua_gettop(L);
+ lua_Integer ii = luaL_optinteger(L, 3, 1);
+ size_t i = (ii > 0) ?
+ (((size_t)ii <= l) ? (size_t)ii - 1 : l) :
+ (((size_t)-ii <= l) ? l - ((size_t)-ii) : 0);
+ lua_pushnil(L); /* subscache */
+ lua_pushlightuserdata(L, capture); /* caplistidx */
+ lua_getfenv(L, 1); /* penvidx */
+ r = match(L, s, s + i, s + l, p, capture, ptop);
+ if (r == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+ return getcaptures(L, s, r, ptop);
+}
+
+
+static struct luaL_Reg pattreg[] = {
+ {"match", matchl},
+ {"print", printpat_l},
+ {"locale", locale_l},
+ {"setmaxstack", setmax},
+ {"B", pattbehind},
+ {"C", capture_l},
+ {"Cf", fold_l},
+ {"Cc", capconst_l},
+ {"Cg", group_l},
+ {"Cp", position_l},
+ {"Cb", backref_l},
+ {"Carg", argcap_l},
+ {"Cmt", matchtime_l},
+ {"Cs", capsubst_l},
+ {"Ct", tcapture_l},
+ {"P", pattern_l},
+ {"R", range_l},
+ {"S", set_l},
+ {"V", nter_l},
+ {"type", type_l},
+ {"version", version_l},
+ {NULL, NULL}
+};
+
+
+static struct luaL_Reg metapattreg[] = {
+ {"__add", union_l},
+ {"__pow", star_l},
+ {"__sub", diff_l},
+ {"__mul", concat_l},
+ {"__div", rcapture_l},
+ {"__unm", unm_l},
+ {"__len", pattand_l},
+ {NULL, NULL}
+};
+
+
+int luaopen_lpeg (lua_State *L);
+int luaopen_lpeg (lua_State *L) {
+ lua_pushcfunction(L, (lua_CFunction)&l_newpf); /* new-pattern function */
+ lua_setfield(L, LUA_REGISTRYINDEX, KEYNEWPATT); /* register it */
+ luaL_newmetatable(L, PATTERN_T);
+ lua_pushnumber(L, MAXBACK);
+ lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
+ luaL_register(L, NULL, metapattreg);
+ luaL_register(L, "lpeg", pattreg);
+ lua_pushliteral(L, "__index");
+ lua_pushvalue(L, -2);
+ lua_settable(L, -4);
+ return 1;
+}
+
diff --git a/lpeg-0.10.2/lpeg.h b/lpeg-0.10.2/lpeg.h
new file mode 100644
index 0000000..13d7ace
--- /dev/null
+++ b/lpeg-0.10.2/lpeg.h
@@ -0,0 +1,38 @@
+/*
+** $Id: lpeg.h,v 1.1 2009/12/23 16:15:36 roberto Exp $
+** LPeg - PEG pattern matching for Lua
+** Copyright 2009, Lua.org & PUC-Rio (see 'lpeg.html' for license)
+** written by Roberto Ierusalimschy
+*/
+
+#ifndef lpeg_h
+#define lpeg_h
+
+#include "lua.h"
+
+
+#define KEYNEWPATT "lpeg.newpf"
+
+
+/*
+** type of extension functions that define new "patterns" for LPEG
+** It should return the new current position or NULL if match fails
+*/
+typedef const char *(*PattFunc) (const char *s, /* current position */
+ const char *e, /* string end */
+ const char *o, /* string start */
+ const void *ud); /* user data */
+
+/*
+** function to create new patterns based on 'PattFunc' functions.
+** This function is available at *registry[KEYNEWPATT]. (Notice
+** the extra indirection; the userdata at the registry points to
+** a variable that points to the function. In ANSI C a void* cannot
+** point to a function.)
+*/
+typedef void (*Newpf) (lua_State *L,
+ PattFunc f, /* pattern */
+ const void *ud, /* (user) data to be passed to 'f' */
+ size_t l); /* size of data to be passed to 'f' */
+
+#endif
diff --git a/lpeg-0.10.2/lpeg.html b/lpeg-0.10.2/lpeg.html
new file mode 100644
index 0000000..bb92b27
--- /dev/null
+++ b/lpeg-0.10.2/lpeg.html
@@ -0,0 +1,1423 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+ <title>LPeg - Parsing Expression Grammars For Lua</title>
+ <link rel="stylesheet"
+ href="http://www.inf.puc-rio.br/~roberto/lpeg/doc.css"
+ type="text/css"/>
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+</head>
+<body>
+
+<!-- $Id: lpeg.html,v 1.64 2011/02/16 15:01:58 roberto Exp $ -->
+
+<div id="container">
+
+<div id="product">
+ <div id="product_logo">
+ <a href="http://www.inf.puc-rio.br/~roberto/lpeg/">
+ <img alt="LPeg logo" src="lpeg-128.gif"/></a>
+
+ </div>
+ <div id="product_name"><big><strong>LPeg</strong></big></div>
+ <div id="product_description">
+ Parsing Expression Grammars For Lua, version 0.10
+ </div>
+</div> <!-- id="product" -->
+
+<div id="main">
+
+<div id="navigation">
+<h1>LPeg</h1>
+
+<ul>
+ <li><strong>Home</strong>
+ <ul>
+ <li><a href="#intro">Introduction</a></li>
+ <li><a href="#func">Functions</a></li>
+ <li><a href="#basic">Basic Constructions</a></li>
+ <li><a href="#grammar">Grammars</a></li>
+ <li><a href="#captures">Captures</a></li>
+ <li><a href="#ex">Some Examples</a></li>
+ <li><a href="re.html">The <code>re</code> Module</a></li>
+ <li><a href="#download">Download</a></li>
+ <li><a href="#license">License</a></li>
+ </ul>
+ </li>
+</ul>
+</div> <!-- id="navigation" -->
+
+<div id="content">
+
+
+<h2><a name="intro">Introduction</a></h2>
+
+<p>
+<em>LPeg</em> is a new pattern-matching library for Lua,
+based on
+<a href="http://pdos.csail.mit.edu/%7Ebaford/packrat/">
+Parsing Expression Grammars</a> (PEGs).
+This text is a reference manual for the library.
+For a more formal treatment of LPeg,
+as well as some discussion about its implementation,
+see
+<a href="http://www.inf.puc-rio.br/~roberto/docs/peg.pdf">
+A Text Pattern-Matching Tool based on Parsing Expression Grammars</a>.
+(You may also be interested in my
+<a href="http://vimeo.com/1485123">talk about LPeg</a>
+given at the III Lua Workshop.)
+</p>
+
+<p>
+Following the Snobol tradition,
+LPeg defines patterns as first-class objects.
+That is, patterns are regular Lua values
+(represented by userdata).
+The library offers several functions to create
+and compose patterns.
+With the use of metamethods,
+several of these functions are provided as infix or prefix
+operators.
+On the one hand,
+the result is usually much more verbose than the typical
+encoding of patterns using the so called
+<em>regular expressions</em>
+(which typically are not regular expressions in the formal sense).
+On the other hand,
+first-class patterns allow much better documentation
+(as it is easy to comment the code,
+to break complex definitions in smaller parts, etc.)
+and are extensible,
+as we can define new functions to create and compose patterns.
+</p>
+
+<p>
+For a quick glance of the library,
+the following table summarizes its basic operations
+for creating patterns:
+</p>
+<table border="1">
+<tbody><tr><td><b>Operator</b></td><td><b>Description</b></td></tr>
+<tr><td><a href="#op-p"><code>lpeg.P(string)</code></a></td>
+ <td>Matches <code>string</code> literally</td></tr>
+<tr><td><a href="#op-p"><code>lpeg.P(n)</code></a></td>
+ <td>Matches exactly <code>n</code> characters</td></tr>
+<tr><td><a href="#op-s"><code>lpeg.S(string)</code></a></td>
+ <td>Matches any character in <code>string</code> (Set)</td></tr>
+<tr><td><a href="#op-r"><code>lpeg.R("<em>xy</em>")</code></a></td>
+ <td>Matches any character between <em>x</em> and <em>y</em> (Range)</td></tr>
+<tr><td><a href="#op-pow"><code>patt^n</code></a></td>
+ <td>Matches at least <code>n</code> repetitions of <code>patt</code></td></tr>
+<tr><td><a href="#op-pow"><code>patt^-n</code></a></td>
+ <td>Matches at most <code>n</code> repetitions of <code>patt</code></td></tr>
+<tr><td><a href="#op-mul"><code>patt1 * patt2</code></a></td>
+ <td>Matches <code>patt1</code> followed by <code>patt2</code></td></tr>
+<tr><td><a href="#op-add"><code>patt1 + patt2</code></a></td>
+ <td>Matches <code>patt1</code> or <code>patt2</code>
+ (ordered choice)</td></tr>
+<tr><td><a href="#op-sub"><code>patt1 - patt2</code></a></td>
+ <td>Matches <code>patt1</code> if <code>patt2</code> does not match</td></tr>
+<tr><td><a href="#op-unm"><code>-patt</code></a></td>
+ <td>Equivalent to <code>("" - patt)</code></td></tr>
+<tr><td><a href="#op-len"><code>#patt</code></a></td>
+ <td>Matches <code>patt</code> but consumes no input</td></tr>
+<tr><td><a href="#op-behind"><code>lpeg.B(patt, n)</code></a></td>
+ <td>Matches <code>patt</code> <code>n</code> characters behind
+ the current position, consuming no input</td></tr>
+</tbody></table>
+
+<p>As a very simple example,
+<code>lpeg.R("09")^1</code> creates a pattern that
+matches a non-empty sequence of digits.
+As a not so simple example,
+<code>-lpeg.P(1)</code>
+(which can be written as <code>lpeg.P(-1)</code>
+or simply <code>-1</code> for operations expecting a pattern)
+matches an empty string only if it cannot match a single character;
+so, it succeeds only at the subject's end.
+</p>
+
+<p>
+LPeg also offers the <a href="re.html"><code>re</code> module</a>,
+which implements patterns following a regular-expression style
+(e.g., <code>[09]+</code>).
+(This module is 250 lines of Lua code,
+and of course uses LPeg to parse regular expressions and
+translate them to regular LPeg patterns.)
+</p>
+
+
+<h2><a name="func">Functions</a></h2>
+
+
+<h3><a name="f-match"></a><code>lpeg.match (pattern, subject [, init])</code></h3>
+<p>
+The matching function.
+It attempts to match the given pattern against the subject string.
+If the match succeeds,
+returns the index in the subject of the first character after the match,
+or the <a href="#captures">captured values</a>
+(if the pattern captured any value).
+</p>
+
+<p>
+An optional numeric argument <code>init</code> makes the match
+starts at that position in the subject string.
+As usual in Lua libraries,
+a negative value counts from the end.
+</p>
+
+<p>
+Unlike typical pattern-matching functions,
+<code>match</code> works only in <em>anchored</em> mode;
+that is, it tries to match the pattern with a prefix of
+the given subject string (at position <code>init</code>),
+not with an arbitrary substring of the subject.
+So, if we want to find a pattern anywhere in a string,
+we must either write a loop in Lua or write a pattern that
+matches anywhere.
+This second approach is easy and quite efficient;
+see <a href="#ex">examples</a>.
+</p>
+
+<h3><a name="f-type"></a><code>lpeg.type (value)</code></h3>
+<p>
+If the given value is a pattern,
+returns the string <code>"pattern"</code>.
+Otherwise returns nil.
+</p>
+
+<h3><a name="f-version"></a><code>lpeg.version ()</code></h3>
+<p>
+Returns a string with the running version of LPeg.
+</p>
+
+<h3><a name="f-setstack"></a><code>lpeg.setmaxstack (max)</code></h3>
+<p>
+Sets the maximum size for the backtrack stack used by LPeg to
+track calls and choices.
+Most well-written patterns need little backtrack levels and
+therefore you seldom need to change this maximum;
+but a few useful patterns may need more space.
+Before changing this maximum you should try to rewrite your
+pattern to avoid the need for extra space.
+</p>
+
+
+<h2><a name="basic">Basic Constructions</a></h2>
+
+<p>
+The following operations build patterns.
+All operations that expect a pattern as an argument
+may receive also strings, tables, numbers, booleans, or functions,
+which are translated to patterns according to
+the rules of function <a href="#op-p"><code>lpeg.P</code></a>.
+</p>
+
+
+
+<h3><a name="op-p"></a><code>lpeg.P (value)</code></h3>
+<p>
+Converts the given value into a proper pattern,
+according to the following rules:
+</p>
+<ul>
+
+<li><p>
+If the argument is a pattern,
+it is returned unmodified.
+</p></li>
+
+<li><p>
+If the argument is a string,
+it is translated to a pattern that matches literally the string.
+</p></li>
+
+<li><p>
+If the argument is a non-negative number <em>n</em>,
+the result is a pattern that matches exactly <em>n</em> characters.
+</p></li>
+
+<li><p>
+If the argument is a negative number <em>-n</em>,
+the result is a pattern that
+succeeds only if the input string does not have <em>n</em> characters:
+<code>lpeg.P(-n)</code>
+is equivalent to <code>-lpeg.P(n)</code>
+(see the <a href="#op-unm">unary minus operation</a>).
+</p></li>
+
+<li><p>
+If the argument is a boolean,
+the result is a pattern that always succeeds or always fails
+(according to the boolean value),
+without consuming any input.
+</p></li>
+
+<li><p>
+If the argument is a table,
+it is interpreted as a grammar
+(see <a href="#grammar">Grammars</a>).
+</p></li>
+
+<li><p>
+If the argument is a function,
+returns a pattern equivalent to a
+<a href="#matchtime">match-time capture</a> over the empty string.
+</p></li>
+
+</ul>
+
+
+<h3><a name="op-behind"></a><code>lpeg.B(patt [, n])</code></h3>
+<p>
+Returns a pattern that
+matches only if the input string matches <code>patt</code>
+starting <code>n</code> positions behind the current position.
+(The default value for <code>n</code> is 1.)
+If the current position is less than or equal to <code>n</code>,
+this pattern fails.
+</p>
+
+<p>
+Like the <a href="#op-len">and predicate</a>,
+this pattern never consumes any input,
+independently of success or failure,
+and it never produces any capture.
+</p>
+
+<p>
+The pattern <code>patt</code> cannot contain any open reference
+to grammar rules (see <a href="#grammar">grammars</a>).
+</p>
+
+<p>
+(This is an experimental feature.
+There is a good chance it will change in future versions.)
+</p>
+
+
+<h3><a name="op-r"></a><code>lpeg.R ({range})</code></h3>
+<p>
+Returns a pattern that matches any single character
+belonging to one of the given <em>ranges</em>.
+Each <code>range</code> is a string <em>xy</em> of length 2,
+representing all characters with code
+between the codes of <em>x</em> and <em>y</em>
+(both inclusive).
+</p>
+
+<p>
+As an example, the pattern
+<code>lpeg.R("09")</code> matches any digit,
+and <code>lpeg.R("az", "AZ")</code> matches any ASCII letter.
+</p>
+
+
+<h3><a name="op-s"></a><code>lpeg.S (string)</code></h3>
+<p>
+Returns a pattern that matches any single character that
+appears in the given string.
+(The <code>S</code> stands for <em>Set</em>.)
+</p>
+
+<p>
+As an example, the pattern
+<code>lpeg.S("+-*/")</code> matches any arithmetic operator.
+</p>
+
+<p>
+Note that, if <code>s</code> is a character
+(that is, a string of length 1),
+then <code>lpeg.P(s)</code> is equivalent to <code>lpeg.S(s)</code>
+which is equivalent to <code>lpeg.R(s..s)</code>.
+Note also that both <code>lpeg.S("")</code> and <code>lpeg.R()</code>
+are patterns that always fail.
+</p>
+
+
+<h3><a name="op-v"></a><code>lpeg.V (v)</code></h3>
+<p>
+This operation creates a non-terminal (a <em>variable</em>)
+for a grammar.
+The created non-terminal refers to the rule indexed by <code>v</code>
+in the enclosing grammar.
+(See <a href="#grammar">Grammars</a> for details.)
+</p>
+
+
+<h3><a name="op-locale"></a><code>lpeg.locale ([table])</code></h3>
+<p>
+Returns a table with patterns for matching some character classes
+according to the current locale.
+The table has fields named
+<code>alnum</code>,
+<code>alpha</code>,
+<code>cntrl</code>,
+<code>digit</code>,
+<code>graph</code>,
+<code>lower</code>,
+<code>print</code>,
+<code>punct</code>,
+<code>space</code>,
+<code>upper</code>, and
+<code>xdigit</code>,
+each one containing a correspondent pattern.
+Each pattern matches any single character that belongs to its class.
+</p>
+
+<p>
+If called with an argument <code>table</code>,
+then it creates those fields inside the given table and
+returns that table.
+</p>
+
+
+<h3><a name="op-len"></a><code>#patt</code></h3>
+<p>
+Returns a pattern that
+matches only if the input string matches <code>patt</code>,
+but without consuming any input,
+independently of success or failure.
+(This pattern is called an <em>and predicate</em>
+and it is equivalent to
+<em>&patt</em> in the original PEG notation.)
+</p>
+
+
+<p>
+This pattern never produces any capture.
+</p>.
+
+
+<h3><a name="op-unm"></a><code>-patt</code></h3>
+<p>
+Returns a pattern that
+matches only if the input string does not match <code>patt</code>.
+It does not consume any input,
+independently of success or failure.
+(This pattern is equivalent to
+<em>!patt</em> in the original PEG notation.)
+</p>
+
+<p>
+As an example, the pattern
+<code>-lpeg.P(1)</code> matches only the end of string.
+</p>
+
+<p>
+This pattern never produces any captures,
+because either <code>patt</code> fails
+or <code>-patt</code> fails.
+(A failing pattern never produces captures.)
+</p>
+
+
+<h3><a name="op-add"></a><code>patt1 + patt2</code></h3>
+<p>
+Returns a pattern equivalent to an <em>ordered choice</em>
+of <code>patt1</code> and <code>patt2</code>.
+(This is denoted by <em>patt1 / patt2</em> in the original PEG notation,
+not to be confused with the <code>/</code> operation in LPeg.)
+It matches either <code>patt1</code> or <code>patt2</code>,
+with no backtracking once one of them succeeds.
+The identity element for this operation is the pattern
+<code>lpeg.P(false)</code>,
+which always fails.
+</p>
+
+<p>
+If both <code>patt1</code> and <code>patt2</code> are
+character sets,
+this operation is equivalent to set union.
+</p>
+<pre class="example">
+lower = lpeg.R("az")
+upper = lpeg.R("AZ")
+letter = lower + upper
+</pre>
+
+
+<h3><a name="op-sub"></a><code>patt1 - patt2</code></h3>
+<p>
+Returns a pattern equivalent to <em>!patt2 patt1</em>.
+This pattern asserts that the input does not match
+<code>patt2</code> and then matches <code>patt1</code>.
+</p>
+
+<p>
+When succeeded,
+this pattern produces all captures from <code>patt1</code>.
+It never produces any capture from <code>patt2</code>
+(as either <code>patt2</code> fails or
+<code>patt1 - patt2</code> fails).
+</p>
+
+<p>
+If both <code>patt1</code> and <code>patt2</code> are
+character sets,
+this operation is equivalent to set difference.
+Note that <code>-patt</code> is equivalent to <code>"" - patt</code>
+(or <code>0 - patt</code>).
+If <code>patt</code> is a character set,
+<code>1 - patt</code> is its complement.
+</p>
+
+
+<h3><a name="op-mul"></a><code>patt1 * patt2</code></h3>
+<p>
+Returns a pattern that matches <code>patt1</code>
+and then matches <code>patt2</code>,
+starting where <code>patt1</code> finished.
+The identity element for this operation is the
+pattern <code>lpeg.P(true)</code>,
+which always succeeds.
+</p>
+
+<p>
+(LPeg uses the <code>*</code> operator
+[instead of the more obvious <code>..</code>]
+both because it has
+the right priority and because in formal languages it is
+common to use a dot for denoting concatenation.)
+</p>
+
+
+<h3><a name="op-pow"></a><code>patt^n</code></h3>
+<p>
+If <code>n</code> is nonnegative,
+this pattern is
+equivalent to <em>patt<sup>n</sup> patt*</em>.
+It matches at least <code>n</code> occurrences of <code>patt</code>.
+</p>
+
+<p>
+Otherwise, when <code>n</code> is negative,
+this pattern is equivalent to <em>(patt?)<sup>-n</sup></em>.
+That is, it matches at most <code>|n|</code>
+occurrences of <code>patt</code>.
+</p>
+
+<p>
+In particular, <code>patt^0</code> is equivalent to <em>patt*</em>,
+<code>patt^1</code> is equivalent to <em>patt+</em>,
+and <code>patt^-1</code> is equivalent to <em>patt?</em>
+in the original PEG notation.
+</p>
+
+<p>
+In all cases,
+the resulting pattern is greedy with no backtracking
+(also called a <em>possessive</em> repetition).
+That is, it matches only the longest possible sequence
+of matches for <code>patt</code>.
+</p>
+
+
+
+<h2><a name="grammar">Grammars</a></h2>
+
+<p>
+With the use of Lua variables,
+it is possible to define patterns incrementally,
+with each new pattern using previously defined ones.
+However, this technique does not allow the definition of
+recursive patterns.
+For recursive patterns,
+we need real grammars.
+</p>
+
+<p>
+LPeg represents grammars with tables,
+where each entry is a rule.
+</p>
+
+<p>
+The call <code>lpeg.V(v)</code>
+creates a pattern that represents the nonterminal
+(or <em>variable</em>) with index <code>v</code> in a grammar.
+Because the grammar still does not exist when
+this function is evaluated,
+the result is an <em>open reference</em> to the respective rule.
+</p>
+
+<p>
+A table is <em>fixed</em> when it is converted to a pattern
+(either by calling <code>lpeg.P</code> or by using it wherein a
+pattern is expected).
+Then every open reference created by <code>lpeg.V(v)</code>
+is corrected to refer to the rule indexed by <code>v</code> in the table.
+</p>
+
+<p>
+When a table is fixed,
+the result is a pattern that matches its <em>initial rule</em>.
+The entry with index 1 in the table defines its initial rule.
+If that entry is a string,
+it is assumed to be the name of the initial rule.
+Otherwise, LPeg assumes that the entry 1 itself is the initial rule.
+</p>
+
+<p>
+As an example,
+the following grammar matches strings of a's and b's that
+have the same number of a's and b's:
+</p>
+<pre class="example">
+equalcount = lpeg.P{
+ "S"; -- initial rule name
+ S = "a" * lpeg.V"B" + "b" * lpeg.V"A" + "",
+ A = "a" * lpeg.V"S" + "b" * lpeg.V"A" * lpeg.V"A",
+ B = "b" * lpeg.V"S" + "a" * lpeg.V"B" * lpeg.V"B",
+} * -1
+</pre>
+<p>
+It is equivalent to the following grammar in standard PEG notation:
+</p>
+<pre class="example">
+ S <- 'a' B / 'b' A / ''
+ A <- 'a' S / 'b' A A
+ B <- 'b' S / 'a' B B
+</pre>
+
+
+<h2><a name="captures">Captures</a></h2>
+
+<p>
+A <em>capture</em> is a pattern that creates values
+(the so called <em>semantic information</em>) when it matches.
+LPeg offers several kinds of captures,
+which produces values based on matches and combine these values to
+produce new values.
+Each capture may produce zero or more values.
+</p>
+
+<p>
+The following table summarizes the basic captures:
+</p>
+<table border="1">
+<tbody><tr><td><b>Operation</b></td><td><b>What it Produces</b></td></tr>
+<tr><td><a href="#cap-c"><code>lpeg.C(patt)</code></a></td>
+ <td>the match for <code>patt</code> plus all captures
+ made by <code>patt</code></td></tr>
+<tr><td><a href="#cap-arg"><code>lpeg.Carg(n)</code></a></td>
+ <td>the value of the n<sup>th</sup> extra argument to
+ <code>lpeg.match</code> (matches the empty string)</td></tr>
+<tr><td><a href="#cap-b"><code>lpeg.Cb(name)</code></a></td>
+ <td>the values produced by the previous
+ group capture named <code>name</code>
+ (matches the empty string)</td></tr>
+<tr><td><a href="#cap-cc"><code>lpeg.Cc(values)</code></a></td>
+ <td>the given values (matches the empty string)</td></tr>
+<tr><td><a href="#cap-f"><code>lpeg.Cf(patt, func)</code></a></td>
+ <td>a <em>folding</em> of the captures from <code>patt</code></td></tr>
+<tr><td><a href="#cap-g"><code>lpeg.Cg(patt [, name])</code></a></td>
+ <td>the values produced by <code>patt</code>,
+ optionally tagged with <code>name</code></td></tr>
+<tr><td><a href="#cap-p"><code>lpeg.Cp()</code></a></td>
+ <td>the current position (matches the empty string)</td></tr>
+<tr><td><a href="#cap-s"><code>lpeg.Cs(patt)</code></a></td>
+ <td>the match for <code>patt</code>
+ with the values from nested captures replacing their matches</td></tr>
+<tr><td><a href="#cap-t"><code>lpeg.Ct(patt)</code></a></td>
+ <td>a table with all captures from <code>patt</code></td></tr>
+<tr><td><a href="#cap-string"><code>patt / string</code></a></td>
+ <td><code>string</code>, with some marks replaced by captures
+ of <code>patt</code></td></tr>
+<tr><td><a href="#cap-query"><code>patt / table</code></a></td>
+ <td><code>table[c]</code>, where <code>c</code> is the (first)
+ capture of <code>patt</code></td></tr>
+<tr><td><a href="#cap-func"><code>patt / function</code></a></td>
+ <td>the returns of <code>function</code> applied to the captures
+ of <code>patt</code></td></tr>
+<tr><td><a href="#matchtime"><code>lpeg.Cmt(patt, function)</code></a></td>
+ <td>the returns of <code>function</code> applied to the captures
+ of <code>patt</code>; the application is done at match time</td></tr>
+</tbody></table>
+
+<p>
+A capture pattern produces its values every time it succeeds.
+For instance,
+a capture inside a loop produces as many values as matched by the loop.
+A capture produces a value only when it succeeds.
+For instance,
+the pattern <code>lpeg.C(lpeg.P"a"^-1)</code>
+produces the empty string when there is no <code>"a"</code>
+(because the pattern <code>"a"?</code> succeeds),
+while the pattern <code>lpeg.C("a")^-1</code>
+does not produce any value when there is no <code>"a"</code>
+(because the pattern <code>"a"</code> fails).
+</p>
+
+<p>
+Usually,
+LPeg evaluates all captures only after (and if) the entire match succeeds.
+During the <em>match time</em> it only gathers enough information
+to produce the capture values later.
+As a particularly important consequence,
+most captures cannot affect the way a pattern matches a subject.
+The only exception to this rule is the
+so-called <a href="#matchtime"><em>match-time capture</em></a>.
+When a match-time capture matches,
+it forces the immediate evaluation of all its nested captures
+and then calls its corresponding function,
+which tells whether the match succeeds and also
+what values are produced.
+</p>
+
+<h3><a name="cap-c"></a><code>lpeg.C (patt)</code></h3>
+<p>
+Creates a <em>simple capture</em>,
+which captures the substring of the subject that matches <code>patt</code>.
+The captured value is a string.
+If <code>patt</code> has other captures,
+their values are returned after this one.
+</p>
+
+
+<h3><a name="cap-arg"></a><code>lpeg.Carg (n)</code></h3>
+<p>
+Creates an <em>argument capture</em>.
+This pattern matches the empty string and
+produces the value given as the n<sup>th</sup> extra
+argument given in the call to <code>lpeg.match</code>.
+</p>
+
+
+<h3><a name="cap-b"></a><code>lpeg.Cb (name)</code></h3>
+<p>
+Creates a <em>back capture</em>.
+This pattern matches the empty string and
+produces the values produced by the <em>most recent</em>
+<a href="#cap-g">group capture</a> named <code>name</code>.
+</p>
+
+<p>
+<em>Most recent</em> means the last
+<em>complete</em>
+<em>outermost</em>
+group capture with the given name.
+A <em>Complete</em> capture means that the entire pattern
+corresponding to the capture has matched.
+An <em>Outermost</em> capture means that the capture is not inside
+another complete capture.
+</p>
+
+
+<h3><a name="cap-cc"></a><code>lpeg.Cc ([value, ...])</code></h3>
+<p>
+Creates a <em>constant capture</em>.
+This pattern matches the empty string and
+produces all given values as its captured values.
+</p>
+
+
+<h3><a name="cap-f"></a><code>lpeg.Cf (patt, func)</code></h3>
+<p>
+Creates a <em>fold capture</em>.
+If <code>patt</code> produces a list of captures
+<em>C<sub>1</sub> C<sub>2</sub> ... C<sub>n</sub></em>,
+this capture will produce the value
+<em>func(...func(func(C<sub>1</sub>, C<sub>2</sub>), C<sub>3</sub>)...,
+ C<sub>n</sub>)</em>,
+that is, it will <em>fold</em>
+(or <em>accumulate</em>, or <em>reduce</em>)
+the captures from <code>patt</code> using function <code>func</code>.
+</p>
+
+<p>
+This capture assumes that <code>patt</code> should produce
+at least one capture with at least one value (of any type),
+which becomes the initial value of an <em>accumulator</em>.
+(If you need a specific initial value,
+you may prefix a <a href="#cap-cc">constant capture</a> to <code>patt</code>.)
+For each subsequent capture
+LPeg calls <code>func</code>
+with this accumulator as the first argument and all values produced
+by the capture as extra arguments;
+the value returned by this call
+becomes the new value for the accumulator.
+The final value of the accumulator becomes the captured value.
+</p>
+
+<p>
+As an example,
+the following pattern matches a list of numbers separated
+by commas and returns their addition:
+</p>
+<pre class="example">
+-- matches a numeral and captures its value
+number = lpeg.R"09"^1 / tonumber
+
+-- matches a list of numbers, captures their values
+list = number * ("," * number)^0
+
+-- auxiliary function to add two numbers
+function add (acc, newvalue) return acc + newvalue end
+
+-- folds the list of numbers adding them
+sum = lpeg.Cf(list, add)
+
+-- example of use
+print(sum:match("10,30,43")) --> 83
+</pre>
+
+
+<h3><a name="cap-g"></a><code>lpeg.Cg (patt [, name])</code></h3>
+<p>
+Creates a <em>group capture</em>.
+It groups all values returned by <code>patt</code>
+into a single capture.
+The group may be anonymous (if no name is given)
+or named with the given name.
+</p>
+
+<p>
+An anonymous group serves to join values from several captures into
+a single capture.
+A named group has a different behavior.
+In most situations, a named group returns no values at all.
+Its values are only relevant for a following
+<a href="#cap-b">back capture</a> or when used
+inside a <a href="#cap-t">table capture</a>.
+</p>
+
+
+<h3><a name="cap-p"></a><code>lpeg.Cp ()</code></h3>
+<p>
+Creates a <em>position capture</em>.
+It matches the empty string and
+captures the position in the subject where the match occurs.
+The captured value is a number.
+</p>
+
+
+<h3><a name="cap-s"></a><code>lpeg.Cs (patt)</code></h3>
+<p>
+Creates a <em>substitution capture</em>,
+which captures the substring of the subject that matches <code>patt</code>,
+with <em>substitutions</em>.
+For any capture inside <code>patt</code> with a value,
+the substring that matched the capture is replaced by the capture value
+(which should be a string).
+The final captured value is the string resulting from
+all replacements.
+</p>
+
+
+<h3><a name="cap-t"></a><code>lpeg.Ct (patt)</code></h3>
+<p>
+Creates a <em>table capture</em>.
+This capture creates a table and puts all values from all anonymous captures
+made by <code>patt</code> inside this table in successive integer keys,
+starting at 1.
+Moreover,
+for each named capture group created by <code>patt</code>,
+the first value of the group is put into the table
+with the group name as its key.
+The captured value is only the table.
+</p>
+
+
+<h3><a name="cap-string"></a><code>patt / string</code></h3>
+<p>
+Creates a <em>string capture</em>.
+It creates a capture string based on <code>string</code>.
+The captured value is a copy of <code>string</code>,
+except that the character <code>%</code> works as an escape character:
+any sequence in <code>string</code> of the form <code>%<em>n</em></code>,
+with <em>n</em> between 1 and 9,
+stands for the match of the <em>n</em>-th capture in <code>patt</code>.
+The sequence <code>%0</code> stands for the whole match.
+The sequence <code>%%</code> stands for a single <code>%</code>.
+
+
+</p><h3><a name="cap-query"></a><code>patt / table</code></h3>
+<p>
+Creates a <em>query capture</em>.
+It indexes the given table using as key the first value captured by
+<code>patt</code>,
+or the whole match if <code>patt</code> produced no value.
+The value at that index is the final value of the capture.
+If the table does not have that key,
+there is no captured value.
+</p>
+
+
+<h3><a name="cap-func"></a><code>patt / function</code></h3>
+<p>
+Creates a <em>function capture</em>.
+It calls the given function passing all captures made by
+<code>patt</code> as arguments,
+or the whole match if <code>patt</code> made no capture.
+The values returned by the function
+are the final values of the capture.
+In particular,
+if <code>function</code> returns no value,
+there is no captured value.
+</p>
+
+
+<h3><a name="matchtime"></a><code>lpeg.Cmt(patt, function)</code></h3>
+<p>
+Creates a <em>match-time capture</em>.
+Unlike all other captures,
+this one is evaluated immediately when a match occurs.
+It forces the immediate evaluation of all its nested captures
+and then calls <code>function</code>.
+</p>
+
+<p>
+The given function gets as arguments the entire subject,
+the current position (after the match of <code>patt</code>),
+plus any capture values produced by <code>patt</code>.
+</p>
+
+<p>
+The first value returned by <code>function</code>
+defines how the match happens.
+If the call returns a number,
+the match succeeds
+and the returned number becomes the new current position.
+(Assuming a subject <em>s</em> and current position <em>i</em>,
+the returned number must be in the range <em>[i, len(s) + 1]</em>.)
+If the call returns <b>true</b>,
+the match succeeds without consuming any input.
+(So, to return <b>true</b> is equivalent to return <em>i</em>.)
+If the call returns <b>false</b>, <b>nil</b>, or no value,
+the match fails.
+</p>
+
+<p>
+Any extra values returned by the function become the
+values produced by the capture.
+</p>
+
+
+
+
+<h2><a name="ex">Some Examples</a></h2>
+
+<h3>Using a Pattern</h3>
+<p>
+This example shows a very simple but complete program
+that builds and uses a pattern:
+</p>
+<pre class="example">
+local lpeg = require "lpeg"
+
+-- matches a word followed by end-of-string
+p = lpeg.R"az"^1 * -1
+
+print(p:match("hello")) --> 6
+print(lpeg.match(p, "hello")) --> 6
+print(p:match("1 hello")) --> nil
+</pre>
+<p>
+The pattern is simply a sequence of one or more lower-case letters
+followed by the end of string (-1).
+The program calls <code>match</code> both as a method
+and as a function.
+In both sucessful cases,
+the match returns
+the index of the first character after the match,
+which is the string length plus one.
+</p>
+
+
+<h3>Name-value lists</h3>
+<p>
+This example parses a list of name-value pairs and returns a table
+with those pairs:
+</p>
+<pre class="example">
+lpeg.locale(lpeg) -- adds locale entries into 'lpeg' table
+
+local space = lpeg.space^0
+local name = lpeg.C(lpeg.alpha^1) * space
+local sep = lpeg.S(",;") * space
+local pair = lpeg.Cg(name * "=" * space * name) * sep^-1
+local list = lpeg.Cf(lpeg.Ct("") * pair^0, rawset)
+t = list:match("a=b, c = hi; next = pi") --> { a = "b", c = "hi", next = "pi" }
+</pre>
+<p>
+Each pair has the format <code>name = name</code> followed by
+an optional separator (a comma or a semicolon).
+The <code>pair</code> pattern encloses the pair in a group pattern,
+so that the names become the values of a single capture.
+The <code>list</code> pattern then folds these captures.
+It starts with an empty table,
+created by a table capture matching an empty string;
+then for each capture (a pair of names) it applies <code>rawset</code>
+over the accumulator (the table) and the capture values (the pair of names).
+<code>rawset</code> returns the table itself,
+so the accumulator is always the table.
+</p>
+
+<h3>Splitting a string</h3>
+<p>
+The following code builds a pattern that
+splits a string using a given pattern
+<code>sep</code> as a separator:
+</p>
+<pre class="example">
+function split (s, sep)
+ sep = lpeg.P(sep)
+ local elem = lpeg.C((1 - sep)^0)
+ local p = elem * (sep * elem)^0
+ return lpeg.match(p, s)
+end
+</pre>
+<p>
+First the function ensures that <code>sep</code> is a proper pattern.
+The pattern <code>elem</code> is a repetition of zero of more
+arbitrary characters as long as there is not a match against
+the separator. It also captures its result.
+The pattern <code>p</code> matches a list of elements separated
+by <code>sep</code>.
+</p>
+
+<p>
+If the split results in too many values,
+it may overflow the maximum number of values
+that can be returned by a Lua function.
+In this case,
+we should collect these values in a table:
+</p>
+<pre class="example">
+function split (s, sep)
+ sep = lpeg.P(sep)
+ local elem = lpeg.C((1 - sep)^0)
+ local p = lpeg.Ct(elem * (sep * elem)^0) -- make a table capture
+ return lpeg.match(p, s)
+end
+</pre>
+
+
+<h3>Searching for a pattern</h3>
+<p>
+The primitive <code>match</code> works only in anchored mode.
+If we want to find a pattern anywhere in a string,
+we must write a pattern that matches anywhere.
+</p>
+
+<p>
+Because patterns are composable,
+we can write a function that,
+given any arbitrary pattern <code>p</code>,
+returns a new pattern that searches for <code>p</code>
+anywhere in a string.
+There are several ways to do the search.
+One way is like this:
+</p>
+<pre class="example">
+function anywhere (p)
+ return lpeg.P{ p + 1 * lpeg.V(1) }
+end
+</pre>
+<p>
+This grammar has a straight reading:
+it matches <code>p</code> or skips one character and tries again.
+</p>
+
+<p>
+If we want to know where the pattern is in the string
+(instead of knowing only that it is there somewhere),
+we can add position captures to the pattern:
+</p>
+<pre class="example">
+local I = lpeg.Cp()
+function anywhere (p)
+ return lpeg.P{ I * p * I + 1 * lpeg.V(1) }
+end
+</pre>
+
+<p>
+Another option for the search is like this:
+</p>
+<pre class="example">
+local I = lpeg.Cp()
+function anywhere (p)
+ return (1 - lpeg.P(p))^0 * I * p * I
+end
+</pre>
+<p>
+Again the pattern has a straight reading:
+it skips as many characters as possible while not matching <code>p</code>,
+and then matches <code>p</code> (plus appropriate captures).
+</p>
+
+<p>
+If we want to look for a pattern only at word boundaries,
+we can use the following transformer:
+</p>
+
+<pre class="example">
+local t = lpeg.locale()
+
+function atwordboundary (p)
+ return lpeg.P{
+ [1] = p + t.alpha^0 * (1 - t.alpha)^1 * lpeg.V(1)
+ }
+end
+</pre>
+
+
+<h3><a name="balanced"></a>Balanced parentheses</h3>
+<p>
+The following pattern matches only strings with balanced parentheses:
+</p>
+<pre class="example">
+b = lpeg.P{ "(" * ((1 - lpeg.S"()") + lpeg.V(1))^0 * ")" }
+</pre>
+<p>
+Reading the first (and only) rule of the given grammar,
+we have that a balanced string is
+an open parenthesis,
+followed by zero or more repetitions of either
+a non-parenthesis character or
+a balanced string (<code>lpeg.V(1)</code>),
+followed by a closing parenthesis.
+</p>
+
+
+<h3>Global substitution</h3>
+<p>
+The next example does a job somewhat similar to <code>string.gsub</code>.
+It receives a pattern and a replacement value,
+and substitutes the replacement value for all occurrences of the pattern
+in a given string:
+</p>
+<pre class="example">
+function gsub (s, patt, repl)
+ patt = lpeg.P(patt)
+ patt = lpeg.Cs((patt / repl + 1)^0)
+ return lpeg.match(patt, s)
+end
+</pre>
+<p>
+As in <code>string.gsub</code>,
+the replacement value can be a string,
+a function, or a table.
+</p>
+
+
+<h3><a name="CSV"></a>Comma-Separated Values (CSV)</h3>
+<p>
+This example breaks a string into comma-separated values,
+returning all fields:
+</p>
+<pre class="example">
+local field = '"' * lpeg.Cs(((lpeg.P(1) - '"') + lpeg.P'""' / '"')^0) * '"' +
+ lpeg.C((1 - lpeg.S',\n"')^0)
+
+local record = field * (',' * field)^0 * (lpeg.P'\n' + -1)
+
+function csv (s)
+ return lpeg.match(record, s)
+end
+</pre>
+<p>
+A field is either a quoted field
+(which may contain any character except an individual quote,
+which may be written as two quotes that are replaced by one)
+or an unquoted field
+(which cannot contain commas, newlines, or quotes).
+A record is a list of fields separated by commas,
+ending with a newline or the string end (-1).
+</p>
+
+<p>
+As it is,
+the previous pattern returns each field as a separated result.
+If we add a table capture in the definition of <code>record</code>,
+the pattern will return instead a single table
+containing all fields:
+</p>
+<pre>
+local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1)
+</pre>
+
+
+<h3>UTF-8 and Latin 1</h3>
+<p>
+It is not difficult to use LPeg to convert a string from
+UTF-8 encoding to Latin 1 (ISO 8859-1):
+</p>
+
+<pre class="example">
+-- convert a two-byte UTF-8 sequence to a Latin 1 character
+local function f2 (s)
+ local c1, c2 = string.byte(s, 1, 2)
+ return string.char(c1 * 64 + c2 - 12416)
+end
+
+local utf8 = lpeg.R("\0\127")
+ + lpeg.R("\194\195") * lpeg.R("\128\191") / f2
+
+local decode_pattern = lpeg.Cs(utf8^0) * -1
+</pre>
+<p>
+In this code,
+the definition of UTF-8 is already restricted to the
+Latin 1 range (from 0 to 255).
+Any encoding outside this range (as well as any invalid encoding)
+will not match that pattern.
+</p>
+
+<p>
+As the definition of <code>decode_pattern</code> demands that
+the pattern matches the whole input (because of the -1 at its end),
+any invalid string will simply fail to match,
+without any useful information about the problem.
+We can improve this situation redefining <code>decode_pattern</code>
+as follows:
+</p>
+<pre class="example">
+local function er (_, i) error("invalid encoding at position " .. i) end
+
+local decode_pattern = lpeg.Cs(utf8^0) * (-1 + lpeg.P(er))
+</pre>
+<p>
+Now, if the pattern <code>utf8^0</code> stops
+before the end of the string,
+an appropriate error function is called.
+</p>
+
+
+<h3>UTF-8 and Unicode</h3>
+<p>
+We can extend the previous patterns to handle all Unicode code points.
+Of course,
+we cannot translate them to Latin 1 or any other one-byte encoding.
+Instead, our translation results in a array with the code points
+represented as numbers.
+The full code is here:
+</p>
+<pre class="example">
+-- decode a two-byte UTF-8 sequence
+local function f2 (s)
+ local c1, c2 = string.byte(s, 1, 2)
+ return c1 * 64 + c2 - 12416
+end
+
+-- decode a three-byte UTF-8 sequence
+local function f3 (s)
+ local c1, c2, c3 = string.byte(s, 1, 3)
+ return (c1 * 64 + c2) * 64 + c3 - 925824
+end
+
+-- decode a four-byte UTF-8 sequence
+local function f4 (s)
+ local c1, c2, c3, c4 = string.byte(s, 1, 4)
+ return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168
+end
+
+local cont = lpeg.R("\128\191") -- continuation byte
+
+local utf8 = lpeg.R("\0\127") / string.byte
+ + lpeg.R("\194\223") * cont / f2
+ + lpeg.R("\224\239") * cont * cont / f3
+ + lpeg.R("\240\244") * cont * cont * cont / f4
+
+local decode_pattern = lpeg.Ct(utf8^0) * -1
+</pre>
+
+
+<h3>Lua's long strings</h3>
+<p>
+A long string in Lua starts with the pattern <code>[=*[</code>
+and ends at the first occurrence of <code>]=*]</code> with
+exactly the same number of equal signs.
+If the opening brackets are followed by a newline,
+this newline is discharged
+(that is, it is not part of the string).
+</p>
+
+<p>
+To match a long string in Lua,
+the pattern must capture the first repetition of equal signs and then,
+whenever it finds a candidate for closing the string,
+check whether it has the same number of equal signs.
+</p>
+
+<pre class="example">
+equals = lpeg.P"="^0
+open = "[" * lpeg.Cg(equals, "init") * "[" * lpeg.P"\n"^-1
+close = "]" * lpeg.C(equals) * "]"
+closeeq = lpeg.Cmt(close * lpeg.Cb("init"), function (s, i, a, b) return a == b end)
+string = open * lpeg.C((lpeg.P(1) - closeeq)^0) * close /
+ function (s, o) return s end
+</pre>
+
+<p>
+The <code>open</code> pattern matches <code>[=*[</code>,
+capturing the repetitions of equal signs in a group named <code>init</code>;
+it also discharges an optional newline, if present.
+The <code>close</code> pattern matches <code>]=*]</code>,
+also capturing the repetitions of equal signs.
+The <code>closeeq</code> pattern first matches <code>close</code>;
+then it uses a back capture to recover the capture made
+by the previous <code>open</code>,
+which is named <code>init</code>;
+finally it uses a match-time capture to check
+whether both captures are equal.
+The <code>string</code> pattern starts with an <code>open</code>,
+then it goes as far as possible until matching <code>closeeq</code>,
+and then matches the final <code>close</code>.
+The final function capture simply discards
+the capture made by <code>close</code>.
+</p>
+
+
+<h3>Arithmetic expressions</h3>
+<p>
+This example is a complete parser and evaluator for simple
+arithmetic expressions.
+We write it in two styles.
+The first approach first builds a syntax tree and then
+traverses this tree to compute the expression value:
+</p>
+<pre class="example">
+-- Lexical Elements
+local Space = lpeg.S(" \n\t")^0
+local Number = lpeg.C(lpeg.P"-"^-1 * lpeg.R("09")^1) * Space
+local FactorOp = lpeg.C(lpeg.S("+-")) * Space
+local TermOp = lpeg.C(lpeg.S("*/")) * Space
+local Open = "(" * Space
+local Close = ")" * Space
+
+-- Grammar
+local Exp, Term, Factor = lpeg.V"Exp", lpeg.V"Term", lpeg.V"Factor"
+G = lpeg.P{ Exp,
+ Exp = lpeg.Ct(Factor * (FactorOp * Factor)^0);
+ Factor = lpeg.Ct(Term * (TermOp * Term)^0);
+ Term = Number + Open * Exp * Close;
+}
+
+G = Space * G * -1
+
+-- Evaluator
+function eval (x)
+ if type(x) == "string" then
+ return tonumber(x)
+ else
+ local op1 = eval(x[1])
+ for i = 2, #x, 2 do
+ local op = x[i]
+ local op2 = eval(x[i + 1])
+ if (op == "+") then op1 = op1 + op2
+ elseif (op == "-") then op1 = op1 - op2
+ elseif (op == "*") then op1 = op1 * op2
+ elseif (op == "/") then op1 = op1 / op2
+ end
+ end
+ return op1
+ end
+end
+
+-- Parser/Evaluator
+function evalExp (s)
+ local t = lpeg.match(G, s)
+ if not t then error("syntax error", 2) end
+ return eval(t)
+end
+
+-- small example
+print(evalExp"3 + 5*9 / (1+1) - 12") --> 13.5
+</pre>
+
+<p>
+The second style computes the expression value on the fly,
+without building the syntax tree.
+The following grammar takes this approach.
+(It assumes the same lexical elements as before.)
+</p>
+<pre class="example">
+-- Auxiliary function
+function eval (v1, op, v2)
+ if (op == "+") then return v1 + v2
+ elseif (op == "-") then return v1 - v2
+ elseif (op == "*") then return v1 * v2
+ elseif (op == "/") then return v1 / v2
+ end
+end
+
+-- Grammar
+local V = lpeg.V
+G = lpeg.P{ "Exp",
+ Exp = lpeg.Cf(V"Factor" * lpeg.Cg(FactorOp * V"Factor")^0, eval);
+ Factor = lpeg.Cf(V"Term" * lpeg.Cg(TermOp * V"Term")^0, eval);
+ Term = Number / tonumber + Open * V"Exp" * Close;
+}
+
+-- small example
+print(lpeg.match(G, "3 + 5*9 / (1+1) - 12")) --> 13.5
+</pre>
+<p>
+Note the use of the fold (accumulator) capture.
+To compute the value of an expression,
+the accumulator starts with the value of the first factor,
+and then applies <code>eval</code> over
+the accumulator, the operator,
+and the new factor for each repetition.
+</p>
+
+
+
+<h2><a name="download"></a>Download</h2>
+
+<p>LPeg
+<a href="http://www.inf.puc-rio.br/~roberto/lpeg/lpeg-0.10.2.tar.gz">source code</a>.</p>
+
+
+<h2><a name="license">License</a></h2>
+
+<p>
+Copyright © 2008 Lua.org, PUC-Rio.
+</p>
+<p>
+Permission is hereby granted, free of charge,
+to any person obtaining a copy of this software and
+associated documentation files (the "Software"),
+to deal in the Software without restriction,
+including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software,
+and to permit persons to whom the Software is
+furnished to do so,
+subject to the following conditions:
+</p>
+
+<p>
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions of the Software.
+</p>
+
+<p>
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+</p>
+
+</div> <!-- id="content" -->
+
+</div> <!-- id="main" -->
+
+<div id="about">
+<p><small>
+$Id: lpeg.html,v 1.64 2011/02/16 15:01:58 roberto Exp $
+</small></p>
+</div> <!-- id="about" -->
+
+</div> <!-- id="container" -->
+
+</body>
+</html>
diff --git a/lpeg-0.10.2/makefile b/lpeg-0.10.2/makefile
new file mode 100644
index 0000000..87169a2
--- /dev/null
+++ b/lpeg-0.10.2/makefile
@@ -0,0 +1,42 @@
+LIBNAME = lpeg
+LUADIR = /usr/include/lua5.1/
+
+COPT = -O2 -DNDEBUG
+
+CWARNS = -Wall -Wextra -pedantic \
+ -Waggregate-return \
+ -Wbad-function-cast \
+ -Wcast-align \
+ -Wcast-qual \
+ -Wdeclaration-after-statement \
+ -Wdisabled-optimization \
+ -Wmissing-prototypes \
+ -Wnested-externs \
+ -Wpointer-arith \
+ -Wshadow \
+ -Wsign-compare \
+ -Wstrict-prototypes \
+ -Wundef \
+ -Wwrite-strings \
+ # -Wunreachable-code \
+
+
+CFLAGS = $(CWARNS) $(COPT) -ansi -I$(LUADIR)
+CC = gcc
+
+# For Linux
+DLLFLAGS = -shared -fpic
+ENV =
+
+# For Mac OS
+# ENV = MACOSX_DEPLOYMENT_TARGET=10.4
+# DLLFLAGS = -bundle -undefined dynamic_lookup
+
+lpeg.so: lpeg.o
+ env $(ENV) $(CC) $(DLLFLAGS) lpeg.o -o lpeg.so
+
+lpeg.o: makefile lpeg.c lpeg.h
+
+test: test.lua re.lua lpeg.so
+ test.lua
+
diff --git a/lpeg-0.10.2/re.html b/lpeg-0.10.2/re.html
new file mode 100644
index 0000000..d67e23a
--- /dev/null
+++ b/lpeg-0.10.2/re.html
@@ -0,0 +1,486 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html>
+<head>
+ <title>LPeg.re - Regex syntax for LPEG</title>
+ <link rel="stylesheet"
+ href="http://www.inf.puc-rio.br/~roberto/lpeg/doc.css"
+ type="text/css"/>
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+</head>
+<body>
+
+<!-- $Id: re.html,v 1.17 2011/01/10 15:08:06 roberto Exp $ -->
+
+<div id="container">
+
+<div id="product">
+ <div id="product_logo">
+ <a href="http://www.inf.puc-rio.br/~roberto/lpeg/">
+ <img alt="LPeg logo" src="lpeg-128.gif"/>
+ </a>
+ </div>
+ <div id="product_name"><big><strong>LPeg.re</strong></big></div>
+ <div id="product_description">
+ Regex syntax for LPEG
+ </div>
+</div> <!-- id="product" -->
+
+<div id="main">
+
+<div id="navigation">
+<h1>re</h1>
+
+<ul>
+ <li><a href="#basic">Basic Constructions</a></li>
+ <li><a href="#func">Functions</a></li>
+ <li><a href="#ex">Some Examples</a></li>
+ <li><a href="#license">License</a></li>
+ </ul>
+ </li>
+</ul>
+</div> <!-- id="navigation" -->
+
+<div id="content">
+
+<h2><a name="basic"></a>The <code>re</code> Module</h2>
+
+<p>
+The <code>re</code> module
+(provided by file <code>re.lua</code> in the distribution)
+supports a somewhat conventional regex syntax
+for pattern usage within <a href="lpeg.html">LPeg</a>.
+</p>
+
+<p>
+The next table summarizes <code>re</code>'s syntax.
+A <code>p</code> represents an arbitrary pattern;
+<code>num</code> represents a number (<code>[0-9]+</code>);
+<code>name</code> represents an identifier
+(<code>[a-zA-Z][a-zA-Z0-9_]*</code>).
+Constructions are listed in order of decreasing precedence.
+<table border="1">
+<tbody><tr><td><b>Syntax</b></td><td><b>Description</b></td></tr>
+<tr><td><code>( p )</code></td> <td>grouping</td></tr>
+<tr><td><code>'string'</code></td> <td>literal string</td></tr>
+<tr><td><code>"string"</code></td> <td>literal string</td></tr>
+<tr><td><code>[class]</code></td> <td>character class</td></tr>
+<tr><td><code>.</code></td> <td>any character</td></tr>
+<tr><td><code>%name</code></td>
+ <td>pattern <code>defs[name]</code> or a pre-defined pattern</td></tr>
+<tr><td><code>name</code></td><td>non terminal</td></tr>
+<tr><td><code><name></code></td><td>non terminal</td></tr>
+<tr><td><code>{}</code></td> <td>position capture</td></tr>
+<tr><td><code>{ p }</code></td> <td>simple capture</td></tr>
+<tr><td><code>{: p :}</code></td> <td>anonymous group capture</td></tr>
+<tr><td><code>{:name: p :}</code></td> <td>named group capture</td></tr>
+<tr><td><code>{~ p ~}</code></td> <td>substitution capture</td></tr>
+<tr><td><code>=name</code></td> <td>back reference
+</td></tr>
+<tr><td><code>p ?</code></td> <td>optional match</td></tr>
+<tr><td><code>p *</code></td> <td>zero or more repetitions</td></tr>
+<tr><td><code>p +</code></td> <td>one or more repetitions</td></tr>
+<tr><td><code>p^num</code></td> <td>exactly <code>n</code> repetitions</td></tr>
+<tr><td><code>p^+num</code></td>
+ <td>at least <code>n</code> repetitions</td></tr>
+<tr><td><code>p^-num</code></td>
+ <td>at most <code>n</code> repetitions</td></tr>
+<tr><td><code>p -> 'string'</code></td> <td>string capture</td></tr>
+<tr><td><code>p -> "string"</code></td> <td>string capture</td></tr>
+<tr><td><code>p -> {}</code></td> <td>table capture</td></tr>
+<tr><td><code>p -> name</code></td> <td>function/query/string capture
+equivalent to <code>p / defs[name]</code></td></tr>
+<tr><td><code>p => name</code></td> <td>match-time capture
+equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr>
+<tr><td><code>& p</code></td> <td>and predicate</td></tr>
+<tr><td><code>! p</code></td> <td>not predicate</td></tr>
+<tr><td><code>p1 p2</code></td> <td>concatenation</td></tr>
+<tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr>
+<tr><td>(<code>name <- p</code>)<sup>+</sup></td> <td>grammar</td></tr>
+</tbody></table>
+<p>
+Any space appearing in a syntax description can be
+replaced by zero or more space characters and Lua-style comments
+(<code>--</code> until end of line).
+</p>
+
+<p>
+Character classes define sets of characters.
+An initial <code>^</code> complements the resulting set.
+A range <em>x</em><code>-</code><em>y</em> includes in the set
+all characters with codes between the codes of <em>x</em> and <em>y</em>.
+A pre-defined class <code>%</code><em>name</em> includes all
+characters of that class.
+A simple character includes itself in the set.
+The only special characters inside a class are <code>^</code>
+(special only if it is the first character);
+<code>]</code>
+(can be included in the set as the first character,
+after the optional <code>^</code>);
+<code>%</code> (special only if followed by a letter);
+and <code>-</code>
+(can be included in the set as the first or the last character).
+</p>
+
+<p>
+Currently the pre-defined classes are similar to those from the
+Lua's string library
+(<code>%a</code> for letters,
+<code>%A</code> for non letters, etc.).
+There is also a class <code>%nl</code>
+containing only the newline character,
+which is particularly handy for grammars written inside long strings,
+as long strings do not interpret escape sequences like <code>\n</code>.
+</p>
+
+
+<h2><a name="func">Functions</a></h2>
+
+<h3><code>re.compile (string, [, defs])</code></h3>
+<p>
+Compiles the given string and
+returns an equivalent LPeg pattern.
+The given string may define either an expression or a grammar.
+The optional <code>defs</code> table provides extra Lua values
+to be used by the pattern.
+</p>
+
+<h3><code>re.find (subject, pattern [, init])</code></h3>
+<p>
+Searches the given pattern in the given subject.
+If it finds a match,
+returns the index where this occurrence starts,
+plus the captures made by the pattern (if any).
+Otherwise, returns nil.
+</p>
+
+<p>
+An optional numeric argument <code>init</code> makes the search
+starts at that position in the subject string.
+As usual in Lua libraries,
+a negative value counts from the end.
+</p>
+
+<h3><code>re.match (subject, pattern)</code></h3>
+<p>
+Matches the given pattern against the given subject.
+</p>
+
+<h3><code>re.updatelocale ()</code></h3>
+<p>
+Updates the pre-defined character classes to the current locale.
+</p>
+
+
+<h2><a name="ex">Some Examples</a></h2>
+
+<h3>A complete simple program</h3>
+<p>
+The next code shows a simple complete Lua program using
+the <code>re</code> module:
+</p>
+<pre class="example">
+local re = require"re"
+
+-- find the position of the first number in a string
+print(re.find("the number 423 is odd", "[0-9]+")) --> 12
+
+-- similar, but also captures (and returns) the number
+print(re.find("the number 423 is odd", "{[0-9]+}")) --> 12 423
+
+-- returns all words in a string
+print(re.match("the number 423 is odd", "({%a+} / .)*"))
+--> the number is odd
+</pre>
+
+
+<h3>Balanced parentheses</h3>
+<p>
+The following call will produce the same pattern produced by the
+Lua expression in the
+<a href="lpeg.html#balanced">balanced parentheses</a> example:
+</p>
+<pre class="example">
+b = re.compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
+</pre>
+
+<h3>String reversal</h3>
+<p>
+The next example reverses a string:
+</p>
+<pre class="example">
+rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
+print(rev:match"0123456789") --> 9876543210
+</pre>
+
+<h3>CSV decoder</h3>
+<p>
+The next example replicates the <a href="lpeg.html#CSV">CSV decoder</a>:
+</p>
+<pre class="example">
+record = re.compile[[
+ record <- ( field (',' field)* ) -> {} (%nl / !.)
+ field <- escaped / nonescaped
+ nonescaped <- { [^,"%nl]* }
+ escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"'
+]]
+</pre>
+
+<h3>Lua's long strings</h3>
+<p>
+The next example matches Lua long strings:
+</p>
+<pre class="example">
+c = re.compile([[
+ longstring <- ('[' {:eq: '='* :} '[' close) -> void
+ close <- ']' =eq ']' / . close
+]], {void = function () end})
+
+print(c:match'[==[]]===]]]]==]===[]') --> 17
+</pre>
+
+<h3>Abstract Syntax Trees</h3>
+<p>
+This example shows a simple way to build an
+abstract syntax tree (AST) for a given grammar.
+To keep our example simple,
+let us consider the following grammar
+for lists of names:
+</p>
+<pre class="example">
+p = re.compile[[
+ listname <- (name s)*
+ name <- [a-z][a-z]*
+ s <- %s*
+]]
+</pre>
+<p>
+Now, we will add captures to build a corresponding AST.
+As a first step, the pattern will build a table to
+represent each non terminal;
+terminals will be represented by their corresponding strings:
+</p>
+<pre class="example">
+c = re.compile[[
+ listname <- (name s)* -> {}
+ name <- {[a-z][a-z]*} -> {}
+ s <- %s*
+]]
+</pre>
+<p>
+Now, a match against <code>"hi hello bye"</code>
+results in the table
+<code>{{"hi"}, {"hello"}, {"bye"}}</code>.
+</p>
+<p>
+For such a simple grammar,
+this AST is more than enough;
+actually, the tables around each single name
+are already overkilling.
+More complex grammars,
+however, may need some more structure.
+Specifically,
+it would be useful if each table had
+a <code>tag</code> field telling what non terminal
+that table represents.
+We can add such a tag using
+<a href="lpeg.html/#cap-g">named group captures</a>:
+</p>
+<pre class="example">
+x = re.compile[[
+ listname <- ({:tag: '' -> 'list':} (name s)*) -> {}
+ name <- ({:tag: '' -> 'id':} {[a-z][a-z]*}) -> {}
+ s <- ' '*
+]]
+</pre>
+<p>
+With these group captures,
+a match against <code>"hi hello bye"</code>
+results in the following table:
+</p>
+<pre class="example">
+{tag="list",
+ {tag="id", "hi"},
+ {tag="id", "hello"},
+ {tag="id", "bye"}
+}
+</pre>
+
+
+<h3>Indented blocks</h3>
+<p>
+This example breaks indented blocks into tables,
+respecting the indentation:
+</p>
+<pre class="example">
+p = re.compile[[
+ block <- ({:ident:' '*:} line
+ ((=ident !' ' line) / &(=ident ' ') block)*) -> {}
+ line <- {[^%nl]*} %nl
+]]
+</pre>
+<p>
+As an example,
+consider the following text:
+</p>
+<pre class="example">
+t = p:match[[
+first line
+ subline 1
+ subline 2
+second line
+third line
+ subline 3.1
+ subline 3.1.1
+ subline 3.2
+]]
+</pre>
+<p>
+The resulting table <code>t</code> will be like this:
+</p>
+<pre class="example">
+ {'first line'; {'subline 1'; 'subline 2'; ident = ' '};
+ 'second line';
+ 'third line'; { 'subline 3.1'; {'subline 3.1.1'; ident = ' '};
+ 'subline 3.2'; ident = ' '};
+ ident = ''}
+</pre>
+
+<h3>Macro expander</h3>
+<p>
+This example implements a simple macro expander.
+Macros must be defined as part of the pattern,
+following some simple rules:
+</p>
+<pre class="example">
+p = re.compile[[
+ text <- {~ item* ~}
+ item <- macro / [^()] / '(' item* ')'
+ arg <- ' '* {~ (!',' item)* ~}
+ args <- '(' arg (',' arg)* ')'
+ -- now we define some macros
+ macro <- ('apply' args) -> '%1(%2)'
+ / ('add' args) -> '%1 + %2'
+ / ('mul' args) -> '%1 * %2'
+]]
+
+print(p:match"add(mul(a,b), apply(f,x))") --> a * b + f(x)
+</pre>
+<p>
+A <code>text</code> is a sequence of items,
+wherein we apply a substitution capture to expand any macros.
+An <code>item</code> is either a macro,
+any character different from parentheses,
+or a parenthesized expression.
+A macro argument (<code>arg</code>) is a sequence
+of items different from a comma.
+(Note that a comma may appear inside an item,
+e.g., inside a parenthesized expression.)
+Again we do a substitution capture to expand any macro
+in the argument before expanding the outer macro.
+<code>args</code> is a list of arguments separated by commas.
+Finally we define the macros.
+Each macro is a string substitution;
+it replaces the macro name and its arguments by its corresponding string,
+with each <code>%</code><em>n</em> replaced by the <em>n</em>-th argument.
+</p>
+
+<h3>Patterns</h3>
+<p>
+This example shows the complete syntax
+of patterns accepted by <code>re</code>.
+</p>
+<pre class="example">
+p = [=[
+
+pattern <- exp !.
+exp <- S (alternative / grammar)
+
+alternative <- seq ('/' S seq)*
+seq <- prefix*
+prefix <- '&' S prefix / '!' S prefix / suffix
+suffix <- primary S (([+*?]
+ / '^' [+-]? num
+ / '->' S (string / '{}' / name)
+ / '=>' S name) S)*
+
+primary <- '(' exp ')' / string / class / defined
+ / '{:' (name ':')? exp ':}'
+ / '=' name
+ / '{}'
+ / '{~' exp '~}'
+ / '{' exp '}'
+ / '.'
+ / name S !arrow
+ / '<' name '>' -- old-style non terminals
+
+grammar <- definition+
+definition <- name S arrow exp
+
+class <- '[' '^'? item (!']' item)* ']'
+item <- defined / range / .
+range <- . '-' [^]]
+
+S <- (%s / '--' [^%nl]*)* -- spaces and comments
+name <- [A-Za-z][A-Za-z0-9_]*
+arrow <- '<-'
+num <- [0-9]+
+string <- '"' [^"]* '"' / "'" [^']* "'"
+defined <- '%' name
+
+]=]
+
+print(re.match(p, p)) -- a self description must match itself
+</pre>
+
+
+
+<h2><a name="license">License</a></h2>
+
+<p>
+Copyright © 2008-2010 Lua.org, PUC-Rio.
+</p>
+<p>
+Permission is hereby granted, free of charge,
+to any person obtaining a copy of this software and
+associated documentation files (the "Software"),
+to deal in the Software without restriction,
+including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software,
+and to permit persons to whom the Software is
+furnished to do so,
+subject to the following conditions:
+</p>
+
+<p>
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions of the Software.
+</p>
+
+<p>
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+</p>
+
+</div> <!-- id="content" -->
+
+</div> <!-- id="main" -->
+
+<div id="about">
+<p><small>
+$Id: re.html,v 1.17 2011/01/10 15:08:06 roberto Exp $
+</small></p>
+</div> <!-- id="about" -->
+
+</div> <!-- id="container" -->
+
+</body>
+</html>
diff --git a/lpeg-0.10.2/re.lua b/lpeg-0.10.2/re.lua
new file mode 100644
index 0000000..2320c20
--- /dev/null
+++ b/lpeg-0.10.2/re.lua
@@ -0,0 +1,248 @@
+-- $Id: re.lua,v 1.39 2010/11/04 19:44:18 roberto Exp $
+
+-- imported functions and modules
+local tonumber, type, print, error = tonumber, type, print, error
+local setmetatable = setmetatable
+local m = require"lpeg"
+
+-- 'm' will be used to parse expressions, and 'mm' will be used to
+-- create expressions; that is, 're' runs on 'm', creating patterns
+-- on 'mm'
+local mm = m
+
+-- pattern's metatable
+local mt = getmetatable(mm.P(0))
+
+
+
+-- No more global accesses after this point
+local version = _VERSION
+if version == "Lua 5.2" then _ENV = nil end
+
+
+local any = m.P(1)
+
+
+-- Pre-defined names
+local Predef = { nl = m.P"\n" }
+
+
+local mem
+local fmem
+local gmem
+
+
+local function updatelocale ()
+ mm.locale(Predef)
+ Predef.a = Predef.alpha
+ Predef.c = Predef.cntrl
+ Predef.d = Predef.digit
+ Predef.g = Predef.graph
+ Predef.l = Predef.lower
+ Predef.p = Predef.punct
+ Predef.s = Predef.space
+ Predef.u = Predef.upper
+ Predef.w = Predef.alnum
+ Predef.x = Predef.xdigit
+ Predef.A = any - Predef.a
+ Predef.C = any - Predef.c
+ Predef.D = any - Predef.d
+ Predef.G = any - Predef.g
+ Predef.L = any - Predef.l
+ Predef.P = any - Predef.p
+ Predef.S = any - Predef.s
+ Predef.U = any - Predef.u
+ Predef.W = any - Predef.w
+ Predef.X = any - Predef.x
+ mem = {} -- restart memoization
+ fmem = {}
+ gmem = {}
+ local mt = {__mode = "v"}
+ setmetatable(mem, mt)
+ setmetatable(fmem, mt)
+ setmetatable(gmem, mt)
+end
+
+
+updatelocale()
+
+
+
+local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
+
+
+local function getdef (id, Defs)
+ local c = Defs and Defs[id]
+ if not c then error("undefined name: " .. id) end
+ return c
+end
+
+
+local function patt_error (s, i)
+ local msg = (#s < i + 20) and s:sub(i)
+ or s:sub(i,i+20) .. "..."
+ msg = ("pattern error near '%s'"):format(msg)
+ error(msg, 2)
+end
+
+local function mult (p, n)
+ local np = mm.P(true)
+ while n >= 1 do
+ if n%2 >= 1 then np = np * p end
+ p = p * p
+ n = n/2
+ end
+ return np
+end
+
+local function equalcap (s, i, c)
+ if type(c) ~= "string" then return nil end
+ local e = #c + i
+ if s:sub(i, e - 1) == c then return e else return nil end
+end
+
+
+local S = (m.S(" \f\n\r\t\v") + "--" * (any - Predef.nl)^0)^0
+
+local name = m.R("AZ", "az") * m.R("AZ", "az", "__", "09")^0
+
+local arrow = S * "<-"
+
+local exp_follow = m.P"/" + ")" + "}" + ":}" + "~}" + (name * arrow) + -1
+
+name = m.C(name)
+
+
+-- identifiers only have meaning in a given environment
+local Identifier = name * m.Carg(1)
+
+local num = m.C(m.R"09"^1) * S / tonumber
+
+local String = "'" * m.C((any - "'")^0) * "'" +
+ '"' * m.C((any - '"')^0) * '"'
+
+
+local defined = "%" * Identifier / function (c,Defs)
+ local cat = Defs and Defs[c] or Predef[c]
+ if not cat then error ("name '" .. c .. "' undefined") end
+ return cat
+end
+
+local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
+
+local item = defined + Range + m.C(any)
+
+local Class =
+ "["
+ * (m.C(m.P"^"^-1)) -- optional complement symbol
+ * m.Cf(item * (item - "]")^0, mt.__add) /
+ function (c, p) return c == "^" and any - p or p end
+ * "]"
+
+local function adddef (t, k, Defs, exp)
+ if t[k] then
+ error("'"..k.."' already defined as a rule")
+ else
+ t[k] = exp
+ end
+ return t
+end
+
+local function firstdef (n, Defs, r) return adddef({n}, n, Defs, r) end
+
+
+
+local exp = m.P{ "Exp",
+ Exp = S * ( m.V"Grammar"
+ + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) );
+ Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul)
+ * (#exp_follow + patt_error);
+ Prefix = "&" * S * m.V"Prefix" / mt.__len
+ + "!" * S * m.V"Prefix" / mt.__unm
+ + m.V"Suffix";
+ Suffix = m.Cf(m.V"Primary" * S *
+ ( ( m.P"+" * m.Cc(1, mt.__pow)
+ + m.P"*" * m.Cc(0, mt.__pow)
+ + m.P"?" * m.Cc(-1, mt.__pow)
+ + "^" * ( m.Cg(num * m.Cc(mult))
+ + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
+ )
+ + "->" * S * ( m.Cg(String * m.Cc(mt.__div))
+ + m.P"{}" * m.Cc(nil, m.Ct)
+ + m.Cg(Identifier / getdef * m.Cc(mt.__div))
+ )
+ + "=>" * S * m.Cg(Identifier / getdef * m.Cc(m.Cmt))
+ ) * S
+ )^0, function (a,b,f) return f(a,b) end );
+ Primary = "(" * m.V"Exp" * ")"
+ + String / mm.P
+ + Class
+ + defined
+ + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
+ function (n, p) return mm.Cg(p, n) end
+ + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
+ + m.P"{}" / mm.Cp
+ + "{~" * m.V"Exp" * "~}" / mm.Cs
+ + "{" * m.V"Exp" * "}" / mm.C
+ + m.P"." * m.Cc(any)
+ + name * -arrow / mm.V
+ + "<" * name * ">" / mm.V;
+ Definition = Identifier * arrow * m.V"Exp";
+ Grammar = m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0, adddef) /
+ mm.P
+}
+
+local pattern = S * exp / mm.P * (-any + patt_error)
+
+
+local function compile (p, defs)
+ if mm.type(p) == "pattern" then return p end -- already compiled
+ local cp = pattern:match(p, 1, defs)
+ if not cp then error("incorrect pattern", 3) end
+ return cp
+end
+
+local function match (s, p, i)
+ local cp = mem[p]
+ if not cp then
+ cp = compile(p)
+ mem[p] = cp
+ end
+ return cp:match(s, i or 1)
+end
+
+local function find (s, p, i)
+ local cp = fmem[p]
+ if not cp then
+ cp = compile(p)
+ cp = mm.P{ mm.Cp() * cp + 1 * mm.V(1) }
+ fmem[p] = cp
+ end
+ return cp:match(s, i or 1)
+end
+
+local function gsub (s, p, rep)
+ local g = gmem[p] or {} -- ensure gmem[p] is not collected while here
+ gmem[p] = g
+ local cp = g[rep]
+ if not cp then
+ cp = compile(p)
+ cp = mm.Cs((cp / rep + 1)^0)
+ g[rep] = cp
+ end
+ return cp:match(s)
+end
+
+
+-- exported names
+local re = {
+ compile = compile,
+ match = match,
+ find = find,
+ gsub = gsub,
+ updatelocale = updatelocale,
+}
+
+if version == "Lua 5.1" then _G.re = re end
+
+return re
diff --git a/lpeg-0.10.2/test.lua b/lpeg-0.10.2/test.lua
new file mode 100755
index 0000000..533a6d2
--- /dev/null
+++ b/lpeg-0.10.2/test.lua
@@ -0,0 +1,1188 @@
+#!/usr/bin/env lua5.1
+
+-- $Id: test.lua,v 1.82 2010/12/03 14:49:54 roberto Exp $
+
+require"strict" -- just to be pedantic
+
+local m = require"lpeg"
+
+local debug = require"debug"
+
+
+-- compatibility with Lua 5.2
+local unpack = table.unpack or unpack
+
+
+-- most tests here do not need much stack space
+m.setmaxstack(5)
+
+any = m.P(1)
+space = m.S" \t\n"^0
+
+local function checkeq (x, y, p)
+if p then print(x,y) end
+ if type(x) ~= "table" then assert(x == y)
+ else
+ for k,v in pairs(x) do checkeq(v, y[k], p) end
+ for k,v in pairs(y) do checkeq(v, x[k], p) end
+ end
+end
+
+
+mt = getmetatable(m.P(1))
+
+
+local allchar = {}
+for i=0,255 do allchar[i + 1] = i end
+allchar = string.char(unpack(allchar))
+assert(#allchar == 256)
+
+local function cs2str (c)
+ return m.match(m.Cs((c + m.P(1)/"")^0), allchar)
+end
+
+local function eqcharset (c1, c2)
+ assert(cs2str(c1) == cs2str(c2))
+end
+
+
+print"General tests for LPeg library"
+
+assert(type(m.version()) == "string")
+print("version " .. m.version())
+assert(m.type("alo") ~= "pattern")
+assert(m.type(io.input) ~= "pattern")
+assert(m.type(m.P"alo") == "pattern")
+
+-- tests for some basic optimizations
+assert(m.match(m.P(false) + "a", "a") == 2)
+assert(m.match(m.P(true) + "a", "a") == 1)
+assert(m.match("a" + m.P(false), "b") == nil)
+assert(m.match("a" + m.P(true), "b") == 1)
+
+assert(m.match(m.P(false) * "a", "a") == nil)
+assert(m.match(m.P(true) * "a", "a") == 2)
+assert(m.match("a" * m.P(false), "a") == nil)
+assert(m.match("a" * m.P(true), "a") == 2)
+
+assert(m.match(#m.P(false) * "a", "a") == nil)
+assert(m.match(#m.P(true) * "a", "a") == 2)
+assert(m.match("a" * #m.P(false), "a") == nil)
+assert(m.match("a" * #m.P(true), "a") == 2)
+
+
+-- tests for locale
+do
+ assert(m.locale(m) == m)
+ local t = {}
+ assert(m.locale(t, m) == t)
+ local x = m.locale()
+ for n,v in pairs(x) do
+ assert(type(n) == "string")
+ eqcharset(v, m[n])
+ end
+end
+
+
+
+assert(m.match(3, "aaaa"))
+assert(m.match(4, "aaaa"))
+assert(not m.match(5, "aaaa"))
+assert(m.match(-3, "aa"))
+assert(not m.match(-3, "aaa"))
+assert(not m.match(-3, "aaaa"))
+assert(not m.match(-4, "aaaa"))
+assert(m.P(-5):match"aaaa")
+
+assert(m.match("a", "alo") == 2)
+assert(m.match("al", "alo") == 3)
+assert(not m.match("alu", "alo"))
+assert(m.match(true, "") == 1)
+
+digit = m.S"0123456789"
+upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+lower = m.S"abcdefghijklmnopqrstuvwxyz"
+letter = m.S"" + upper + lower
+alpha = letter + digit + m.R()
+
+eqcharset(m.S"", m.P(false))
+eqcharset(upper, m.R("AZ"))
+eqcharset(lower, m.R("az"))
+eqcharset(upper + lower, m.R("AZ", "az"))
+eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90"))
+eqcharset(digit, m.S"01234567" + "8" + "9")
+eqcharset(upper, letter - lower)
+eqcharset(m.S(""), m.R())
+assert(cs2str(m.S("")) == "")
+
+eqcharset(m.S"\0", "\0")
+eqcharset(m.S"\1\0\2", m.R"\0\2")
+eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0")
+eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2")
+
+word = alpha^1 * (1 - alpha)^0
+
+assert((word^0 * -1):match"alo alo")
+assert(m.match(word^1 * -1, "alo alo"))
+assert(m.match(word^2 * -1, "alo alo"))
+assert(not m.match(word^3 * -1, "alo alo"))
+
+assert(not m.match(word^-1 * -1, "alo alo"))
+assert(m.match(word^-2 * -1, "alo alo"))
+assert(m.match(word^-3 * -1, "alo alo"))
+
+eos = m.P(-1)
+
+assert(m.match(digit^0 * letter * digit * eos, "1298a1"))
+assert(not m.match(digit^0 * letter * eos, "1257a1"))
+
+b = {
+ [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")"
+}
+
+assert(m.match(b, "(al())()"))
+assert(not m.match(b * eos, "(al())()"))
+assert(m.match(b * eos, "((al())()(é))"))
+assert(not m.match(b, "(al()()"))
+
+assert(not m.match(letter^1 - "for", "foreach"))
+assert(m.match(letter^1 - ("for" * eos), "foreach"))
+assert(not m.match(letter^1 - ("for" * eos), "for"))
+
+function basiclookfor (p)
+ return m.P {
+ [1] = p + (1 * m.V(1))
+ }
+end
+
+function caplookfor (p)
+ return basiclookfor(p:C())
+end
+
+assert(m.match(caplookfor(letter^1), " 4achou123...") == "achou")
+a = {m.match(caplookfor(letter^1)^0, " two words, one more ")}
+checkeq(a, {"two", "words", "one", "more"})
+
+assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7)
+
+a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
+checkeq(a, {"123", "d"})
+
+a = {m.match(m.C(digit^1) * "d" * -1 + m.C(letter^1 * m.Cc"l"), "123d")}
+checkeq(a, {"123"})
+
+a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
+checkeq(a, {"abcd", "l"})
+
+a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
+checkeq(a, {10,20,30,2})
+a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
+checkeq(a, {1,10,20,30,2})
+a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
+checkeq(a, {1,10,20,30,2})
+a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
+checkeq(a, {1,7,8,10,20,30,2})
+a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')}
+checkeq(a, {1,2,3,4})
+
+a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")}
+checkeq(a, {1, 5})
+
+
+t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
+checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
+
+
+-- test for small capture boundary
+for i = 250,260 do
+ assert(#m.match(m.C(i), string.rep('a', i)) == i)
+ assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
+end
+
+
+-- tests for any*n
+for n = 1, 550 do
+ local x_1 = string.rep('x', n - 1)
+ local x = x_1 .. 'a'
+ assert(not m.P(n):match(x_1))
+ assert(m.P(n):match(x) == n + 1)
+ assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4)
+ assert(m.C(n):match(x) == x)
+ assert(m.C(m.C(n)):match(x) == x)
+ assert(m.P(-n):match(x_1) == 1)
+ assert(not m.P(-n):match(x))
+ assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20)
+ local n3 = math.floor(n/3)
+ assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1)
+end
+
+assert(m.P(0):match("x") == 1)
+assert(m.P(0):match("") == 1)
+assert(m.C(0):match("x") == "")
+assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1)
+assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0)
+assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd")
+p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4
+
+
+-- test for alternation optimization
+assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2)
+assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3)
+assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1)
+assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3)
+assert(m.match("a" * m.P"b"^0 * "c" + "cd" + "ax" + "cy", "ax") == 3)
+assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3)
+assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3)
+assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4)
+assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3)
+assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4)
+assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4)
+assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
+assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3)
+assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4)
+assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4)
+assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
+assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3)
+assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4)
+assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4)
+assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4)
+assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2)
+assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1)
+
+pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
+assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) ==
+ m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi))
+print"+"
+
+
+-- tests for capture optimizations
+assert(m.match((m.P(3) + 4 * m.Cp()) * "a", "abca") == 5)
+t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")}
+checkeq(t, {3, 6})
+
+-- test for table captures
+t = m.match(m.Ct(letter^1), "alo")
+checkeq(t, {})
+
+t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo")
+assert(n == "t" and table.concat(t) == "alo")
+
+t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
+assert(table.concat(t, ";") == "alo;a;l;o")
+
+t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
+assert(table.concat(t, ";") == "alo;a;l;o")
+
+t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo")
+assert(table.concat(t[1], ";") == "1;2;2;3;3;4")
+
+t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo")
+checkeq(t, {"alo", "a", "o"})
+
+
+-- tests for groups
+p = m.Cg(1) -- no capture
+assert(p:match('x') == 'x')
+p = m.Cg(m.P(true)/function () end * 1) -- no value
+assert(p:match('x') == 'x')
+p = m.Cg(m.Cg(m.Cg(m.C(1))))
+assert(p:match('x') == 'x')
+p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2)))
+t = {p:match'abc'}
+checkeq(t, {'a', 'b', 'c', 1, 2})
+
+-- test for non-pattern as arguments to pattern functions
+
+p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
+assert(m.match(p, "aaabaac") == 7)
+
+-- a large table capture
+t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000))
+assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')
+
+
+-- test for errors
+local function checkerr (msg, ...)
+ assert(m.match({ m.P(msg) + 1 * m.V(1) }, select(2, pcall(...))))
+end
+
+-- checkerr("rule '1' is left recursive", m.match, { m.V(1) * 'a' }, "a")
+checkerr("rule '1' outside a grammar", m.match, m.V(1), "")
+checkerr("rule 'hiii' outside a grammar", m.match, m.V('hiii'), "")
+checkerr("rule 'hiii' is not defined", m.match, { m.V('hiii') }, "")
+checkerr("rule <a table> is not defined", m.match, { m.V{} }, "")
+
+checkerr("rule 'A' is not a pattern", m.P, { A = {} })
+checkerr("rule <a function> is not a pattern", m.P, { [print] = {} })
+
+-- test for non-pattern as arguments to pattern functions
+
+p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
+assert(m.match(p, "aaabaac") == 7)
+
+-- a large table capture
+t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000))
+assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')
+
+
+-- test for errors
+local function checkerr (msg, ...)
+ assert(m.match({ m.P(msg) + 1 * m.V(1) }, select(2, pcall(...))))
+end
+
+checkerr("rule '1' is left recursive", m.match, { m.V(1) * 'a' }, "a")
+checkerr("rule '1' outside a grammar", m.match, m.V(1), "")
+checkerr("rule 'hiii' outside a grammar", m.match, m.V('hiii'), "")
+checkerr("rule 'hiii' is not defined", m.match, { m.V('hiii') }, "")
+checkerr("rule <a table> is not defined", m.match, { m.V({}) }, "")
+
+print('+')
+
+
+-- bug in 0.10 (rechecking a grammar, after tail-call optimization)
+m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } }
+
+local V = m.V
+
+local Space = m.S(" \n\t")^0
+local Number = m.C(m.R("09")^1) * Space
+local FactorOp = m.C(m.S("+-")) * Space
+local TermOp = m.C(m.S("*/")) * Space
+local Open = "(" * Space
+local Close = ")" * Space
+
+
+local function f_factor (v1, op, v2, d)
+ assert(d == nil)
+ if op == "+" then return v1 + v2
+ else return v1 - v2
+ end
+end
+
+
+local function f_term (v1, op, v2, d)
+ assert(d == nil)
+ if op == "*" then return v1 * v2
+ else return v1 / v2
+ end
+end
+
+G = m.P{ "Exp",
+ Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);
+ Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);
+ Term = Number / tonumber + Open * V"Exp" * Close;
+}
+
+G = Space * G * -1
+
+for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1- 8"} do
+ assert(m.match(G, s) == loadstring("return "..s)())
+end
+
+
+-- test for grammars (errors deep in calling non-terminals)
+g = m.P{
+ [1] = m.V(2) + "a",
+ [2] = "a" * m.V(3) * "x",
+ [3] = "b" * m.V(3) + "c"
+}
+
+assert(m.match(g, "abbbcx") == 7)
+assert(m.match(g, "abbbbx") == 2)
+
+
+-- tests for \0
+assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4)
+assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5)
+assert(m.match(m.P(1)^3, "\0\1\0a") == 5)
+assert(not m.match(-4, "\0\1\0a"))
+assert(m.match("\0\1\0a", "\0\1\0a") == 5)
+assert(m.match("\0\0\0", "\0\0\0") == 4)
+assert(not m.match("\0\0\0", "\0\0"))
+
+
+-- tests for predicates
+assert(not m.match(-m.P("a") * 2, "alo"))
+assert(m.match(- -m.P("a") * 2, "alo") == 3)
+assert(m.match(#m.P("a") * 2, "alo") == 3)
+assert(m.match(##m.P("a") * 2, "alo") == 3)
+assert(not m.match(##m.P("c") * 2, "alo"))
+assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
+assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
+assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
+assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
+
+
+-- look-behind predicate
+assert(not m.match(m.B'a', 'a'))
+assert(m.match(1 * m.B'a', 'a') == 2)
+assert(not m.match(m.B(1), 'a'))
+assert(m.match(1 * m.B(1), 'a') == 2)
+assert(m.match(-m.B(1), 'a') == 1)
+
+B = #letter * -m.B(letter) + -letter * m.B(letter)
+x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
+checkeq(m.match(x, 'ar cal c'), {1,3,4,7,9,10})
+checkeq(m.match(x, ' ar cal '), {2,4,5,8})
+checkeq(m.match(x, ' '), {})
+checkeq(m.match(x, 'aloalo'), {1,7})
+
+assert(m.match(B, "a") == 1)
+assert(m.match(1 * B, "a") == 2)
+assert(not m.B(-letter):match(""))
+assert((-m.B(letter)):match("") == 1)
+
+assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
+assert(not (4 * m.B(letter, 5)):match("aaaaaaaa"))
+assert((4 * -m.B(letter, 5)):match("aaaaaaaa") == 5)
+
+assert((3 * m.B(m.C(1))):match("12345") == 4)
+
+
+-- bug in 0.9
+assert(m.match(('a' * #m.P'b'), "ab") == 2)
+assert(not m.match(('a' * #m.P'b'), "a"))
+
+assert(not m.match(#m.S'567', ""))
+assert(m.match(#m.S'567' * 1, "6") == 2)
+
+
+-- tests for Tail Calls
+
+-- create a grammar for a simple DFA for even number of 0s and 1s
+-- finished in '$':
+--
+-- ->1 <---0---> 2
+-- ^ ^
+-- | |
+-- 1 1
+-- | |
+-- V V
+-- 3 <---0---> 4
+--
+-- this grammar should keep no backtracking information
+
+p = m.P{
+ [1] = '0' * m.V(2) + '1' * m.V(3) + '$',
+ [2] = '0' * m.V(1) + '1' * m.V(4),
+ [3] = '0' * m.V(4) + '1' * m.V(1),
+ [4] = '0' * m.V(3) + '1' * m.V(2),
+}
+
+assert(p:match(string.rep("00", 10000) .. "$"))
+assert(p:match(string.rep("01", 10000) .. "$"))
+assert(p:match(string.rep("011", 10000) .. "$"))
+assert(not p:match(string.rep("011", 10001) .. "$"))
+
+
+-- this grammar does need backtracking info.
+local lim = 10000
+p = m.P{ '0' * m.V(1) + '0' }
+assert(not pcall(m.match, p, string.rep("0", lim)))
+m.setmaxstack(2*lim)
+assert(not pcall(m.match, p, string.rep("0", lim)))
+m.setmaxstack(2*lim + 2)
+assert(pcall(m.match, p, string.rep("0", lim)))
+
+-- tests for optional start position
+assert(m.match("a", "abc", 1))
+assert(m.match("b", "abc", 2))
+assert(m.match("c", "abc", 3))
+assert(not m.match(1, "abc", 4))
+assert(m.match("a", "abc", -3))
+assert(m.match("b", "abc", -2))
+assert(m.match("c", "abc", -1))
+assert(m.match("abc", "abc", -4)) -- truncate to position 1
+
+assert(m.match("", "abc", 10)) -- empty string is everywhere!
+assert(m.match("", "", 10))
+assert(not m.match(1, "", 1))
+assert(not m.match(1, "", -1))
+assert(not m.match(1, "", 0))
+
+print("+")
+
+
+-- tests for argument captures
+assert(not pcall(m.Carg, 0))
+assert(not pcall(m.Carg, -1))
+assert(not pcall(m.Carg, 2^18))
+assert(not pcall(m.match, m.Carg(1), 'a', 1))
+assert(m.match(m.Carg(1), 'a', 1, print) == print)
+x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
+checkeq(x, {10, 20})
+
+assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") *
+ m.Cmt(m.Cb("a"), function (s,i,x)
+ assert(s == "a" and i == 1);
+ return i, x+1
+ end) *
+ m.Carg(2), function (s,i,a,b,c)
+ assert(s == "a" and i == 1 and c == nil);
+ return i, 2*a + 3*b
+ end) * "a",
+ "a", 1, false, 100, 1000) == 2*1001 + 3*100)
+
+
+-- tests for Lua functions
+
+t = {}
+s = ""
+p = function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end
+s = "hi, this is a test"
+assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1)
+assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3)
+
+assert(not m.match(p, s))
+
+p = mt.__add(function (s, i) return i end, function (s, i) return nil end)
+assert(m.match(p, "alo"))
+
+p = mt.__mul(function (s, i) return i end, function (s, i) return nil end)
+assert(not m.match(p, "alo"))
+
+
+t = {}
+p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end
+s = "hi, this is a test"
+assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1)
+assert(#t == string.len(s) and t[1] == 2 and t[2] == 3)
+
+t = {}
+p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i;
+ return i <= s1:len() and i + 1 end)
+s = "hi, this is a test"
+assert(m.match(p^0, s) == string.len(s) + 1)
+assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2)
+
+p = function (s1, i) return m.match(m.P"a"^1, s1, i) end
+assert(m.match(p, "aaaa") == 5)
+assert(m.match(p, "abaa") == 2)
+assert(not m.match(p, "baaa"))
+
+assert(not pcall(m.match, function () return 2^20 end, s))
+assert(not pcall(m.match, function () return 0 end, s))
+assert(not pcall(m.match, function (s, i) return i - 1 end, s))
+assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i - 1 end, s))
+assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
+assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i + 1 end, s))
+assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
+assert(not pcall(m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s))
+assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
+assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
+ string.len(s) + 1)
+for i = 1, string.len(s) + 1 do
+ assert(m.match(function (_, _) return i end, s) == i)
+end
+
+p = (m.P(function (s, i) return i%2 == 0 and i + 1 end)
+ + m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i + 3 end))^0
+ * -1
+assert(p:match(string.rep('a', 14000)))
+
+-- tests for Function Replacements
+f = function (a, ...) if a ~= "x" then return {a, ...} end end
+
+t = m.match(m.C(1)^0/f, "abc")
+checkeq(t, {"a", "b", "c"})
+
+t = m.match(m.C(1)^0/f/f, "abc")
+checkeq(t, {{"a", "b", "c"}})
+
+t = m.match(m.P(1)^0/f/f, "abc") -- no capture
+checkeq(t, {{"abc"}})
+
+t = m.match((m.P(1)^0/f * m.Cp())/f, "abc")
+checkeq(t, {{"abc"}, 4})
+
+t = m.match((m.C(1)^0/f * m.Cp())/f, "abc")
+checkeq(t, {{"a", "b", "c"}, 4})
+
+t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc")
+checkeq(t, {4})
+
+t = m.match(m.C(m.C(1)^0)/f, "abc")
+checkeq(t, {"abc", "a", "b", "c"})
+
+g = function (...) return 1, ... end
+t = {m.match(m.C(1)^0/g/g, "abc")}
+checkeq(t, {1, 1, "a", "b", "c"})
+
+t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
+t1 = {1,1,nil,nil,4,nil,3,nil,nil}
+for i=1,10 do assert(t[i] == t1[i]) end
+
+t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
+checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})
+
+t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc")
+checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})
+
+-- tests for Query Replacements
+
+assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10)
+assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10)
+assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40)
+t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc")
+checkeq(t, {40})
+
+assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e")
+assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde")
+assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e")
+assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.")
+assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+")
+assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde")
+assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde")
+assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde")
+assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde")
+assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx")
+assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3")
+assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3)
+assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3)
+
+assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0")
+
+assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd")
+assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d")
+assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad")
+assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a")
+assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
+assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
+ "411 - abc ")
+
+assert(pcall(m.match, m.P(1)/"%0", "abc"))
+assert(not pcall(m.match, m.P(1)/"%1", "abc")) -- out of range
+assert(not pcall(m.match, m.P(1)/"%9", "abc")) -- out of range
+
+p = m.C(1)
+p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
+assert(p:match("1234567890") == "9 - 1")
+
+assert(m.match(m.Cc(print), "") == print)
+
+-- too many captures (just ignore extra ones)
+p = m.C(1)^0 / "%2-%9-%0-%9"
+assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8")
+s = string.rep("12345678901234567890", 20)
+assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
+
+-- string captures with non-string subcaptures
+p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
+assert(p:match'x' == 'alo - x - alo')
+
+assert(not pcall(m.match, m.Cc(true) / "%1", "a"))
+
+-- long strings for string capture
+l = 10000
+s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l)
+
+p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'
+
+assert(p:match(s) == string.rep('c', l) ..
+ string.rep('b', l) ..
+ string.rep('a', l))
+
+print"+"
+
+-- accumulator capture
+function f (x) return x + 1 end
+assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
+
+t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
+checkeq(t, {1})
+p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0,
+ rawset)
+t = p:match("a=b;c=du;xux=yuy;")
+checkeq(t, {a="b", c="du", xux="yuy"})
+
+
+-- tests for loop checker
+
+local function haveloop (p)
+ assert(not pcall(function (p) return p^0 end, m.P(p)))
+end
+
+haveloop(m.P("x")^-4)
+assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
+assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
+haveloop("")
+haveloop(m.P("x")^0)
+haveloop(m.P("x")^-1)
+haveloop(m.P("x") + 1 + 2 + m.P("a")^-1)
+haveloop(-m.P("ab"))
+haveloop(- -m.P("ab"))
+haveloop(# #(m.P("ab") + "xy"))
+haveloop(- #m.P("ab")^0)
+haveloop(# -m.P("ab")^1)
+haveloop(#m.V(3))
+haveloop(m.V(3) + m.V(1) + m.P('a')^-1)
+haveloop({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
+assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
+ == 3)
+assert(m.match(m.P""^-3, "a") == 1)
+
+local function find (p, s)
+ return m.match(basiclookfor(p), s)
+end
+
+
+local function badgrammar (g, exp)
+ local err, msg = pcall(m.P, g)
+ assert(not err)
+ if exp then assert(find(exp, msg)) end
+end
+
+badgrammar({[1] = m.V(1)}, "rule '1'")
+badgrammar({[1] = m.V(2)}, "rule '2'") -- invalid non-terminal
+badgrammar({[1] = m.V"x"}, "rule 'x'") -- invalid non-terminal
+badgrammar({[1] = m.V{}}, "rule <a table>") -- invalid non-terminal
+badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'")
+badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'")
+badgrammar({[1] = -1 * m.V(1)}, "rule '1'")
+badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'")
+badgrammar({[1] = m.P(0), [2] = 1 * m.V(1)^0}, "loop in rule '2'")
+badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'")
+badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'")
+badgrammar({ #(m.V(1) * 'a') }, "rule '1'")
+badgrammar({ -(m.V(1) * 'a') }, "rule '1'")
+
+assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
+assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
+
+
+-- simple tests for maximum sizes:
+local p = m.P"a"
+for i=1,14 do p = p * p end
+
+p = {}
+for i=1,100 do p[i] = m.P"a" end
+p = m.P(p)
+
+
+-- strange values for rule labels
+
+p = m.P{ "print",
+ print = m.V(print),
+ [print] = m.V(_G),
+ [_G] = m.P"a",
+ }
+
+assert(p:match("a"))
+
+-- initial rule
+g = {}
+for i = 1, 10 do g["i"..i] = "a" * m.V("i"..i+1) end
+g.i11 = m.P""
+for i = 1, 10 do
+ g[1] = "i"..i
+ local p = m.P(g)
+ assert(p:match("aaaaaaaaaaa") == 11 - i + 1)
+end
+
+print"+"
+
+
+-- tests for back references
+assert(not pcall(m.match, m.Cb('x'), ''))
+assert(not pcall(m.match, m.Cg(1, 'a') * m.Cb('b'), 'a'))
+
+p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
+t = p:match("ab")
+checkeq(t, {"a", "b"})
+
+
+t = {}
+function foo (p) t[#t + 1] = p; return p .. "x" end
+
+p = m.Cg(m.C(2) / foo, "x") * m.Cb"x" *
+ m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
+ m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
+ m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
+x = {p:match'ab'}
+checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
+checkeq(t, {'ab',
+ 'ab', 'abx',
+ 'ab', 'abx', 'abxx',
+ 'ab', 'abx', 'abxx', 'abxxx'})
+
+
+
+-- tests for match-time captures
+
+local function id (s, i, ...)
+ return true, ...
+end
+
+assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) +
+ m.R'09'^1 / string.char +
+ m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y")
+
+p = m.P{'S',
+ S = m.V'atom' * space
+ + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id),
+ atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id)
+}
+x = p:match"(a g () ((b) c) (d (e)))"
+checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});
+
+x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
+assert(#x == 500)
+
+local function id(s, i, x)
+ if x == 'a' then return i + 1, 1, 3, 7
+ else return nil, 2, 4, 6, 8
+ end
+end
+
+p = ((m.P(id) + m.Cmt(2, id) + m.Cmt(1, id)))^0
+assert(table.concat{p:match('abababab')} == string.rep('137', 4))
+
+local function ref (s, i, x)
+ return m.match(x, s, i - x:len())
+end
+
+assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4)
+assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4)
+assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo'))
+
+ref = function (s,i,x) return i == tonumber(x) and i, 'xuxu' end
+
+assert(m.Cmt(1, ref):match'2')
+assert(not m.Cmt(1, ref):match'1')
+assert(m.Cmt(m.P(1)^0, ref):match'03')
+
+function ref (s, i, a, b)
+ if a == b then return i, a:upper() end
+end
+
+p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref)
+p = (any - p)^0 * p * any^0 * -1
+
+assert(p:match'abbbc-bc ddaa' == 'BC')
+
+
+c = '[' * m.Cg(m.P'='^0, "init") * '[' *
+ { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2)
+ return s1 == s2 end)
+ + 1 * m.V(1) } / function () end
+
+assert(c:match'[==[]]====]]]]==]===[]' == 18)
+assert(c:match'[[]=]====]=]]]==]===[]' == 14)
+assert(not c:match'[[]=]====]=]=]==]===[]')
+
+
+
+-------------------------------------------------------------------
+-- Tests for 're' module
+-------------------------------------------------------------------
+
+local re = require "re"
+
+local match, compile = re.match, re.compile
+
+
+
+assert(match("a", ".") == 2)
+assert(match("a", "''") == 1)
+assert(match("", " ! . ") == 1)
+assert(not match("a", " ! . "))
+assert(match("abcde", " ( . . ) * ") == 5)
+assert(match("abbcde", " [a-c] +") == 5)
+assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7)
+assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8)
+assert(match("abbc--", " [a-c] + +") == 5)
+assert(match("abbc--", " [ac-] +") == 2)
+assert(match("abbc--", " [-acb] + ") == 7)
+assert(not match("abbcde", " [b-z] + "))
+assert(match("abb\"de", '"abb"["]"de"') == 7)
+assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
+assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)
+local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")}
+checkeq(t, {4, 5, 7})
+local t = {match("abceefe", "((&&'e' {})? .)*")}
+checkeq(t, {4, 5, 7})
+local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
+checkeq(t, {4, 5, 7})
+local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
+checkeq(t, {4, 5, 7})
+
+assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
+assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4)
+assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8)
+
+assert(match("abc", "a <- (. a)?") == 4)
+b = "balanced <- '(' ([^()] / balanced)* ')'"
+assert(match("(abc)", b))
+assert(match("(a(b)((c) (d)))", b))
+assert(not match("(a(b ((c) (d)))", b))
+
+b = compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
+assert(b == m.P(b))
+assert(b:match"((((a))(b)))")
+
+local g = [[
+ S <- "0" B / "1" A / "" -- balanced strings
+ A <- "0" S / "1" A A -- one more 0
+ B <- "1" S / "0" B B -- one more 1
+]]
+assert(match("00011011", g) == 9)
+
+local g = [[
+ S <- ("0" B / "1" A)*
+ A <- "0" / "1" A A
+ B <- "1" / "0" B B
+]]
+assert(match("00011011", g) == 9)
+assert(match("000110110", g) == 9)
+assert(match("011110110", g) == 3)
+assert(match("000110010", g) == 1)
+
+s = "aaaaaaaaaaaaaaaaaaaaaaaa"
+assert(match(s, "'a'^3") == 4)
+assert(match(s, "'a'^0") == 1)
+assert(match(s, "'a'^+3") == s:len() + 1)
+assert(not match(s, "'a'^+30"))
+assert(match(s, "'a'^-30") == s:len() + 1)
+assert(match(s, "'a'^-5") == 6)
+for i = 1, s:len() do
+ assert(match(s, string.format("'a'^+%d", i)) >= i + 1)
+ assert(match(s, string.format("'a'^-%d", i)) <= i + 1)
+ assert(match(s, string.format("'a'^%d", i)) == i + 1)
+end
+assert(match("01234567890123456789", "[0-9]^3+") == 19)
+
+
+assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123")
+t = match("0123456789", "{.}*->{}")
+checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
+assert(match("012345", "( (..) -> '%0%0' ) -> {}")[1] == "0101")
+
+eqcharset(compile"[]]", "]")
+eqcharset(compile"[][]", m.S"[]")
+eqcharset(compile"[]-]", m.S"-]")
+eqcharset(compile"[-]", m.S"-")
+eqcharset(compile"[az-]", m.S"a-z")
+eqcharset(compile"[-az]", m.S"a-z")
+eqcharset(compile"[a-z]", m.R"az")
+eqcharset(compile"[]['\"]", m.S[[]['"]])
+
+eqcharset(compile"[^]]", any - "]")
+eqcharset(compile"[^][]", any - m.S"[]")
+eqcharset(compile"[^]-]", any - m.S"-]")
+eqcharset(compile"[^]-]", any - m.S"-]")
+eqcharset(compile"[^-]", any - m.S"-")
+eqcharset(compile"[^az-]", any - m.S"a-z")
+eqcharset(compile"[^-az]", any - m.S"a-z")
+eqcharset(compile"[^a-z]", any - m.R"az")
+eqcharset(compile"[^]['\"]", any - m.S[[]['"]])
+
+-- tests for comments in 're'
+e = compile[[
+A <- B -- \t \n %nl .<> <- -> --
+B <- 'x' --]]
+assert(e:match'xy' == 2)
+
+-- tests for 're' with pre-definitions
+defs = {digits = m.R"09", letters = m.R"az"}
+e = compile("%letters (%letters / %digits)*", defs)
+assert(e:match"x123" == 5)
+
+e = compile([[
+ S <- A+
+ A <- %letters+ B
+ B <- %digits+
+]], defs)
+
+e = compile("{[0-9]+'.'?[0-9]*} -> sin", math)
+assert(e:match("2.34") == math.sin(2.34))
+
+
+function eq (_, _, a, b) return a == b end
+
+c = re.compile([[
+ longstring <- '[' {:init: '='* :} '[' close
+ close <- ']' =init ']' / . close
+]])
+
+assert(c:match'[==[]]===]]]]==]===[]' == 17)
+assert(c:match'[[]=]====]=]]]==]===[]' == 14)
+assert(not c:match'[[]=]====]=]=]==]===[]')
+
+c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. "
+
+assert(c:match'[==[]]===]]]]==]')
+assert(c:match'[[]=]====]=][]==]===[]]')
+assert(not c:match'[[]=]====]=]=]==]===[]')
+
+assert(re.find("hi alalo", "{:x:..:} =x") == 4)
+assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4)
+assert(not re.find("hi alalo", "{:x:..:} =x", 5))
+assert(re.find("hi alalo", "'al'", 5) == 6)
+assert(re.find("hi aloalolo", "{:x:..:} =x") == 8)
+assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11)
+
+assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo")
+assert(re.gsub("alo alo", "%w+", ".") == ". .")
+assert(re.gsub("hi, how are you", "[aeiou]", string.upper) ==
+ "hI, hOw ArE yOU")
+
+s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]'
+c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' "
+assert(re.gsub(s, c, "%2") == 'hi and =]')
+assert(re.gsub(s, c, "%0") == s)
+assert(re.gsub('[=[hi]=]', c, "%2") == '=')
+
+assert(re.find("", "!.") == 1)
+assert(re.find("alo", "!.") == 4)
+
+function addtag (s, i, t, tag) t.tag = tag; return i, t end
+
+c = re.compile([[
+ doc <- block !.
+ block <- (start (block / { [^<]+ })* -> {} end?) => addtag
+ start <- '<' {:tag: [a-z]+ :} '>'
+ end <- '</' { =tag } '>'
+]], {addtag = addtag})
+
+x = c:match[[
+<x>hi<b>hello</b>but<b>totheend</x>]]
+checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but',
+ {'totheend'}})
+
+assert(not pcall(compile, "x <- 'a' x <- 'b'"))
+assert(not pcall(compile, "'x' -> x", {x = 3}))
+
+
+-- tests for look-ahead captures
+x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")}
+checkeq(x, {"", "alo", ""})
+
+assert(re.match("aloalo",
+ "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}")
+ == "AallooAalloo")
+
+-- bug in 0.9 (and older versions), due to captures in look-aheads
+x = re.compile[[ {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~} ]]
+assert(x:match"alo alo" == "+ +")
+
+-- valid capture in look-ahead (used inside the look-ahead itself)
+x = re.compile[[
+ S <- &({:two: .. :} . =two) {[a-z]+} / . S
+]]
+assert(x:match("hello aloaLo aloalo xuxu") == "aloalo")
+
+
+p = re.compile[[
+ block <- ({:ident:' '*:} line
+ ((=ident !' ' line) / &(=ident ' ') block)*) -> {}
+ line <- {[^%nl]*} %nl
+]]
+
+t= p:match[[
+1
+ 1.1
+ 1.2
+ 1.2.1
+
+2
+ 2.1
+]]
+checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = " "}, ident = " "},
+ "2", {"2.1", ident = " "}, ident = ""})
+
+
+-- nested grammars
+p = re.compile[[
+ s <- a b !.
+ b <- ( x <- ('b' x)? )
+ a <- ( x <- 'a' x? )
+]]
+
+assert(p:match'aaabbb')
+assert(p:match'aaa')
+assert(not p:match'bbb')
+assert(not p:match'aaabbba')
+
+-- testing groups
+t = {re.match("abc", "{:S <- {:.:} {S} / '':}")}
+checkeq(t, {"a", "bc", "b", "c", "c", ""})
+
+t = re.match("1234", "({:a:.:} {:b:.:} {:c:.{.}:}) -> {}")
+checkeq(t, {a="1", b="2", c="4"})
+t = re.match("1234", "({:a:.:} {:b:{.}{.}:} {:c:{.}:}) -> {}")
+checkeq(t, {a="1", b="2", c="4"})
+t = re.match("12345", "({:.:} {:b:{.}{.}:} {:{.}{.}:}) -> {}")
+checkeq(t, {"1", b="2", "4", "5"})
+t = re.match("12345", "({:.:} {:{:b:{.}{.}:}:} {:{.}{.}:}) -> {}")
+checkeq(t, {"1", "23", "4", "5"})
+t = re.match("12345", "({:.:} {{:b:{.}{.}:}} {:{.}{.}:}) -> {}")
+checkeq(t, {"1", "23", "4", "5"})
+
+
+-- testing pre-defined names
+assert(os.setlocale("C") == "C")
+
+function eqlpeggsub (p1, p2)
+ local s1 = cs2str(re.compile(p1))
+ local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "")
+if s1 ~= s2 then print(s1,s2) end
+ assert(s1 == s2)
+end
+
+
+eqlpeggsub("%w", "%w")
+eqlpeggsub("%a", "%a")
+eqlpeggsub("%l", "%l")
+eqlpeggsub("%u", "%u")
+eqlpeggsub("%p", "%p")
+eqlpeggsub("%d", "%d")
+eqlpeggsub("%x", "%x")
+eqlpeggsub("%s", "%s")
+
+eqlpeggsub("%W", "%W")
+eqlpeggsub("%A", "%A")
+eqlpeggsub("%L", "%L")
+eqlpeggsub("%U", "%U")
+eqlpeggsub("%P", "%P")
+eqlpeggsub("%D", "%D")
+eqlpeggsub("%X", "%X")
+eqlpeggsub("%S", "%S")
+
+eqlpeggsub("[%w]", "%w")
+eqlpeggsub("[_%w]", "_%w")
+eqlpeggsub("[^%w]", "%W")
+eqlpeggsub("[%W%S]", "%W%S")
+
+re.updatelocale()
+
+
+-- testing nested substitutions x string captures
+
+p = re.compile[[
+ text <- {~ item* ~}
+ item <- macro / [^()] / '(' item* ')'
+ arg <- ' '* {~ (!',' item)* ~}
+ args <- '(' arg (',' arg)* ')'
+ macro <- ('apply' args) -> '%1(%2)'
+ / ('add' args) -> '%1 + %2'
+ / ('mul' args) -> '%1 * %2'
+]]
+
+assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)")
+
+rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
+
+assert(rev:match"0123456789" == "9876543210")
+
+print"OK"
+
+
diff --git a/lpeg.tar.gz b/lpeg.tar.gz
new file mode 100644
index 0000000..026b02d
--- /dev/null
+++ b/lpeg.tar.gz
Binary files differ
diff --git a/lpeg.url b/lpeg.url
new file mode 100644
index 0000000..e40361f
--- /dev/null
+++ b/lpeg.url
@@ -0,0 +1 @@
+http://www.inf.puc-rio.br/~roberto/lpeg/lpeg-0.10.2.tar.gz
diff --git a/lpeg.version b/lpeg.version
new file mode 100644
index 0000000..5eef0f1
--- /dev/null
+++ b/lpeg.version
@@ -0,0 +1 @@
+0.10.2