#!/usr/bin/env perl
#
# This snippet shows the canonical way to parse a triple,
# allowing quotation and escaping within it.
#

# Characters that may NOT occur in an unquoted string: the punctuation
# marks [ ] { } < > , " and the backslash.  The leading ^ makes the
# bracket expression built from this text a negated class.

our $uqchar    = '^][}{,><"\\\\';

# Unquoted string: one or more characters outside that set.
# Blanks are not in the set, so they count as ordinary characters here.

our $uqstring  = qr{[${uqchar}]+};

# Quoted character: a backslash together with the character after it,
# or else any single character other than a backslash or a double quote.

our $qchar     = qr{(?:\\.|[^\\"])};

# Quoted string body: zero or more quoted characters
# (the surrounding double quotes are not included).

our $qstring   = qr{${qchar}*};

# String: a quoted string body wrapped in double quotes,
# or alternatively an unquoted string.

our $string    = qr{(?:"$qstring")|(?:$uqstring)};

# Relator: an optional < or >, then at least one upper case letter,
# then any number of lower case letters or underscores.

our $rel       = qr{[<>]?[A-Z]+[a-z_]*};

# Bracketed triple: [ string , relator , string ], capturing the three
# parts as $1, $2 and $3.  Other variants (whitespace, brackets, ...)
# may be needed.
# NOTE: unquoted strings match greedily and may contain blanks, so
# blanks directly before the closing ] end up inside the third capture.

our $triple    = qr{\[\s*($string),\s*($rel),\s*($string)\s*\]};

# Variant with less punctuation, e.g.: head<REL mod.

our $triplenp  = qr{($string)\s*($rel)\s*($string)};

# EscapeQuotes(STR)
# Returns a copy of STR in which every double quote and every backslash
# is preceded by a backslash.  The argument itself is not modified.
sub EscapeQuotes
{
    my $str = shift;

    # Capture the special character and emit it again behind a backslash.
    # (A bare & in the replacement is a literal ampersand in Perl; it
    # does not stand for the matched text as it does in sed.)
    $str =~ s/(["\\])/\\$1/g;
    return $str;
}

# EnQuote(STR)
# Returns STR wrapped in double quotes, with its content passed through
# EscapeQuotes, when STR is empty, contains one of the characters
# [ ] { } < > , " \ or starts or ends with a space.
# Any other STR is returned unchanged.
sub EnQuote
{
    my $str = shift;
    
    if ($str eq "" || $str =~ /[][}{><,"\\]/ || $str =~ /^ / || $str =~ / $/) {
	# Pass the variable itself; a bareword here would quote the
	# literal text "str" instead of the caller's string.
	return '"' . EscapeQuotes($str) . '"';
    }
    return $str;
}

# DeQuote(STR)
# If STR starts with a double quote: removes that quote, removes a double
# quote at the end if there is one, and replaces every backslash-plus-
# character pair by the character alone.
# A STR that does not start with a double quote is returned unchanged.
sub DeQuote
{
    my $str = shift;

    if ($str =~ s/^"//) {
	$str =~ s/"$//;
	# $1, not \1: backreference syntax belongs on the pattern side,
	# and \1 in a replacement draws a warning under "use warnings".
	$str =~ s/\\(.)/$1/g;
    }
    return $str;
}

=pod
# The same in Python:

UNQUOTED_CHAR  = r'^][}{,><"\\'
UNQUOTED_STRING= r'[' + UNQUOTED_CHAR + r']+'
QUOTED_CHAR    = r'(?:\\.|[^\"])'
QUOTED_STRING  = r'(?:' + QUOTED_CHAR + r'*)'
STRING         = r'(?:"'+QUOTED_STRING+r'"|'+UNQUOTED_STRING+r')'

QQUOTED_STRING = r'"(' + QUOTED_STRING + r')"'

RELATOR        = r'[<>]?[A-Z]+[a-z_]*'

STD_TRIPLE     = r'^\s*\[('+STRING+r'),(' + RELATOR + r'),('+STRING+r')\]\s*(\-?\d*)\s*$'

def EscapeQuotes(str):
  """
  Prefixes all quote characters in str with a backslash;
  does the same with backslashes.
  s/["\\]/\\&/g
  """
  str = re.sub(r'(["\\])', r'\\\1', str)
  #sys.stderr.write("EscapeQuotes: >" + str + "<\n")
  return str

def EnQuote(str):
  """
  If the str contains special characters, quote it and backslash
  the special characters.
  Also quote it if it starts or ends with a space, or is empty.
  Otherwise leave it unchanged.
  """
  if str == "" or \
     re.search(r'[][}{><,"\\]', str) != None or \
     str[0] == " " or str[-1] == " ":
    return '"' + EscapeQuotes(str) + '"'
  return str

def DeQuote(str):
  if str[0] == '"':
    str = re.sub(r'\\(.)', r'\1', str[1:-1])
    #sys.stderr.write("DeQuote: >" + str + "<\n")
  return str

=cut

# unittest()
# Prints the compiled patterns, then matches every line of the DATA
# section: lines starting with [ against $triple, all other lines
# against $triplenp.  One result line is printed per sample.
# Returns 0 when every sample behaved as expected, 1 otherwise.
sub unittest
{
    my $rc = 0;

    print "qstring: ", $qstring, "\n";
    print "uqstring: ", $uqstring, "\n";
    print "triple: ", $triple, "\n";

    while (<main::DATA>) {
	chomp;

	# A sample that announces "must fail" is a negative test: for it a
	# non-match is the expected outcome and must not flag an error,
	# otherwise the script could never exit with status 0.
	my $expect_match = !/must fail/;

	# Pick the pattern and the label used in the report line.
	my ($tag, $pattern) = /^\[/ ? (' P', $triple) : ('NP', $triplenp);

	if (/$pattern/) {
	    print "$tag SUCC: '$_' => '", DeQuote($1), "' '$2' '", DeQuote($3), "'\n";
	    $rc = 1 unless $expect_match;
	} else {
	    print "$tag FAIL: '$_'\n";
	    $rc = 1 if $expect_match;
	}
    }

    return $rc;
}

# Run the self-test and use its return value as the process exit status.
my $status = unittest();
exit $status;

# Not reached: the exit above ends the program first.
0;

__DATA__
[ a, B, c ]
[ a,B, c]
[a, B,c ]
[a,B,c]
[ some formula, ATTR, 1 2 dichloor ]
[ formula, ATTR, "1,2-dichloor" ]
[formula, ATTR, "1,2\"-dichloor" ]
[formula, ATTR, "1,2\"[]{},-dichloor"]
[formula, ATTR, "1,2\"\\-\\\"-dichloor"]
[ must fail: formula, ATTR, 1,2\"[]{},-dichloor ]
a<B c
a<PREPto c
formula>ATTR "1,2\"[]{},-dichloor"
formula>ATTR"1,2\"\\[]\\\"{},-dichloor"
