/*
* [TestRegexFindQuotedString.java]
*
* Summary: Finding a quoted String with a regex.
*
* Copyright: (c) 2012 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
*          http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.7+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 1.0 2012-05-25 initial release
*/
package com.mindprod.example;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.lang.System.out;
/**
* Finding a quoted String with a regex.
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0 2012-05-25 initial release
* @since 2012-05-25
*/
public class TestRegexFindQuotedString
{
    // ------------------------------ CONSTANTS ------------------------------

    /**
     * Sample text searched by every pattern. It contains a double-quoted string with an
     * embedded apostrophe, a single-quoted string with embedded double quotes, a string with
     * an accented letter, an empty string, and a string whose quotes do not match.
     */
    private static final String LOOK_IN = "George said \"that's the ticket\"." +
                                          " Jeb replied '\"ticket?\" what ticket'." +
                                          " \"How na\u00efve!\"." +
                                          " empty: \"\"" +
                                          " 'unbalanced\"";

    // -------------------------- STATIC METHODS --------------------------

    /**
     * Exercise a pattern to see what it can find in the sample text.
     * Prints a blank line, the pattern itself, then each match on its own line.
     *
     * @param pattern compiled regex to search the sample text with
     */
    static void exercisePattern( Pattern pattern )
    {
        out.println();
        out.println( "Pattern: " + pattern.toString() );
        // Matchers are used both for matching and finding.
        final Matcher m = pattern.matcher( LOOK_IN );
        while ( m.find() )
        {
            // group 0 is the entire matched text, including the surrounding quotes.
            out.println( m.group( 0 ) );
        }
    }

    // --------------------------- main() method ---------------------------

    /**
     * test harness
     *
     * @param args not used
     */
    public static void main( String[] args )
    {
        // We want to find Strings of the form "xx'xx" or 'xx"xx'
        // We want to avoid the following problems:
        // 1. Works even if String contains foreign languages, even Russian or accented letters.
        // 2. If starts with " must end with ", if starts with ' must end with '.
        // 3. ' is ok inside "...", and " is ok inside '...'
        // 4. We don't worry about how to use ' inside '...'.
        // here are some suggested techniques:

        // fails 1 2 3
        exercisePattern( Pattern.compile( "[\"']\\p{Print}+?[\"']" ) );

        // fails 2 3
        exercisePattern( Pattern.compile( "[\"'][^\"']+[\"']" ) );

        // fails 3, uses a capturing group.
        exercisePattern( Pattern.compile( "([\"'])[^\"']+\\1" ) );

        // works, rejects empty strings by Mark Space.
        exercisePattern( Pattern.compile( "\"[^\"]+\"|'[^']+'" ) );

        // works, accepts empty strings by Robert Klemme.
        exercisePattern( Pattern.compile( "\"[^\"]*\"|'[^']*'" ) );

        // works, accepts empty strings. This is Robert Klemme's contribution.
        // (?: ) is a non-capturing group. Inside the quotes, each repetition consumes either
        // a backslash followed by any one character (an escaped character, so an escaped
        // quote does not end the string), or one character that is neither a backslash
        // nor the closing quote.
        exercisePattern( Pattern.compile( "\"(?:\\\\.|[^\\\"])*\"|'(?:\\\\.|[^\\'])*'" ) );
    }
}
/*
 * --
 * Roedy Green Canadian Mind Products
 * http://mindprod.com
 * I would be quite surprised if the NSA (National Security Agency)
 * did not have a computer program to scan bits of shredded
 * documents and electronically put them back together like a giant
 * jigsaw puzzle. This suggests you cannot just shred, you must also burn.
 */