Unicode C++
Regular Expression with Multiple Matches and Capture Groups

Demonstrates a regular expression with multiple matches and capture groups for each match.
Note: Chilkat uses PCRE2. See PCRE2 Regular Expressions
Also see: PCRE2 Performance
Chilkat Unicode C++ Downloads

Unicode C++
#include <wchar.h>

#include <CkStringBuilderW.h>
#include <CkJsonObjectW.h>

//  Demonstrates CkStringBuilderW::RegexMatch with a pattern that matches several
//  times in the input and has three capture groups per match.
//
//  The sample builds a three-line string, runs the regular expression over it,
//  prints the resulting JSON, and then walks the JSON to print every capture
//  group of every match.  On a negative return from RegexMatch it prints the
//  string builder's last-error text and returns.
//
//  Note on printing:  all strings here are wide (const wchar_t *), so the wprintf
//  conversions use %ls.  In ISO C a plain %s inside a wide format string expects a
//  narrow char string; only MSVC treats it as wide.  %ls is correct on both.
void ChilkatSample(void)
    {
    //  Three capture groups: first name, last name, email address.
    const wchar_t *pattern = L"Name:\\s+(\\w+)\\s+(\\w+),\\s+Email:\\s+(\\S+)";

    CkStringBuilderW sb;
    bool crlf = true;
    sb.AppendLine(L"Name: John Smith, Email: john.smith@example.com",crlf);
    sb.AppendLine(L"Name: Jack Johnson, Email: jack.johnson@example.com",crlf);
    sb.AppendLine(L"Name: Mary Adams, Email: mary.adams@example.com",crlf);

    wprintf(L"%ls\n",sb.getAsString());

    //  We have the following string:
    //  Name: John Smith, Email: john.smith@example.com
    //  Name: Jack Johnson, Email: jack.johnson@example.com
    //  Name: Mary Adams, Email: mary.adams@example.com

    CkJsonObjectW json;
    //  Emit indented (non-compact) JSON so the printed result is readable.
    json.put_EmitCompact(false);

    int timeoutMs = 2000;
    int numMatches = sb.RegexMatch(pattern,json,timeoutMs);
    if (numMatches < 0) {
        //  Probably an error in the regular expression.
        //  Suggestion: Use AI to help create and/or diagnose regular expressions.
        wprintf(L"%ls\n",sb.lastErrorText());
        return;
    }

    //  Examine the matches:
    wprintf(L"%ls\n",json.emit());

    //  This is the JSON with the match information.
    //  See the JSON parsing code below to get the matched capture group values.

    //  {
    //    "match": [
    //      {
    //        "group": [
    //          {
    //            "cap": "Name: John Smith, Email: john.smith@example.com",
    //            "idx": 0,
    //            "len": 47
    //          },
    //          {
    //            "cap": "John",
    //            "idx": 6,
    //            "len": 4
    //          },
    //          {
    //            "cap": "Smith",
    //            "idx": 11,
    //            "len": 5
    //          },
    //          {
    //            "cap": "john.smith@example.com",
    //            "idx": 25,
    //            "len": 22
    //          }
    //        ]
    //      },
    //      {
    //        "group": [
    //          {
    //            "cap": "Name: Jack Johnson, Email: jack.johnson@example.com",
    //            "idx": 49,
    //            "len": 51
    //          },
    //          {
    //            "cap": "Jack",
    //            "idx": 55,
    //            "len": 4
    //          },
    //          {
    //            "cap": "Johnson",
    //            "idx": 60,
    //            "len": 7
    //          },
    //          {
    //            "cap": "jack.johnson@example.com",
    //            "idx": 76,
    //            "len": 24
    //          }
    //        ]
    //      },
    //      {
    //        "group": [
    //          {
    //            "cap": "Name: Mary Adams, Email: mary.adams@example.com",
    //            "idx": 102,
    //            "len": 47
    //          },
    //          {
    //            "cap": "Mary",
    //            "idx": 108,
    //            "len": 4
    //          },
    //          {
    //            "cap": "Adams",
    //            "idx": 113,
    //            "len": 5
    //          },
    //          {
    //            "cap": "mary.adams@example.com",
    //            "idx": 127,
    //            "len": 22
    //          }
    //        ]
    //      }
    //    ]
    //  }

    //  Important:  Capture group 0 always contains the entire match — that is, the portion of the input string that matches the full regular expression.

    //  Walk every match and every capture group within it.  The JSON paths use the
    //  literal letters i and j as array indices; put_I / put_J set the values that
    //  are substituted for them when the path is evaluated.
    int matchCount = json.SizeOfArray(L"match");
    for (int i = 0; i < matchCount; i++) {
        wprintf(L"Match %d:\n",i + 1);
        json.put_I(i);

        int numCaptureGroups = json.SizeOfArray(L"match[i].group");
        for (int j = 0; j < numCaptureGroups; j++) {
            json.put_J(j);
            const wchar_t *cap = json.stringOf(L"match[i].group[j].cap");
            //  Guard against a null return (lookup failure) so wprintf is never
            //  handed a null pointer for %ls.
            wprintf(L"%d: %ls\n",j,cap ? cap : L"");
        }
    }

    //  Capture group 0 always contains the entire match — that is, the portion of the input string that matches the full regular expression.

    //  Output

    //  Match 1:
    //  0: Name: John Smith, Email: john.smith@example.com
    //  1: John
    //  2: Smith
    //  3: john.smith@example.com
    //  Match 2:
    //  0: Name: Jack Johnson, Email: jack.johnson@example.com
    //  1: Jack
    //  2: Johnson
    //  3: jack.johnson@example.com
    //  Match 3:
    //  0: Name: Mary Adams, Email: mary.adams@example.com
    //  1: Mary
    //  2: Adams
    //  3: mary.adams@example.com
    }