Nix/src/libutil/regex.cc
Eelco Dolstra 976df480c9 Add a primop for regular expression pattern matching
The function ‘builtins.match’ takes a POSIX extended regular
expression and an arbitrary string. It returns ‘null’ if the string
does not match the regular expression. Otherwise, it returns a list
containing substring matches corresponding to parenthesis groups in
the regex. The regex must match the entire string (i.e. the pattern is
implicitly wrapped as "^(<pat>)$").  For example:

  match "foo" "foobar" => null
  match "foo" "foo" => []
  match "f(o+)(.*)" "foooobar" => ["oooo" "bar"]
  match "(.*/)?([^/]*)" "/dir/file.nix" => ["/dir/" "file.nix"]
  match "(.*/)?([^/]*)" "file.nix" => [null "file.nix"]

The following example finds all regular files with extension .nix or
.patch underneath the current directory:

  let

    findFiles = pat: dir: concatLists (mapAttrsToList (name: type:
      if type == "directory" then
        findFiles pat (dir + "/" + name)
      else if type == "regular" && match pat name != null then
        [(dir + "/" + name)]
      else []) (readDir dir));

  in findFiles ".*\\.(nix|patch)" (toString ./.)
2014-11-25 11:47:06 +01:00

51 lines
1.4 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "regex.hh"
#include "types.hh"
#include <algorithm>
#include <vector>
namespace nix {
/* Compile `pattern' as a POSIX extended regular expression that has
   to match an entire string.  If `subs' is false the pattern is
   compiled with REG_NOSUB and the group count is recorded as zero.
   Throws RegexError if regcomp() rejects the pattern. */
Regex::Regex(const string & pattern, bool subs)
{
    /* Patterns must match the entire string.  The extra pair of
       parentheses keeps a top-level alternation such as "a|b" inside
       the anchors. */
    int err = regcomp(&preg, ("^(" + pattern + ")$").c_str(), (subs ? 0 : REG_NOSUB) | REG_EXTENDED);
    if (err) throw RegexError(format("compiling pattern %1%: %2%") % pattern % showError(err));

    /* Ask the compiled regex how many groups it has instead of
       counting '(' characters: a textual count also includes literal
       parentheses such as "\\(" or "[(]".  re_nsub includes the
       wrapper group added above, so subtract it.  re_nsub is only
       consulted when REG_NOSUB was not requested. */
    nrParens = subs ? preg.re_nsub - 1 : 0;
}
/* Release whatever regcomp() stored in `preg'. */
Regex::~Regex()
{
    regfree(&preg);
}
bool Regex::matches(const string & s)
{
int err = regexec(&preg, s.c_str(), 0, 0, 0);
if (err == 0) return true;
else if (err == REG_NOMATCH) return false;
throw Error(format("matching string %1%: %2%") % s % showError(err));
}
/* Match `s' against the compiled pattern and record group matches.
   On a match, subs[i] is set to the text matched by the i-th
   (0-based) parenthesised group; entries for groups that did not
   take part in the match are not written.  Returns true on a match
   and false on REG_NOMATCH; any other regexec() result is reported
   by throwing Error. */
bool Regex::matches(const string & s, Subs & subs)
{
    /* Slot 0 receives the whole match and slot 1 the wrapper group
       that the constructor puts around the pattern, so the caller's
       groups start at slot 2.  A std::vector is used instead of a
       variable-length array, which is a compiler extension rather
       than standard C++ and places a pattern-dependent amount of
       data on the stack. */
    std::vector<regmatch_t> pmatch(nrParens + 2);

    int err = regexec(&preg, s.c_str(), pmatch.size(), &pmatch[0], 0);
    if (err == 0) {
        for (unsigned int n = 2; n < nrParens + 2; ++n)
            /* An end offset of -1 marks a group that did not match. */
            if (pmatch[n].rm_eo != -1)
                subs[n - 2] = string(s, pmatch[n].rm_so, pmatch[n].rm_eo - pmatch[n].rm_so);
        return true;
    }
    else if (err == REG_NOMATCH) return false;
    throw Error(format("matching string %1%: %2%") % s % showError(err));
}
/* Turn a regcomp()/regexec() error code into a message by calling
   regerror().  The message is written into a fixed 256-byte local
   buffer before being returned as a string. */
string Regex::showError(int err)
{
    char message[256];
    regerror(err, &preg, message, sizeof message);
    return message;
}
}