V8 Project
v8::internal::CharacterRange Class Reference

#include <jsregexp.h>

+ Collaboration diagram for v8::internal::CharacterRange:

Public Member Functions

 CharacterRange ()
 
 CharacterRange (void *null)
 
 CharacterRange (uc16 from, uc16 to)
 
bool Contains (uc16 i)
 
uc16 from () const
 
void set_from (uc16 value)
 
uc16 to () const
 
void set_to (uc16 value)
 
bool is_valid ()
 
bool IsEverything (uc16 max)
 
bool IsSingleton ()
 
void AddCaseEquivalents (ZoneList< CharacterRange > *ranges, bool is_one_byte, Zone *zone)
 

Static Public Member Functions

static void AddClassEscape (uc16 type, ZoneList< CharacterRange > *ranges, Zone *zone)
 
static Vector< const int > GetWordBounds ()
 
static CharacterRange Singleton (uc16 value)
 
static CharacterRange Range (uc16 from, uc16 to)
 
static CharacterRange Everything ()
 
static void Split (ZoneList< CharacterRange > *base, Vector< const int > overlay, ZoneList< CharacterRange > **included, ZoneList< CharacterRange > **excluded, Zone *zone)
 
static bool IsCanonical (ZoneList< CharacterRange > *ranges)
 
static void Canonicalize (ZoneList< CharacterRange > *ranges)
 
static void Negate (ZoneList< CharacterRange > *src, ZoneList< CharacterRange > *dst, Zone *zone)
 

Static Public Attributes

static const int kStartMarker = (1 << 24)
 
static const int kPayloadMask = (1 << 24) - 1
 

Private Attributes

uc16 from_
 
uc16 to_
 

Detailed Description

Definition at line 239 of file jsregexp.h.

Constructor & Destructor Documentation

◆ CharacterRange() [1/3]

v8::internal::CharacterRange::CharacterRange ( )
inline

Definition at line 241 of file jsregexp.h.

Referenced by AddCaseEquivalents(), Everything(), Negate(), Range(), Singleton(), and Split().

+ Here is the caller graph for this function:

◆ CharacterRange() [2/3]

v8::internal::CharacterRange::CharacterRange ( void *  null)
inline

Definition at line 243 of file jsregexp.h.

243 { DCHECK_EQ(NULL, null); } //NOLINT
NULL
#define DCHECK_EQ(v1, v2)
Definition: logging.h:206

References DCHECK_EQ, and NULL.

◆ CharacterRange() [3/3]

v8::internal::CharacterRange::CharacterRange ( uc16  from,
uc16  to 
)
inline

Definition at line 244 of file jsregexp.h.

244 : from_(from), to_(to) { }

Member Function Documentation

◆ AddCaseEquivalents()

void v8::internal::CharacterRange::AddCaseEquivalents ( ZoneList< CharacterRange > *  ranges,
bool  is_one_byte,
Zone *  zone 
)

Definition at line 5335 of file jsregexp.cc.

5336  {
5337  Isolate* isolate = zone->isolate();
5338  uc16 bottom = from();
5339  uc16 top = to();
5340  if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
5341  if (bottom > String::kMaxOneByteCharCode) return;
5342  if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
5343  }
5344  unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5345  if (top == bottom) {
5346  // If this is a singleton we just expand the one character.
5347  int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
5348  for (int i = 0; i < length; i++) {
5349  uc32 chr = chars[i];
5350  if (chr != bottom) {
5351  ranges->Add(CharacterRange::Singleton(chars[i]), zone);
5352  }
5353  }
5354  } else {
5355  // If this is a range we expand the characters block by block,
5356  // expanding contiguous subranges (blocks) one at a time.
5357  // The approach is as follows. For a given start character we
5358  // look up the remainder of the block that contains it (represented
5359  // by the end point), for instance we find 'z' if the character
5360  // is 'c'. A block is characterized by the property
5361  // that all characters uncanonicalize in the same way, except that
5362  // each entry in the result is incremented by the distance from the first
5363  // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and
5364  // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
5365  // Once we've found the end point we look up its uncanonicalization
5366  // and produce a range for each element. For instance for [c-f]
5367  // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
5368  // add a range if it is not already contained in the input, so [c-f]
5369  // will be skipped but [C-F] will be added. If this range is not
5370  // completely contained in a block we do this for all the blocks
5371  // covered by the range (handling characters that is not in a block
5372  // as a "singleton block").
5373  unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5374  int pos = bottom;
5375  while (pos <= top) {
5376  int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);
5377  uc16 block_end;
5378  if (length == 0) {
5379  block_end = pos;
5380  } else {
5381  DCHECK_EQ(1, length);
5382  block_end = range[0];
5383  }
5384  int end = (block_end > top) ? top : block_end;
5385  length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);
5386  for (int i = 0; i < length; i++) {
5387  uc32 c = range[i];
5388  uc16 range_from = c - (block_end - pos);
5389  uc16 range_to = c - (block_end - end);
5390  if (!(bottom <= range_from && range_to <= top)) {
5391  ranges->Add(CharacterRange(range_from, range_to), zone);
5392  }
5393  }
5394  pos = end + 1;
5395  }
5396  }
5397 }
static CharacterRange Singleton(uc16 value)
Definition: jsregexp.h:248
static const int32_t kMaxOneByteCharCode
Definition: objects.h:8811
unsigned int uchar
Definition: unicode.h:17
static bool RangeContainsLatin1Equivalents(CharacterRange range)
Definition: jsregexp.cc:2773
uint16_t uc16
Definition: globals.h:184
int32_t uc32
Definition: globals.h:185
static const int kMaxWidth
Definition: unicode.h:245

References v8::internal::List< T, AllocationPolicy >::Add(), CharacterRange(), DCHECK_EQ, from(), unibrow::Mapping< T, size >::get(), v8::internal::Zone::isolate(), v8::internal::Isolate::jsregexp_canonrange(), v8::internal::Isolate::jsregexp_uncanonicalize(), v8::internal::String::kMaxOneByteCharCode, unibrow::Ecma262UnCanonicalize::kMaxWidth, v8::internal::RangeContainsLatin1Equivalents(), Singleton(), and to().

+ Here is the call graph for this function:

◆ AddClassEscape()

void v8::internal::CharacterRange::AddClassEscape ( uc16  type,
ZoneList< CharacterRange > *  ranges,
Zone *  zone 
)
static

Definition at line 5233 of file jsregexp.cc.

5235  {
5236  switch (type) {
5237  case 's':
5238  AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5239  break;
5240  case 'S':
5241  AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5242  break;
5243  case 'w':
5244  AddClass(kWordRanges, kWordRangeCount, ranges, zone);
5245  break;
5246  case 'W':
5247  AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
5248  break;
5249  case 'd':
5250  AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
5251  break;
5252  case 'D':
5253  AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
5254  break;
5255  case '.':
5256  AddClassNegated(kLineTerminatorRanges,
5257  kLineTerminatorRangeCount,
5258  ranges,
5259  zone);
5260  break;
5261  // This is not a character range as defined by the spec but a
5262  // convenient shorthand for a character class that matches any
5263  // character.
5264  case '*':
5265  ranges->Add(CharacterRange::Everything(), zone);
5266  break;
5267  // This is the set of characters matched by the $ and ^ symbols
5268  // in multiline mode.
5269  case 'n':
5270  AddClass(kLineTerminatorRanges,
5271  kLineTerminatorRangeCount,
5272  ranges,
5273  zone);
5274  break;
5275  default:
5276  UNREACHABLE();
5277  }
5278 }
static CharacterRange Everything()
Definition: jsregexp.h:255
#define UNREACHABLE()
Definition: logging.h:30
static const int kLineTerminatorRanges[]
Definition: jsregexp.cc:3590
static const int kWordRanges[]
Definition: jsregexp.cc:3583
static const int kLineTerminatorRangeCount
Definition: jsregexp.cc:3592
static const int kSpaceRanges[]
Definition: jsregexp.cc:3577
static const int kSpaceRangeCount
Definition: jsregexp.cc:3581
static void AddClassNegated(const int *elmv, int elmc, ZoneList< CharacterRange > *ranges, Zone *zone)
Definition: jsregexp.cc:5214
static const int kDigitRanges[]
Definition: jsregexp.cc:3586
static const int kWordRangeCount
Definition: jsregexp.cc:3585
static const int kDigitRangeCount
Definition: jsregexp.cc:3587
static void AddClass(const int *elmv, int elmc, ZoneList< CharacterRange > *ranges, Zone *zone)
Definition: jsregexp.cc:5201

References v8::internal::List< T, AllocationPolicy >::Add(), v8::internal::AddClass(), v8::internal::AddClassNegated(), Everything(), v8::internal::kDigitRangeCount, v8::internal::kDigitRanges, v8::internal::kLineTerminatorRangeCount, v8::internal::kLineTerminatorRanges, v8::internal::kSpaceRangeCount, v8::internal::kSpaceRanges, v8::internal::kWordRangeCount, v8::internal::kWordRanges, and UNREACHABLE.

Referenced by v8::internal::AddRangeOrEscape().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ Canonicalize()

void v8::internal::CharacterRange::Canonicalize ( ZoneList< CharacterRange > *  ranges)
static

Definition at line 5508 of file jsregexp.cc.

5508  {
5509  if (character_ranges->length() <= 1) return;
5510  // Check whether ranges are already canonical (increasing, non-overlapping,
5511  // non-adjacent).
5512  int n = character_ranges->length();
5513  int max = character_ranges->at(0).to();
5514  int i = 1;
5515  while (i < n) {
5516  CharacterRange current = character_ranges->at(i);
5517  if (current.from() <= max + 1) {
5518  break;
5519  }
5520  max = current.to();
5521  i++;
5522  }
5523  // Canonical until the i'th range. If that's all of them, we are done.
5524  if (i == n) return;
5525 
5526  // The ranges at index i and forward are not canonicalized. Make them so by
5527  // doing the equivalent of insertion sort (inserting each into the previous
5528  // list, in order).
5529  // Notice that inserting a range can reduce the number of ranges in the
5530  // result due to combining of adjacent and overlapping ranges.
5531  int read = i; // Range to insert.
5532  int num_canonical = i; // Length of canonicalized part of list.
5533  do {
5534  num_canonical = InsertRangeInCanonicalList(character_ranges,
5535  num_canonical,
5536  character_ranges->at(read));
5537  read++;
5538  } while (read < n);
5539  character_ranges->Rewind(num_canonical);
5540 
5541  DCHECK(CharacterRange::IsCanonical(character_ranges));
5542 }
static bool IsCanonical(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5400
#define DCHECK(condition)
Definition: logging.h:205
static int InsertRangeInCanonicalList(ZoneList< CharacterRange > *list, int count, CharacterRange insert)
Definition: jsregexp.cc:5442

References v8::internal::List< T, AllocationPolicy >::at(), DCHECK, from(), v8::internal::InsertRangeInCanonicalList(), IsCanonical(), and to().

Referenced by v8::internal::EmitCharClass(), v8::internal::TextNode::FilterOneByte(), and v8::internal::TextNode::GetSuccessorOfOmnivorousTextNode().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ Contains()

bool v8::internal::CharacterRange::Contains ( uc16  i)
inline

Definition at line 258 of file jsregexp.h.

258 { return from_ <= i && i <= to_; }

References from_, and to_.

Referenced by v8::internal::RangeContainsLatin1Equivalents().

+ Here is the caller graph for this function:

◆ Everything()

static CharacterRange v8::internal::CharacterRange::Everything ( )
inlinestatic

Definition at line 255 of file jsregexp.h.

255  {
256  return CharacterRange(0, 0xFFFF);
257  }

References CharacterRange().

Referenced by AddClassEscape().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ from()

uc16 v8::internal::CharacterRange::from ( ) const
inline

◆ GetWordBounds()

Vector< const int > v8::internal::CharacterRange::GetWordBounds ( )
static

Definition at line 5281 of file jsregexp.cc.

5281  {
5282  return Vector<const int>(kWordRanges, kWordRangeCount - 1);
5283 }

References v8::internal::kWordRangeCount, and v8::internal::kWordRanges.

◆ is_valid()

bool v8::internal::CharacterRange::is_valid ( )
inline

Definition at line 263 of file jsregexp.h.

263 { return from_ <= to_; }

References from_, and to_.

Referenced by v8::internal::DispatchTable::AddRange().

+ Here is the caller graph for this function:

◆ IsCanonical()

bool v8::internal::CharacterRange::IsCanonical ( ZoneList< CharacterRange > *  ranges)
static

Definition at line 5400 of file jsregexp.cc.

5400  {
5401  DCHECK_NOT_NULL(ranges);
5402  int n = ranges->length();
5403  if (n <= 1) return true;
5404  int max = ranges->at(0).to();
5405  for (int i = 1; i < n; i++) {
5406  CharacterRange next_range = ranges->at(i);
5407  if (next_range.from() <= max + 1) return false;
5408  max = next_range.to();
5409  }
5410  return true;
5411 }
#define DCHECK_NOT_NULL(p)
Definition: logging.h:213

References v8::internal::List< T, AllocationPolicy >::at(), DCHECK_NOT_NULL, from(), and to().

Referenced by Canonicalize(), v8::internal::EmitCharClass(), v8::internal::TextNode::FilterOneByte(), v8::internal::TextNode::GetSuccessorOfOmnivorousTextNode(), and Negate().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ IsEverything()

bool v8::internal::CharacterRange::IsEverything ( uc16  max)
inline

Definition at line 264 of file jsregexp.h.

264 { return from_ == 0 && to_ >= max; }

References from_, and to_.

◆ IsSingleton()

bool v8::internal::CharacterRange::IsSingleton ( )
inline

Definition at line 265 of file jsregexp.h.

265 { return (from_ == to_); }

References from_, and to_.

◆ Negate()

void v8::internal::CharacterRange::Negate ( ZoneList< CharacterRange > *  src,
ZoneList< CharacterRange > *  dst,
Zone *  zone 
)
static

Definition at line 5545 of file jsregexp.cc.

5547  {
5548  DCHECK(CharacterRange::IsCanonical(ranges));
5549  DCHECK_EQ(0, negated_ranges->length());
5550  int range_count = ranges->length();
5551  uc16 from = 0;
5552  int i = 0;
5553  if (range_count > 0 && ranges->at(0).from() == 0) {
5554  from = ranges->at(0).to();
5555  i = 1;
5556  }
5557  while (i < range_count) {
5558  CharacterRange range = ranges->at(i);
5559  negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone);
5560  from = range.to();
5561  i++;
5562  }
5563  if (from < String::kMaxUtf16CodeUnit) {
5564  negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit),
5565  zone);
5566  }
5567 }
static const int kMaxUtf16CodeUnit
Definition: objects.h:8813

References v8::internal::List< T, AllocationPolicy >::Add(), v8::internal::List< T, AllocationPolicy >::at(), CharacterRange(), DCHECK, DCHECK_EQ, from(), IsCanonical(), v8::internal::String::kMaxUtf16CodeUnit, and to().

+ Here is the call graph for this function:

◆ Range()

static CharacterRange v8::internal::CharacterRange::Range ( uc16  from,
uc16  to 
)
inlinestatic

Definition at line 251 of file jsregexp.h.

251  {
252  DCHECK(from <= to);
253  return CharacterRange(from, to);
254  }

References CharacterRange(), DCHECK, from(), and to().

+ Here is the call graph for this function:

◆ set_from()

void v8::internal::CharacterRange::set_from ( uc16  value)
inline

Definition at line 260 of file jsregexp.h.

260 { from_ = value; }

References from_.

Referenced by v8::internal::DispatchTable::AddRange().

+ Here is the caller graph for this function:

◆ set_to()

void v8::internal::CharacterRange::set_to ( uc16  value)
inline

Definition at line 262 of file jsregexp.h.

262 { to_ = value; }

References to_.

◆ Singleton()

static CharacterRange v8::internal::CharacterRange::Singleton ( uc16  value)
inlinestatic

Definition at line 248 of file jsregexp.h.

248  {
249  return CharacterRange(value, value);
250  }

References CharacterRange().

Referenced by AddCaseEquivalents().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ Split()

void v8::internal::CharacterRange::Split ( ZoneList< CharacterRange > *  base,
Vector< const int >  overlay,
ZoneList< CharacterRange > **  included,
ZoneList< CharacterRange > **  excluded,
Zone *  zone 
)
static

Definition at line 5316 of file jsregexp.cc.

5320  {
5321  DCHECK_EQ(NULL, *included);
5322  DCHECK_EQ(NULL, *excluded);
5323  DispatchTable table(zone);
5324  for (int i = 0; i < base->length(); i++)
5325  table.AddRange(base->at(i), CharacterRangeSplitter::kInBase, zone);
5326  for (int i = 0; i < overlay.length(); i += 2) {
5327  table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),
5328  CharacterRangeSplitter::kInOverlay, zone);
5329  }
5330  CharacterRangeSplitter callback(included, excluded, zone);
5331  table.ForEach(&callback);
5332 }

References v8::internal::DispatchTable::AddRange(), v8::internal::List< T, AllocationPolicy >::at(), CharacterRange(), DCHECK_EQ, v8::internal::DispatchTable::ForEach(), v8::internal::CharacterRangeSplitter::kInBase, v8::internal::CharacterRangeSplitter::kInOverlay, v8::internal::Vector< T >::length(), and NULL.

+ Here is the call graph for this function:

◆ to()

uc16 v8::internal::CharacterRange::to ( ) const
inline

Member Data Documentation

◆ from_

uc16 v8::internal::CharacterRange::from_
private

Definition at line 289 of file jsregexp.h.

Referenced by Contains(), from(), is_valid(), IsEverything(), IsSingleton(), and set_from().

◆ kPayloadMask

const int v8::internal::CharacterRange::kPayloadMask = (1 << 24) - 1
static

Definition at line 286 of file jsregexp.h.

◆ kStartMarker

const int v8::internal::CharacterRange::kStartMarker = (1 << 24)
static

Definition at line 285 of file jsregexp.h.

◆ to_

uc16 v8::internal::CharacterRange::to_
private

Definition at line 290 of file jsregexp.h.

Referenced by Contains(), is_valid(), IsEverything(), IsSingleton(), set_to(), and to().


The documentation for this class was generated from the following files: