V8 Project
regexp-macro-assembler.cc
Go to the documentation of this file.
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/v8.h"
6 
7 #include "src/assembler.h"
8 #include "src/ast.h"
10 #include "src/regexp-stack.h"
11 #include "src/simulator.h"
12 
13 namespace v8 {
14 namespace internal {
15 
17  : slow_safe_compiler_(false),
18  global_mode_(NOT_GLOBAL),
19  zone_(zone) {
  // Nothing to do in the body: the initializer list above sets
  // slow_safe_compiler_ to false, global_mode_ to NOT_GLOBAL and stores the
  // zone argument in zone_.
20 }
21 
22 
24 }
25 
26 
27 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
28 
30  : RegExpMacroAssembler(zone) {
  // Empty body: construction only forwards zone to RegExpMacroAssembler(zone).
31 }
32 
33 
35 }
36 
37 
// Unaligned reads are permitted only when the enable_unaligned_accesses flag
// is set and slow_safe() reports false.
39  return FLAG_enable_unaligned_accesses && !slow_safe();
40 }
41 
43  String* subject,
44  int start_index) {
  // Returns the address of the character at start_index inside the character
  // data of subject.
  //   subject:     must be an external or sequential string (asserted below).
  //   start_index: character index, asserted to be in [0, subject->length()].
  // The index is a character index in both branches; for two-byte strings the
  // uc16 pointer arithmetic scales it to the right byte address.
45  // Not just flat, but ultra flat.
46  DCHECK(subject->IsExternalString() || subject->IsSeqString());
47  DCHECK(start_index >= 0);
48  DCHECK(start_index <= subject->length());
49  if (subject->IsOneByteRepresentation()) {
  // One-byte data: fetch the character pointer from the external or
  // sequential representation, then offset by start_index bytes.
50  const byte* address;
51  if (StringShape(subject).IsExternal()) {
52  const uint8_t* data = ExternalOneByteString::cast(subject)->GetChars();
53  address = reinterpret_cast<const byte*>(data);
54  } else {
55  DCHECK(subject->IsSeqOneByteString());
56  const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
57  address = reinterpret_cast<const byte*>(data);
58  }
59  return address + start_index;
60  }
  // Two-byte data: same selection between external and sequential storage.
61  const uc16* data;
62  if (StringShape(subject).IsExternal()) {
63  data = ExternalTwoByteString::cast(subject)->GetChars();
64  } else {
65  DCHECK(subject->IsSeqTwoByteString());
66  data = SeqTwoByteString::cast(subject)->GetChars();
67  }
  // The offset is applied on the uc16 pointer before the cast to byte*, so it
  // advances start_index characters rather than start_index bytes.
68  return reinterpret_cast<const byte*>(data + start_index);
69 }
70 
71 
73  Handle<Code> regexp_code,
74  Handle<String> subject,
75  int* offsets_vector,
76  int offsets_vector_length,
77  int previous_index,
78  Isolate* isolate) {
  // Prepares the raw input range for a match attempt on subject starting at
  // character index previous_index, then delegates to Execute().
  //   regexp_code:            code object handed to Execute().
  //   subject:                flat string to match against (asserted).
  //   offsets_vector/_length: output buffer forwarded unchanged to Execute().
  //   previous_index:         start index, asserted to be in
  //                           [0, subject->length()].
  // Returns whatever Execute() returns.
79 
80  DCHECK(subject->IsFlat());
81  DCHECK(previous_index >= 0);
82  DCHECK(previous_index <= subject->length());
83 
84  // No allocations before calling the regexp, but we can't use
85  // DisallowHeapAllocation, since regexps might be preempted, and another
86  // thread might do allocation anyway.
87 
88  String* subject_ptr = *subject;
89  // Character offsets into string.
90  int start_offset = previous_index;
  // Number of characters from the start offset to the end of the subject.
91  int char_length = subject_ptr->length() - start_offset;
  // Stays 0 unless the subject is a sliced string (see below).
92  int slice_offset = 0;
93 
94  // The string has been flattened, so if it is a cons string it contains the
95  // full string in the first part.
96  if (StringShape(subject_ptr).IsCons()) {
97  DCHECK_EQ(0, ConsString::cast(subject_ptr)->second()->length());
98  subject_ptr = ConsString::cast(subject_ptr)->first();
99  } else if (StringShape(subject_ptr).IsSliced()) {
  // A slice is a window into its parent: read from the parent and remember
  // where the window starts so the start position can be shifted.
100  SlicedString* slice = SlicedString::cast(subject_ptr);
101  subject_ptr = slice->parent();
102  slice_offset = slice->offset();
103  }
104  // Ensure that an underlying string has the same representation.
105  bool is_one_byte = subject_ptr->IsOneByteRepresentation();
106  DCHECK(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
107  // String is now either Sequential or External
  // Shift that converts a character count into a byte count:
  // 0 for one-byte characters, 1 for two-byte characters.
108  int char_size_shift = is_one_byte ? 0 : 1;
109 
  // The start address is computed on the unwrapped string, shifted by the
  // slice offset when the subject was a slice.
110  const byte* input_start =
111  StringCharacterPosition(subject_ptr, start_offset + slice_offset);
112  int byte_length = char_length << char_size_shift;
113  const byte* input_end = input_start + byte_length;
  // The original *subject (not the unwrapped subject_ptr) is passed as the
  // input string; start_offset stays relative to that original subject.
114  Result res = Execute(*regexp_code,
115  *subject,
116  start_offset,
117  input_start,
118  input_end,
119  offsets_vector,
120  offsets_vector_length,
121  isolate);
122  return res;
123 }
124 
125 
127  Code* code,
128  String* input, // This needs to be the unpacked (sliced, cons) string.
129  int start_offset,
130  const byte* input_start,
131  const byte* input_end,
132  int* output,
133  int output_size,
134  Isolate* isolate) {
  // Invokes the generated regexp code at code->entry() on the byte range
  // [input_start, input_end) and returns its result converted to Result.
  // input, start_offset, output and output_size are forwarded unchanged to
  // the generated code.
135  // Ensure that the minimum stack has been allocated.
136  RegExpStackScope stack_scope(isolate);
137  Address stack_base = stack_scope.stack()->stack_base();
138 
  // NOTE(review): direct_call is always 0 on this path; presumably it tells
  // the generated code that it was entered from C++ rather than directly
  // from other generated code - confirm against the code generators.
139  int direct_call = 0;
140  int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
141  input,
142  start_offset,
143  input_start,
144  input_end,
145  output,
146  output_size,
147  stack_base,
148  direct_call,
149  isolate);
  // RETRY is treated as the lowest valid result value.
150  DCHECK(result >= RETRY);
151 
152  if (result == EXCEPTION && !isolate->has_pending_exception()) {
153  // We detected a stack overflow (on the backtrack stack) in RegExp code,
154  // but haven't created the exception yet.
155  isolate->StackOverflow();
156  }
157  return static_cast<Result>(result);
158 }
159 
160 
// 256-entry table (32 rows of 8), one entry per character code 0x00-0xFF.
// An entry is 0xff for the characters '0'-'9', 'A'-'Z', '_' and 'a'-'z',
// and 0x00 for every other code, including the whole upper (Latin-1) half.
// Rows 0x00-0x2F: no word characters.
162  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
163  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
164  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
165  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
166 
167  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
168  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
169  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
170  0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
171 
// The first entry of the next row is '@' (0x40), which is not a word character.
172  0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
173  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
174  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
175  0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
176 
// The first entry of the next row is '`' (0x60), which is not a word character.
177  0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
178  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
179  0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
180  0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
181  // Latin-1 range
// Codes 0x80-0xFF: every entry is 0x00.
182  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
183  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
184  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
185  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
186 
187  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
188  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
189  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
190  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
191 
192  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
193  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196 
197  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200  0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201 };
202 
203 
205  Address byte_offset1,
206  Address byte_offset2,
207  size_t byte_length,
208  Isolate* isolate) {
  // Compares two runs of two-byte (uc16) characters, treating characters as
  // equal when they are identical or have the same canonicalized form.
  //   byte_offset1, byte_offset2: start addresses of the two runs.
  //   byte_length: length of each run in bytes; must be even (asserted).
  // Returns 1 when every character pair matches, 0 at the first mismatch.
  // NOTE(review): the declaration of canonicalize is not part of this
  // listing; presumably it is obtained from
  // isolate->regexp_macro_assembler_canonicalize() - confirm.
211  // This function is not allowed to cause a garbage collection.
212  // A GC might move the calling generated code and invalidate the
213  // return address on the stack.
214  DCHECK(byte_length % 2 == 0);
215  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
216  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
  // Convert the byte count into a count of two-byte characters.
217  size_t length = byte_length >> 1;
218 
219  for (size_t i = 0; i < length; i++) {
220  unibrow::uchar c1 = substring1[i];
221  unibrow::uchar c2 = substring2[i];
  // Identical code units need no canonicalization.
222  if (c1 != c2) {
  // s1 starts out holding c1 itself, so it is defined even if get() does
  // not write a result (assumption about get() - confirm).
223  unibrow::uchar s1[1] = { c1 };
224  canonicalize->get(c1, '\0', s1);
  // If the canonical form of c1 already equals c2 the pair matches;
  // otherwise canonicalize c2 as well and compare the canonical forms.
225  if (s1[0] != c2) {
226  unibrow::uchar s2[1] = { c2 };
227  canonicalize->get(c2, '\0', s2);
228  if (s1[0] != s2[0]) {
229  return 0;
230  }
231  }
232  }
233  }
234  return 1;
235 }
236 
237 
239  Address* stack_base,
240  Isolate* isolate) {
  // Requests twice the current capacity for the isolate's regexp stack.
  //   stack_base: in/out; must equal the regexp stack's current base on entry
  //               (asserted) and is overwritten with the new base on success.
  // Returns NULL, leaving *stack_base untouched, if EnsureCapacity() fails.
  // Otherwise returns a stack pointer at the same distance below the new
  // base as stack_pointer was below the old one.
  // NOTE(review): stack_pointer is a parameter declared on a line missing
  // from this listing.
241  RegExpStack* regexp_stack = isolate->regexp_stack();
242  size_t size = regexp_stack->stack_capacity();
243  Address old_stack_base = regexp_stack->stack_base();
244  DCHECK(old_stack_base == *stack_base);
  // The in-use part lies between stack_pointer and the base and must fit
  // within the current capacity.
245  DCHECK(stack_pointer <= old_stack_base);
246  DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
247  Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
248  if (new_stack_base == NULL) {
249  return NULL;
250  }
251  *stack_base = new_stack_base;
  // Measured from the old base; presumably EnsureCapacity() has already
  // moved the existing contents to the new allocation - confirm.
252  intptr_t stack_content_size = old_stack_base - stack_pointer;
253  return new_stack_base - stack_content_size;
254 }
255 
256 #endif // V8_INTERPRETED_REGEXP
257 
258 } } // namespace v8::internal
int get(uchar c, uchar n, uchar *result)
Definition: unicode-inl.h:27
RegExpStack * regexp_stack()
Definition: isolate.h:960
unibrow::Mapping< unibrow::Ecma262Canonicalize > * regexp_macro_assembler_canonicalize()
Definition: isolate.h:956
Object * StackOverflow()
Definition: isolate.cc:773
bool has_pending_exception()
Definition: isolate.h:581
static Address GrowStack(Address stack_pointer, Address *stack_top, Isolate *isolate)
static Result Execute(Code *code, String *input, int start_offset, const byte *input_start, const byte *input_end, int *output, int output_size, Isolate *isolate)
static Result Match(Handle< Code > regexp, Handle< String > subject, int *offsets_vector, int offsets_vector_length, int previous_index, Isolate *isolate)
static const byte * StringCharacterPosition(String *subject, int start_index)
static int CaseInsensitiveCompareUC16(Address byte_offset1, Address byte_offset2, size_t byte_length, Isolate *isolate)
RegExpStack * stack() const
Definition: regexp-stack.h:26
Address EnsureCapacity(size_t size)
Definition: regexp-stack.cc:66
bool IsOneByteRepresentation() const
Definition: objects-inl.h:337
enable harmony numeric enable harmony object literal extensions Optimize object size
enable harmony numeric enable harmony object literal extensions Optimize object Array DOM strings and string trace pretenuring decisions of HAllocate instructions Enables optimizations which favor memory size over execution speed maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining trace the tracking of allocation sites deoptimize every n garbage collections perform array bounds checks elimination analyze liveness of environment slots and zap dead values flushes the cache of optimized code for closures on every GC allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes enable context specialization in TurboFan execution budget before interrupt is triggered max percentage of megamorphic generic ICs to allow optimization enable use of SAHF instruction if enable use of VFP3 instructions if available enable use of NEON instructions if enable use of SDIV and UDIV instructions if enable use of MLS instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of d16 d31 registers on ARM this requires VFP3 force all emitted branches to be in long enable alignment of csp to bytes on platforms which prefer the register to always be NULL
#define DCHECK(condition)
Definition: logging.h:205
#define DCHECK_EQ(v1, v2)
Definition: logging.h:206
unsigned int uchar
Definition: unicode.h:17
const SwVfpRegister s1
const SwVfpRegister s2
byte * Address
Definition: globals.h:101
uint16_t uc16
Definition: globals.h:184
Debugger support for the V8 JavaScript engine.
Definition: accessors.cc:20
#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8)
Definition: simulator-arm.h:36