regexp-macro-assembler-arm64.cc
1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/v8.h"
6 
7 #if V8_TARGET_ARCH_ARM64
8 
9 #include "src/code-stubs.h"
10 #include "src/cpu-profiler.h"
11 #include "src/log.h"
12 #include "src/macro-assembler.h"
13 #include "src/regexp-macro-assembler.h"
14 #include "src/regexp-stack.h"
15 #include "src/unicode.h"
16 
17 #include "src/arm64/regexp-macro-assembler-arm64.h"
18 
19 namespace v8 {
20 namespace internal {
21 
22 #ifndef V8_INTERPRETED_REGEXP
23 /*
24  * This assembler uses the following register assignment convention:
25  * - w19 : Used to temporarily store a value before a call to C code.
26  * See CheckNotBackReferenceIgnoreCase.
27  * - x20 : Pointer to the current code object (Code*),
28  * it includes the heap object tag.
29  * - w21 : Current position in input, as negative offset from
30  * the end of the string. Please notice that this is
31  * the byte offset, not the character offset!
32  * - w22 : Currently loaded character. Must be loaded using
33  * LoadCurrentCharacter before using any of the dispatch methods.
34  * - x23 : Points to tip of backtrack stack.
35  * - w24 : Position of the first character minus one: non_position_value.
36  * Used to initialize capture registers.
37  * - x25 : Address at the end of the input string: input_end.
38  * Points to byte after last character in input.
39  * - x26 : Address at the start of the input string: input_start.
40  * - w27 : Where to start in the input string.
41  * - x28 : Output array pointer.
42  * - x29/fp : Frame pointer. Used to access arguments, local variables and
43  * RegExp registers.
44  * - x16/x17 : IP registers, used by assembler. Very volatile.
45  * - csp : Points to tip of C stack.
46  *
47  * - x0-x7 : Used as a cache to store 32 bit capture registers. These
48  * registers need to be retained every time a call to C code
49  * is done.
50  *
51  * The remaining registers are free for computations.
52  * Each call to a public method should retain this convention.
53  *
54  * The stack will have the following structure:
55  *
56  * Location Name Description
57  * (as referred to in
58  * the code)
59  *
60  * - fp[104] isolate Address of the current isolate.
61  * - fp[96] return_address Secondary link/return address
62  * used by an exit frame if this is a
63  * native call.
64  * ^^^ csp when called ^^^
65  * - fp[88] lr Return from the RegExp code.
66  * - fp[80] r29 Old frame pointer (CalleeSaved).
67  * - fp[0..72] r19-r28 Backup of CalleeSaved registers.
68  * - fp[-8] direct_call 1 => Direct call from JavaScript code.
69  * 0 => Call through the runtime system.
70  * - fp[-16] stack_base High end of the memory area to use as
71  * the backtracking stack.
72  * - fp[-24] output_size Output may fit multiple sets of matches.
73  * - fp[-32] input Handle containing the input string.
74  * - fp[-40] success_counter
75  * ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
76  * - fp[-44] register N Capture registers initialized with
77  * - fp[-48] register N + 1 non_position_value.
78  * ... The first kNumCachedRegisters (N) registers
79  * ... are cached in x0 to x7.
80  * ... Only positions must be stored in the first
81  * - ... num_saved_registers_ registers.
82  * - ...
83  * - register N + num_registers - 1
84  * ^^^^^^^^^ csp ^^^^^^^^^
85  *
86  * The first num_saved_registers_ registers are initialized to point to
87  * "character -1" in the string (i.e., char_size() bytes before the first
88  * character of the string). The remaining registers start out as garbage.
89  *
90  * The data up to the return address must be placed there by the calling
91  * code and the remaining arguments are passed in registers, e.g. by calling the
92  * code entry as cast to a function with the signature:
93  * int (*match)(String* input,
94  * int start_offset,
95  * Address input_start,
96  * Address input_end,
97  * int* output,
98  * int output_size,
99  * Address stack_base,
100  * bool direct_call = false,
101  * Address secondary_return_address, // Only used by native call.
102  * Isolate* isolate)
103  * The call is performed by NativeRegExpMacroAssembler::Execute()
104  * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
105  * in arm64/simulator-arm64.h.
106  * When calling as a non-direct call (i.e., from C++ code), the return address
107  * area is overwritten with the LR register by the RegExp code. When doing a
108  * direct call from generated code, the return address is placed there by
109  * the calling code, as in a normal exit frame.
110  */
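// --------------------------------------------------------------------------
// Illustrative sketch, not part of the original file: the generated code is
// entered by treating its entry point as a function with the signature
// documented above. The typedef and variable names below simply mirror that
// comment and are hypothetical; in the real pipeline the call goes through
// NativeRegExpMacroAssembler::Execute() and the CALL_GENERATED_REGEXP_CODE
// macro rather than a cast like this.
//
//   typedef int (*RegExpCodeEntry)(String* input,
//                                  int start_offset,
//                                  Address input_start,
//                                  Address input_end,
//                                  int* output,
//                                  int output_size,
//                                  Address stack_base,
//                                  bool direct_call,
//                                  Address secondary_return_address,
//                                  Isolate* isolate);
//
//   // Hypothetical direct invocation:
//   int result = entry(input, start_offset, input_start, input_end,
//                      output, output_size, stack_base,
//                      false /* direct_call */, NULL, isolate);
// --------------------------------------------------------------------------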
111 
112 #define __ ACCESS_MASM(masm_)
113 
114 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(
115  Mode mode,
116  int registers_to_save,
117  Zone* zone)
118  : NativeRegExpMacroAssembler(zone),
119  masm_(new MacroAssembler(zone->isolate(), NULL, kRegExpCodeSize)),
120  mode_(mode),
121  num_registers_(registers_to_save),
122  num_saved_registers_(registers_to_save),
123  entry_label_(),
124  start_label_(),
125  success_label_(),
126  backtrack_label_(),
127  exit_label_() {
128  __ SetStackPointer(csp);
129  DCHECK_EQ(0, registers_to_save % 2);
130  // We can cache at most 16 W registers in x0-x7.
131  STATIC_ASSERT(kNumCachedRegisters <= 16);
132  STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
133  __ B(&entry_label_); // We'll write the entry code later.
134  __ Bind(&start_label_); // And then continue from here.
135 }
136 
137 
138 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
139  delete masm_;
140  // Unuse labels in case we throw away the assembler without calling GetCode.
141  entry_label_.Unuse();
142  start_label_.Unuse();
143  success_label_.Unuse();
144  backtrack_label_.Unuse();
145  exit_label_.Unuse();
146  check_preempt_label_.Unuse();
147  stack_overflow_label_.Unuse();
148 }
149 
150 int RegExpMacroAssemblerARM64::stack_limit_slack() {
151  return RegExpStack::kStackLimitSlack;
152 }
153 
154 
155 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
156  if (by != 0) {
157  __ Add(current_input_offset(),
158  current_input_offset(), by * char_size());
159  }
160 }
161 
162 
163 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
164  DCHECK((reg >= 0) && (reg < num_registers_));
165  if (by != 0) {
166  Register to_advance;
167  RegisterState register_state = GetRegisterState(reg);
168  switch (register_state) {
169  case STACKED:
170  __ Ldr(w10, register_location(reg));
171  __ Add(w10, w10, by);
172  __ Str(w10, register_location(reg));
173  break;
174  case CACHED_LSW:
175  to_advance = GetCachedRegister(reg);
176  __ Add(to_advance, to_advance, by);
177  break;
178  case CACHED_MSW:
179  to_advance = GetCachedRegister(reg);
180  __ Add(to_advance, to_advance,
181  static_cast<int64_t>(by) << kWRegSizeInBits);
182  break;
183  default:
184  UNREACHABLE();
185  break;
186  }
187  }
188 }
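// Illustrative sketch, not part of the original file: the first
// kNumCachedRegisters regexp registers are packed two per X register (x0-x7).
// Register 2*i lives in the low 32 bits (CACHED_LSW) and register 2*i+1 in the
// high 32 bits (CACHED_MSW) of the same X register, which is why the
// CACHED_MSW case above adds "by << kWRegSizeInBits" to the whole 64-bit
// register. The helper name below is hypothetical.
static uint32_t UnpackCachedRegister(uint64_t x_reg_value,
                                     bool most_significant_half) {
  return most_significant_half ? static_cast<uint32_t>(x_reg_value >> 32)
                               : static_cast<uint32_t>(x_reg_value);
}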
189 
190 
191 void RegExpMacroAssemblerARM64::Backtrack() {
192  CheckPreemption();
193  Pop(w10);
194  __ Add(x10, code_pointer(), Operand(w10, UXTW));
195  __ Br(x10);
196 }
197 
198 
199 void RegExpMacroAssemblerARM64::Bind(Label* label) {
200  __ Bind(label);
201 }
202 
203 
204 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
205  CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
206 }
207 
208 
209 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
210  Label* on_greater) {
211  CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
212 }
213 
214 
215 void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) {
216  Label not_at_start;
217  // Did we start the match at the start of the input string?
218  CompareAndBranchOrBacktrack(start_offset(), 0, ne, &not_at_start);
219  // If we did, are we still at the start of the input string?
220  __ Add(x10, input_end(), Operand(current_input_offset(), SXTW));
221  __ Cmp(x10, input_start());
222  BranchOrBacktrack(eq, on_at_start);
223  __ Bind(&not_at_start);
224 }
225 
226 
227 void RegExpMacroAssemblerARM64::CheckNotAtStart(Label* on_not_at_start) {
228  // Did we start the match at the start of the input string?
229  CompareAndBranchOrBacktrack(start_offset(), 0, ne, on_not_at_start);
230  // If we did, are we still at the start of the input string?
231  __ Add(x10, input_end(), Operand(current_input_offset(), SXTW));
232  __ Cmp(x10, input_start());
233  BranchOrBacktrack(ne, on_not_at_start);
234 }
235 
236 
237 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) {
238  CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
239 }
240 
241 
242 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str,
243  int cp_offset,
244  Label* on_failure,
245  bool check_end_of_string) {
246  // This method is only ever called from the cctests.
247 
248  if (check_end_of_string) {
249  // Is the last character of the required match inside the string?
250  CheckPosition(cp_offset + str.length() - 1, on_failure);
251  }
252 
253  Register characters_address = x11;
254 
255  __ Add(characters_address,
256  input_end(),
257  Operand(current_input_offset(), SXTW));
258  if (cp_offset != 0) {
259  __ Add(characters_address, characters_address, cp_offset * char_size());
260  }
261 
262  for (int i = 0; i < str.length(); i++) {
263  if (mode_ == LATIN1) {
264  __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
265  DCHECK(str[i] <= String::kMaxOneByteCharCode);
266  } else {
267  __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
268  }
269  CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
270  }
271 }
272 
273 
274 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
275  __ Ldr(w10, MemOperand(backtrack_stackpointer()));
276  __ Cmp(current_input_offset(), w10);
277  __ Cset(x11, eq);
278  __ Add(backtrack_stackpointer(),
279  backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
280  BranchOrBacktrack(eq, on_equal);
281 }
282 
283 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
284  int start_reg,
285  Label* on_no_match) {
286  Label fallthrough;
287 
288  Register capture_start_offset = w10;
289  // Save the capture length in a callee-saved register so it will
290  // be preserved if we call a C helper.
291  Register capture_length = w19;
292  DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
293 
294  // Find length of back-referenced capture.
295  DCHECK((start_reg % 2) == 0);
296  if (start_reg < kNumCachedRegisters) {
297  __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
298  __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
299  } else {
300  __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
301  }
302  __ Sub(capture_length, w11, capture_start_offset); // Length to check.
303  // Succeed on empty capture (including no capture).
304  __ Cbz(capture_length, &fallthrough);
305 
306  // Check that there are enough characters left in the input.
307  __ Cmn(capture_length, current_input_offset());
308  BranchOrBacktrack(gt, on_no_match);
309 
310  if (mode_ == LATIN1) {
311  Label success;
312  Label fail;
313  Label loop_check;
314 
315  Register capture_start_address = x12;
316  Register capture_end_address = x13;
317  Register current_position_address = x14;
318 
319  __ Add(capture_start_address,
320  input_end(),
321  Operand(capture_start_offset, SXTW));
322  __ Add(capture_end_address,
323  capture_start_address,
324  Operand(capture_length, SXTW));
325  __ Add(current_position_address,
326  input_end(),
327  Operand(current_input_offset(), SXTW));
328 
329  Label loop;
330  __ Bind(&loop);
331  __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
332  __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
333  __ Cmp(w10, w11);
334  __ B(eq, &loop_check);
335 
336  // Mismatch, try case-insensitive match (converting letters to lower-case).
337  __ Orr(w10, w10, 0x20); // Convert capture character to lower-case.
338  __ Orr(w11, w11, 0x20); // Also convert input character.
339  __ Cmp(w11, w10);
340  __ B(ne, &fail);
341  __ Sub(w10, w10, 'a');
342  __ Cmp(w10, 'z' - 'a'); // Is w10 a lowercase letter?
343  __ B(ls, &loop_check); // In range 'a'-'z'.
344  // Latin-1: Check for values in range [224,254] but not 247.
345  __ Sub(w10, w10, 224 - 'a');
346  __ Cmp(w10, 254 - 224);
347  __ Ccmp(w10, 247 - 224, ZFlag, ls); // Check for 247.
348  __ B(eq, &fail); // Weren't Latin-1 letters.
349 
350  __ Bind(&loop_check);
351  __ Cmp(capture_start_address, capture_end_address);
352  __ B(lt, &loop);
353  __ B(&success);
354 
355  __ Bind(&fail);
356  BranchOrBacktrack(al, on_no_match);
357 
358  __ Bind(&success);
359  // Compute new value of character position after the matched part.
360  __ Sub(current_input_offset().X(), current_position_address, input_end());
361  if (masm_->emit_debug_code()) {
362  __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
363  __ Ccmp(current_input_offset(), 0, NoFlag, eq);
364  // The current input offset should be <= 0, and fit in a W register.
365  __ Check(le, kOffsetOutOfRange);
366  }
367  } else {
368  DCHECK(mode_ == UC16);
369  int argument_count = 4;
370 
371  // The cached registers need to be retained.
372  CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
373  DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
374  __ PushCPURegList(cached_registers);
375 
376  // Put arguments into arguments registers.
377  // Parameters are
378  // x0: Address byte_offset1 - Address captured substring's start.
379  // x1: Address byte_offset2 - Address of current character position.
380  // w2: size_t byte_length - length of capture in bytes(!)
381  // x3: Isolate* isolate
382 
383  // Address of start of capture.
384  __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
385  // Length of capture.
386  __ Mov(w2, capture_length);
387  // Address of current input position.
388  __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
389  // Isolate.
390  __ Mov(x3, ExternalReference::isolate_address(isolate()));
391 
392  {
393  AllowExternalCallThatCantCauseGC scope(masm_);
394  ExternalReference function =
395  ExternalReference::re_case_insensitive_compare_uc16(isolate());
396  __ CallCFunction(function, argument_count);
397  }
398 
399  // Check if function returned non-zero for success or zero for failure.
400  // x0 is one of the registers used as a cache so it must be tested before
401  // the cache is restored.
402  __ Cmp(x0, 0);
403  __ PopCPURegList(cached_registers);
404  BranchOrBacktrack(eq, on_no_match);
405 
406  // On success, increment position by length of capture.
407  __ Add(current_input_offset(), current_input_offset(), capture_length);
408  }
409 
410  __ Bind(&fallthrough);
411 }
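// Illustrative sketch, not part of the original file: the Latin-1 loop above
// is equivalent to the scalar check below. OR-ing with 0x20 lower-cases both
// ASCII letters and the Latin-1 letters in [224, 254]; 247 (the division sign)
// sits in that range without being a letter, which is why it is rejected
// explicitly. The helper name is hypothetical.
static bool Latin1CharsEqualIgnoringCase(unsigned capture_char,
                                         unsigned input_char) {
  if (capture_char == input_char) return true;
  unsigned c1 = capture_char | 0x20;  // Lower-case the capture character.
  unsigned c2 = input_char | 0x20;    // Lower-case the input character.
  if (c1 != c2) return false;
  if (c1 >= 'a' && c1 <= 'z') return true;            // ASCII letters.
  return (c1 >= 224) && (c1 <= 254) && (c1 != 247);   // Latin-1 letters.
}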
412 
413 void RegExpMacroAssemblerARM64::CheckNotBackReference(
414  int start_reg,
415  Label* on_no_match) {
416  Label fallthrough;
417 
418  Register capture_start_address = x12;
419  Register capture_end_address = x13;
420  Register current_position_address = x14;
421  Register capture_length = w15;
422 
423  // Find length of back-referenced capture.
424  DCHECK((start_reg % 2) == 0);
425  if (start_reg < kNumCachedRegisters) {
426  __ Mov(x10, GetCachedRegister(start_reg));
427  __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
428  } else {
429  __ Ldp(w11, w10, capture_location(start_reg, x10));
430  }
431  __ Sub(capture_length, w11, w10); // Length to check.
432  // Succeed on empty capture (including no capture).
433  __ Cbz(capture_length, &fallthrough);
434 
435  // Check that there are enough characters left in the input.
436  __ Cmn(capture_length, current_input_offset());
437  BranchOrBacktrack(gt, on_no_match);
438 
439  // Compute pointers to match string and capture string
440  __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
441  __ Add(capture_end_address,
442  capture_start_address,
443  Operand(capture_length, SXTW));
444  __ Add(current_position_address,
445  input_end(),
446  Operand(current_input_offset(), SXTW));
447 
448  Label loop;
449  __ Bind(&loop);
450  if (mode_ == LATIN1) {
451  __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
452  __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
453  } else {
454  DCHECK(mode_ == UC16);
455  __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
456  __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
457  }
458  __ Cmp(w10, w11);
459  BranchOrBacktrack(ne, on_no_match);
460  __ Cmp(capture_start_address, capture_end_address);
461  __ B(lt, &loop);
462 
463  // Move current character position to position after match.
464  __ Sub(current_input_offset().X(), current_position_address, input_end());
465  if (masm_->emit_debug_code()) {
466  __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
467  __ Ccmp(current_input_offset(), 0, NoFlag, eq);
468  // The current input offset should be <= 0, and fit in a W register.
469  __ Check(le, kOffsetOutOfRange);
470  }
471  __ Bind(&fallthrough);
472 }
473 
474 
475 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
476  Label* on_not_equal) {
477  CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
478 }
479 
480 
481 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
482  uint32_t mask,
483  Label* on_equal) {
484  __ And(w10, current_character(), mask);
485  CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
486 }
487 
488 
489 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
490  unsigned mask,
491  Label* on_not_equal) {
492  __ And(w10, current_character(), mask);
493  CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
494 }
495 
496 
497 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
498  uc16 c,
499  uc16 minus,
500  uc16 mask,
501  Label* on_not_equal) {
502  DCHECK(minus < String::kMaxUtf16CodeUnit);
503  __ Sub(w10, current_character(), minus);
504  __ And(w10, w10, mask);
505  CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
506 }
507 
508 
509 void RegExpMacroAssemblerARM64::CheckCharacterInRange(
510  uc16 from,
511  uc16 to,
512  Label* on_in_range) {
513  __ Sub(w10, current_character(), from);
514  // Unsigned lower-or-same condition.
515  CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
516 }
517 
518 
519 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
520  uc16 from,
521  uc16 to,
522  Label* on_not_in_range) {
523  __ Sub(w10, current_character(), from);
524  // Unsigned higher condition.
525  CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
526 }
527 
528 
529 void RegExpMacroAssemblerARM64::CheckBitInTable(
530  Handle<ByteArray> table,
531  Label* on_bit_set) {
532  __ Mov(x11, Operand(table));
533  if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
534  __ And(w10, current_character(), kTableMask);
535  __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
536  } else {
537  __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
538  }
539  __ Ldrb(w11, MemOperand(x11, w10, UXTW));
540  CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
541 }
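// Illustrative sketch, not part of the original file: CheckBitInTable is a
// boolean table lookup on the current character. The character is ANDed with
// kTableMask to form the index unless the table already covers every Latin-1
// character, in which case the And is skipped. The helper name and the
// table_mask parameter are hypothetical stand-ins for the ByteArray and
// kTableMask used above.
static bool CharacterIsInTable(const unsigned char* table_data,
                               unsigned current_character,
                               unsigned table_mask) {
  return table_data[current_character & table_mask] != 0;
}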
542 
543 
544 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
545  Label* on_no_match) {
546  // Range checks (c in min..max) are generally implemented by an unsigned
547  // (c - min) <= (max - min) check
548  switch (type) {
549  case 's':
550  // Match space-characters
551  if (mode_ == LATIN1) {
552  // One byte space characters are '\t'..'\r', ' ' and \u00a0.
553  Label success;
554  // Check for ' ' or 0x00a0.
555  __ Cmp(current_character(), ' ');
556  __ Ccmp(current_character(), 0x00a0, ZFlag, ne);
557  __ B(eq, &success);
558  // Check range 0x09..0x0d.
559  __ Sub(w10, current_character(), '\t');
560  CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
561  __ Bind(&success);
562  return true;
563  }
564  return false;
565  case 'S':
566  // The emitted code for generic character classes is good enough.
567  return false;
568  case 'd':
569  // Match ASCII digits ('0'..'9').
570  __ Sub(w10, current_character(), '0');
571  CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
572  return true;
573  case 'D':
574  // Match ASCII non-digits.
575  __ Sub(w10, current_character(), '0');
576  CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
577  return true;
578  case '.': {
579  // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
580  // Here we emit the conditional branch only once at the end to make branch
581  // prediction more efficient, even though we could branch out of here
582  // as soon as a character matches.
583  __ Cmp(current_character(), 0x0a);
584  __ Ccmp(current_character(), 0x0d, ZFlag, ne);
585  if (mode_ == UC16) {
586  __ Sub(w10, current_character(), 0x2028);
587  // If the Z flag was set we clear the flags to force a branch.
588  __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
589  // ls -> !((C==1) && (Z==0))
590  BranchOrBacktrack(ls, on_no_match);
591  } else {
592  BranchOrBacktrack(eq, on_no_match);
593  }
594  return true;
595  }
596  case 'n': {
597  // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
598  // We have to check all 4 newline characters before emitting
599  // the conditional branch.
600  __ Cmp(current_character(), 0x0a);
601  __ Ccmp(current_character(), 0x0d, ZFlag, ne);
602  if (mode_ == UC16) {
603  __ Sub(w10, current_character(), 0x2028);
604  // If the Z flag was set we clear the flags to force a fall-through.
605  __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
606  // hi -> (C==1) && (Z==0)
607  BranchOrBacktrack(hi, on_no_match);
608  } else {
609  BranchOrBacktrack(ne, on_no_match);
610  }
611  return true;
612  }
613  case 'w': {
614  if (mode_ != LATIN1) {
615  // Table is 256 entries, so all Latin1 characters can be tested.
616  CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
617  }
618  ExternalReference map = ExternalReference::re_word_character_map();
619  __ Mov(x10, map);
620  __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
621  CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
622  return true;
623  }
624  case 'W': {
625  Label done;
626  if (mode_ != LATIN1) {
627  // Table is 256 entries, so all Latin1 characters can be tested.
628  __ Cmp(current_character(), 'z');
629  __ B(hi, &done);
630  }
631  ExternalReference map = ExternalReference::re_word_character_map();
632  __ Mov(x10, map);
633  __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
634  CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
635  __ Bind(&done);
636  return true;
637  }
638  case '*':
639  // Match any character.
640  return true;
641  // No custom implementation (yet): s(UC16), S(UC16).
642  default:
643  return false;
644  }
645 }
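// Illustrative sketch, not part of the original file: the unsigned range trick
// mentioned at the top of the switch above. A two-sided check
// "min <= c && c <= max" is folded into a single unsigned comparison, which is
// why e.g. the 'd' case emits only a Sub followed by one compare-and-branch.
// The helper name is hypothetical.
static bool CharacterInRange(unsigned c, unsigned min, unsigned max) {
  // If c < min, the subtraction wraps around to a huge unsigned value and the
  // comparison fails, so both bounds are checked at once.
  return (c - min) <= (max - min);
}
// Example: CharacterInRange(current_character, '0', '9') corresponds to the
// code generated for the 'd' (ASCII digit) class above.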
646 
647 
648 void RegExpMacroAssemblerARM64::Fail() {
649  __ Mov(w0, FAILURE);
650  __ B(&exit_label_);
651 }
652 
653 
654 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
655  Label return_w0;
656  // Finalize code - write the entry point code now we know how many
657  // registers we need.
658 
659  // Entry code:
660  __ Bind(&entry_label_);
661 
662  // Arguments on entry:
663  // x0: String* input
664  // x1: int start_offset
665  // x2: byte* input_start
666  // x3: byte* input_end
667  // x4: int* output array
668  // x5: int output array size
669  // x6: Address stack_base
670  // x7: int direct_call
671 
672  // The stack pointer should be csp on entry.
673  // csp[8]: address of the current isolate
674  // csp[0]: secondary link/return address used by native call
675 
676  // Tell the system that we have a stack frame. Because the type is MANUAL, no
677  // code is generated.
678  FrameScope scope(masm_, StackFrame::MANUAL);
679 
680  // Push registers on the stack; only push the argument registers that we need.
681  CPURegList argument_registers(x0, x5, x6, x7);
682 
683  CPURegList registers_to_retain = kCalleeSaved;
684  DCHECK(kCalleeSaved.Count() == 11);
685  registers_to_retain.Combine(lr);
686 
687  DCHECK(csp.Is(__ StackPointer()));
688  __ PushCPURegList(registers_to_retain);
689  __ PushCPURegList(argument_registers);
690 
691  // Set frame pointer in place.
692  __ Add(frame_pointer(), csp, argument_registers.Count() * kPointerSize);
693 
694  // Initialize callee-saved registers.
695  __ Mov(start_offset(), w1);
696  __ Mov(input_start(), x2);
697  __ Mov(input_end(), x3);
698  __ Mov(output_array(), x4);
699 
700  // Set the number of registers we will need to allocate, that is:
701  // - success_counter (X register)
702  // - (num_registers_ - kNumCachedRegisters) (W registers)
703  int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters;
704  // Do not allocate registers on the stack if they can all be cached.
705  if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; }
706  // Make room for the success_counter.
707  num_wreg_to_allocate += 2;
708 
709  // Make sure the stack alignment will be respected.
710  int alignment = masm_->ActivationFrameAlignment();
711  DCHECK_EQ(alignment % 16, 0);
712  int align_mask = (alignment / kWRegSize) - 1;
713  num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask;
714 
715  // Check if we have space on the stack.
716  Label stack_limit_hit;
717  Label stack_ok;
718 
719  ExternalReference stack_limit =
720  ExternalReference::address_of_stack_limit(isolate());
721  __ Mov(x10, stack_limit);
722  __ Ldr(x10, MemOperand(x10));
723  __ Subs(x10, csp, x10);
724 
725  // Handle it if the stack pointer is already below the stack limit.
726  __ B(ls, &stack_limit_hit);
727 
728  // Check if there is room for the variable number of registers above
729  // the stack limit.
730  __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
731  __ B(hs, &stack_ok);
732 
733  // Exit with OutOfMemory exception. There is not enough space on the stack
734  // for our working registers.
735  __ Mov(w0, EXCEPTION);
736  __ B(&return_w0);
737 
738  __ Bind(&stack_limit_hit);
739  CallCheckStackGuardState(x10);
740  // If returned value is non-zero, we exit with the returned value as result.
741  __ Cbnz(w0, &return_w0);
742 
743  __ Bind(&stack_ok);
744 
745  // Allocate space on stack.
746  __ Claim(num_wreg_to_allocate, kWRegSize);
747 
748  // Initialize success_counter with 0.
749  __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
750 
751  // Find negative length (offset of start relative to end).
752  __ Sub(x10, input_start(), input_end());
753  if (masm_->emit_debug_code()) {
754  // Check that the input string length is < 2^30.
755  __ Neg(x11, x10);
756  __ Cmp(x11, (1<<30) - 1);
757  __ Check(ls, kInputStringTooLong);
758  }
759  __ Mov(current_input_offset(), w10);
760 
761  // The non-position value is used as a clearing value for the
762  // capture registers; it corresponds to the position of the first character
763  // minus one.
764  __ Sub(non_position_value(), current_input_offset(), char_size());
765  __ Sub(non_position_value(), non_position_value(),
766  Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
767  // We can store this value twice in an X register for initializing
768  // on-stack registers later.
769  __ Orr(twice_non_position_value(),
770  non_position_value().X(),
771  Operand(non_position_value().X(), LSL, kWRegSizeInBits));
772 
773  // Initialize code pointer register.
774  __ Mov(code_pointer(), Operand(masm_->CodeObject()));
775 
776  Label load_char_start_regexp, start_regexp;
777  // Load newline if index is at start, previous character otherwise.
778  __ Cbnz(start_offset(), &load_char_start_regexp);
779  __ Mov(current_character(), '\n');
780  __ B(&start_regexp);
781 
782  // Global regexp restarts matching here.
783  __ Bind(&load_char_start_regexp);
784  // Load previous char as initial value of current character register.
785  LoadCurrentCharacterUnchecked(-1, 1);
786  __ Bind(&start_regexp);
787  // Initialize on-stack registers.
788  if (num_saved_registers_ > 0) {
789  ClearRegisters(0, num_saved_registers_ - 1);
790  }
791 
792  // Initialize backtrack stack pointer.
793  __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase));
794 
795  // Execute
796  __ B(&start_label_);
797 
798  if (backtrack_label_.is_linked()) {
799  __ Bind(&backtrack_label_);
800  Backtrack();
801  }
802 
803  if (success_label_.is_linked()) {
804  Register first_capture_start = w15;
805 
806  // Save captures when successful.
807  __ Bind(&success_label_);
808 
809  if (num_saved_registers_ > 0) {
810  // V8 expects the output to be an int32_t array.
811  Register capture_start = w12;
812  Register capture_end = w13;
813  Register input_length = w14;
814 
815  // Copy captures to output.
816 
817  // Get string length.
818  __ Sub(x10, input_end(), input_start());
819  if (masm_->emit_debug_code()) {
820  // Check that the input string length is < 2^30.
821  __ Cmp(x10, (1<<30) - 1);
822  __ Check(ls, kInputStringTooLong);
823  }
824  // input_start has a start_offset offset on entry. We need to include
825  // it when computing the length of the whole string.
826  if (mode_ == UC16) {
827  __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
828  } else {
829  __ Add(input_length, start_offset(), w10);
830  }
831 
832  // Copy the results to the output array from the cached registers first.
833  for (int i = 0;
834  (i < num_saved_registers_) && (i < kNumCachedRegisters);
835  i += 2) {
836  __ Mov(capture_start.X(), GetCachedRegister(i));
837  __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
838  if ((i == 0) && global_with_zero_length_check()) {
839  // Keep capture start for the zero-length check later.
840  __ Mov(first_capture_start, capture_start);
841  }
842  // Offsets need to be relative to the start of the string.
843  if (mode_ == UC16) {
844  __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
845  __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
846  } else {
847  __ Add(capture_start, input_length, capture_start);
848  __ Add(capture_end, input_length, capture_end);
849  }
850  // The output pointer advances for a possible global match.
851  __ Stp(capture_start,
852  capture_end,
853  MemOperand(output_array(), kPointerSize, PostIndex));
854  }
855 
856  // Only carry on if there are more than kNumCachedRegisters capture
857  // registers.
858  int num_registers_left_on_stack =
859  num_saved_registers_ - kNumCachedRegisters;
860  if (num_registers_left_on_stack > 0) {
861  Register base = x10;
862  // There is always an even number of capture registers; a pair of
863  // registers holds the two offsets of one match.
864  DCHECK_EQ(0, num_registers_left_on_stack % 2);
865  __ Add(base, frame_pointer(), kFirstCaptureOnStack);
866 
867  // We can unroll the loop here; we should not unroll for fewer than 2
868  // registers.
869  STATIC_ASSERT(kNumRegistersToUnroll > 2);
870  if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
871  for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
872  __ Ldp(capture_end,
873  capture_start,
874  MemOperand(base, -kPointerSize, PostIndex));
875  if ((i == 0) && global_with_zero_length_check()) {
876  // Keep capture start for the zero-length check later.
877  __ Mov(first_capture_start, capture_start);
878  }
879  // Offsets need to be relative to the start of the string.
880  if (mode_ == UC16) {
881  __ Add(capture_start,
882  input_length,
883  Operand(capture_start, ASR, 1));
884  __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
885  } else {
886  __ Add(capture_start, input_length, capture_start);
887  __ Add(capture_end, input_length, capture_end);
888  }
889  // The output pointer advances for a possible global match.
890  __ Stp(capture_start,
891  capture_end,
892  MemOperand(output_array(), kPointerSize, PostIndex));
893  }
894  } else {
895  Label loop, start;
896  __ Mov(x11, num_registers_left_on_stack);
897 
898  __ Ldp(capture_end,
899  capture_start,
900  MemOperand(base, -kPointerSize, PostIndex));
901  if (global_with_zero_length_check()) {
902  __ Mov(first_capture_start, capture_start);
903  }
904  __ B(&start);
905 
906  __ Bind(&loop);
907  __ Ldp(capture_end,
908  capture_start,
909  MemOperand(base, -kPointerSize, PostIndex));
910  __ Bind(&start);
911  if (mode_ == UC16) {
912  __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
913  __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
914  } else {
915  __ Add(capture_start, input_length, capture_start);
916  __ Add(capture_end, input_length, capture_end);
917  }
918  // The output pointer advances for a possible global match.
919  __ Stp(capture_start,
920  capture_end,
921  MemOperand(output_array(), kPointerSize, PostIndex));
922  __ Sub(x11, x11, 2);
923  __ Cbnz(x11, &loop);
924  }
925  }
926  }
927 
928  if (global()) {
929  Register success_counter = w0;
930  Register output_size = x10;
931  // Restart matching if the regular expression is flagged as global.
932 
933  // Increment success counter.
934  __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
935  __ Add(success_counter, success_counter, 1);
936  __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
937 
938  // Capture results have been stored, so the number of remaining global
939  // output registers is reduced by the number of stored captures.
940  __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
941  __ Sub(output_size, output_size, num_saved_registers_);
942  // Check whether we have enough room for another set of capture results.
943  __ Cmp(output_size, num_saved_registers_);
944  __ B(lt, &return_w0);
945 
946  // The output pointer is already set to the next field in the output
947  // array.
948  // Update output size on the frame before we restart matching.
949  __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
950 
951  if (global_with_zero_length_check()) {
952  // Special case for zero-length matches.
953  __ Cmp(current_input_offset(), first_capture_start);
954  // Not a zero-length match, restart.
955  __ B(ne, &load_char_start_regexp);
956  // Offset from the end is zero if we already reached the end.
957  __ Cbz(current_input_offset(), &return_w0);
958  // Advance current position after a zero-length match.
959  __ Add(current_input_offset(),
960  current_input_offset(),
961  Operand((mode_ == UC16) ? 2 : 1));
962  }
963 
964  __ B(&load_char_start_regexp);
965  } else {
966  __ Mov(w0, SUCCESS);
967  }
968  }
969 
970  if (exit_label_.is_linked()) {
971  // Exit and return w0
972  __ Bind(&exit_label_);
973  if (global()) {
974  __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
975  }
976  }
977 
978  __ Bind(&return_w0);
979 
980  // Set stack pointer back to first register to retain
981  DCHECK(csp.Is(__ StackPointer()));
982  __ Mov(csp, fp);
983  __ AssertStackConsistency();
984 
985  // Restore registers.
986  __ PopCPURegList(registers_to_retain);
987 
988  __ Ret();
989 
990  Label exit_with_exception;
991  // Registers x0 to x7 are used to store the first captures; they need to be
992  // retained over calls to C++ code.
993  CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
994  DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
995 
996  if (check_preempt_label_.is_linked()) {
997  __ Bind(&check_preempt_label_);
998  SaveLinkRegister();
999  // The cached registers need to be retained.
1000  __ PushCPURegList(cached_registers);
1001  CallCheckStackGuardState(x10);
1002  // Returning from the regexp code restores the stack (csp <- fp)
1003  // so we don't need to drop the link register from it before exiting.
1004  __ Cbnz(w0, &return_w0);
1005  // Reset the cached registers.
1006  __ PopCPURegList(cached_registers);
1007  RestoreLinkRegister();
1008  __ Ret();
1009  }
1010 
1011  if (stack_overflow_label_.is_linked()) {
1012  __ Bind(&stack_overflow_label_);
1013  SaveLinkRegister();
1014  // The cached registers need to be retained.
1015  __ PushCPURegList(cached_registers);
1016  // Call GrowStack(backtrack_stackpointer(), &stack_base)
1017  __ Mov(x2, ExternalReference::isolate_address(isolate()));
1018  __ Add(x1, frame_pointer(), kStackBase);
1019  __ Mov(x0, backtrack_stackpointer());
1020  ExternalReference grow_stack =
1021  ExternalReference::re_grow_stack(isolate());
1022  __ CallCFunction(grow_stack, 3);
1023  // If it returns NULL, we have failed to grow the stack, and
1024  // must exit with a stack-overflow exception.
1025  // Returning from the regexp code restores the stack (csp <- fp)
1026  // so we don't need to drop the link register from it before exiting.
1027  __ Cbz(w0, &exit_with_exception);
1028  // Otherwise use return value as new stack pointer.
1029  __ Mov(backtrack_stackpointer(), x0);
1030  // Reset the cached registers.
1031  __ PopCPURegList(cached_registers);
1032  RestoreLinkRegister();
1033  __ Ret();
1034  }
1035 
1036  if (exit_with_exception.is_linked()) {
1037  __ Bind(&exit_with_exception);
1038  __ Mov(w0, EXCEPTION);
1039  __ B(&return_w0);
1040  }
1041 
1042  CodeDesc code_desc;
1043  masm_->GetCode(&code_desc);
1044  Handle<Code> code = isolate()->factory()->NewCode(
1045  code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
1046  PROFILE(masm_->isolate(), RegExpCodeCreateEvent(*code, *source));
1047  return Handle<HeapObject>::cast(code);
1048 }
1049 
1050 
1051 void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1052  BranchOrBacktrack(al, to);
1053 }
1054 
1055 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1056  Label* if_ge) {
1057  Register to_compare = GetRegister(reg, w10);
1058  CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1059 }
1060 
1061 
1062 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1063  Label* if_lt) {
1064  Register to_compare = GetRegister(reg, w10);
1065  CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1066 }
1067 
1068 
1069 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1070  Register to_compare = GetRegister(reg, w10);
1071  __ Cmp(to_compare, current_input_offset());
1072  BranchOrBacktrack(eq, if_eq);
1073 }
1074 
1075 RegExpMacroAssembler::IrregexpImplementation
1076  RegExpMacroAssemblerARM64::Implementation() {
1077  return kARM64Implementation;
1078 }
1079 
1080 
1081 void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
1082  Label* on_end_of_input,
1083  bool check_bounds,
1084  int characters) {
1085  // TODO(pielan): Make sure long strings are caught before this, and not
1086  // just asserted in debug mode.
1087  DCHECK(cp_offset >= -1); // ^ and \b can look behind one character.
1088  // Be sane! (And ensure that an int32_t can be used to index the string)
1089  DCHECK(cp_offset < (1<<30));
1090  if (check_bounds) {
1091  CheckPosition(cp_offset + characters - 1, on_end_of_input);
1092  }
1093  LoadCurrentCharacterUnchecked(cp_offset, characters);
1094 }
1095 
1096 
1097 void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1098  Pop(current_input_offset());
1099 }
1100 
1101 
1102 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1103  Pop(w10);
1104  StoreRegister(register_index, w10);
1105 }
1106 
1107 
1108 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1109  if (label->is_bound()) {
1110  int target = label->pos();
1111  __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1112  } else {
1113  __ Adr(x10, label, MacroAssembler::kAdrFar);
1114  __ Sub(x10, x10, code_pointer());
1115  if (masm_->emit_debug_code()) {
1116  __ Cmp(x10, kWRegMask);
1117  // The code offset has to fit in a W register.
1118  __ Check(ls, kOffsetOutOfRange);
1119  }
1120  }
1121  Push(w10);
1122  CheckStackLimit();
1123 }
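// Illustrative sketch, not part of the original file: backtrack targets are
// pushed as 32-bit offsets from the code object rather than as absolute
// addresses, so each entry fits in a W-sized slot on the backtrack stack.
// Backtrack() rebuilds the absolute target before branching. The helper name
// is hypothetical.
static const unsigned char* BacktrackTarget(const unsigned char* code_pointer,
                                            unsigned offset) {
  // Mirrors "Add(x10, code_pointer(), Operand(w10, UXTW)); Br(x10)".
  return code_pointer + offset;
}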
1124 
1125 
1126 void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1127  Push(current_input_offset());
1128 }
1129 
1130 
1131 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1132  StackCheckFlag check_stack_limit) {
1133  Register to_push = GetRegister(register_index, w10);
1134  Push(to_push);
1135  if (check_stack_limit) CheckStackLimit();
1136 }
1137 
1138 
1139 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1140  Register cached_register;
1141  RegisterState register_state = GetRegisterState(reg);
1142  switch (register_state) {
1143  case STACKED:
1144  __ Ldr(current_input_offset(), register_location(reg));
1145  break;
1146  case CACHED_LSW:
1147  cached_register = GetCachedRegister(reg);
1148  __ Mov(current_input_offset(), cached_register.W());
1149  break;
1150  case CACHED_MSW:
1151  cached_register = GetCachedRegister(reg);
1152  __ Lsr(current_input_offset().X(), cached_register, kWRegSizeInBits);
1153  break;
1154  default:
1155  UNREACHABLE();
1156  break;
1157  }
1158 }
1159 
1160 
1161 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1162  Register read_from = GetRegister(reg, w10);
1163  __ Ldr(x11, MemOperand(frame_pointer(), kStackBase));
1164  __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1165 }
1166 
1167 
1168 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1169  Label after_position;
1170  __ Cmp(current_input_offset(), -by * char_size());
1171  __ B(ge, &after_position);
1172  __ Mov(current_input_offset(), -by * char_size());
1173  // On RegExp code entry (where this operation is used), the character before
1174  // the current position is expected to be already loaded.
1175  // We have advanced the position, so it's safe to read backwards.
1176  LoadCurrentCharacterUnchecked(-1, 1);
1177  __ Bind(&after_position);
1178 }
1179 
1180 
1181 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1182  DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1183  Register set_to = wzr;
1184  if (to != 0) {
1185  set_to = w10;
1186  __ Mov(set_to, to);
1187  }
1188  StoreRegister(register_index, set_to);
1189 }
1190 
1191 
1192 bool RegExpMacroAssemblerARM64::Succeed() {
1193  __ B(&success_label_);
1194  return global();
1195 }
1196 
1197 
1198 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1199  int cp_offset) {
1200  Register position = current_input_offset();
1201  if (cp_offset != 0) {
1202  position = w10;
1203  __ Add(position, current_input_offset(), cp_offset * char_size());
1204  }
1205  StoreRegister(reg, position);
1206 }
1207 
1208 
1209 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
1210  DCHECK(reg_from <= reg_to);
1211  int num_registers = reg_to - reg_from + 1;
1212 
1213  // If the first capture register is cached in a hardware register but not
1214  // aligned on a 64-bit one, we need to clear the first one specifically.
1215  if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
1216  StoreRegister(reg_from, non_position_value());
1217  num_registers--;
1218  reg_from++;
1219  }
1220 
1221  // Clear cached registers in pairs as far as possible.
1222  while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
1223  DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
1224  __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
1225  reg_from += 2;
1226  num_registers -= 2;
1227  }
1228 
1229  if ((num_registers % 2) == 1) {
1230  StoreRegister(reg_from, non_position_value());
1231  num_registers--;
1232  reg_from++;
1233  }
1234 
1235  if (num_registers > 0) {
1236  // If there are some remaining registers, they are stored on the stack.
1237  DCHECK(reg_from >= kNumCachedRegisters);
1238 
1239  // Move down the indexes of the registers on stack to get the correct offset
1240  // in memory.
1241  reg_from -= kNumCachedRegisters;
1242  reg_to -= kNumCachedRegisters;
1243  // We should not unroll the loop for less than 2 registers.
1244  STATIC_ASSERT(kNumRegistersToUnroll > 2);
1245  // We position the base pointer to (reg_from + 1).
1246  int base_offset = kFirstRegisterOnStack -
1247  kWRegSize - (kWRegSize * reg_from);
1248  if (num_registers > kNumRegistersToUnroll) {
1249  Register base = x10;
1250  __ Add(base, frame_pointer(), base_offset);
1251 
1252  Label loop;
1253  __ Mov(x11, num_registers);
1254  __ Bind(&loop);
1255  __ Str(twice_non_position_value(),
1256  MemOperand(base, -kPointerSize, PostIndex));
1257  __ Sub(x11, x11, 2);
1258  __ Cbnz(x11, &loop);
1259  } else {
1260  for (int i = reg_from; i <= reg_to; i += 2) {
1261  __ Str(twice_non_position_value(),
1262  MemOperand(frame_pointer(), base_offset));
1263  base_offset -= kWRegSize * 2;
1264  }
1265  }
1266  }
1267 }
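// Illustrative sketch, not part of the original file: twice_non_position_value()
// holds non_position_value() replicated into both halves of an X register (see
// the Orr in GetCode), so a single 64-bit store or move clears two capture
// registers at a time in the loops above. The helper name is hypothetical.
static uint64_t ReplicateWRegister(uint32_t value) {
  return (static_cast<uint64_t>(value) << 32) | value;
}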
1268 
1269 
1270 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1271  __ Ldr(x10, MemOperand(frame_pointer(), kStackBase));
1272  __ Sub(x10, backtrack_stackpointer(), x10);
1273  if (masm_->emit_debug_code()) {
1274  __ Cmp(x10, Operand(w10, SXTW));
1275  // The stack offset needs to fit in a W register.
1276  __ Check(eq, kOffsetOutOfRange);
1277  }
1278  StoreRegister(reg, w10);
1279 }
1280 
1281 
1282 // Helper function for reading a value out of a stack frame.
1283 template <typename T>
1284 static T& frame_entry(Address re_frame, int frame_offset) {
1285  return *reinterpret_cast<T*>(re_frame + frame_offset);
1286 }
1287 
1288 
1289 int RegExpMacroAssemblerARM64::CheckStackGuardState(Address* return_address,
1290  Code* re_code,
1291  Address re_frame,
1292  int start_offset,
1293  const byte** input_start,
1294  const byte** input_end) {
1295  Isolate* isolate = frame_entry<Isolate*>(re_frame, kIsolate);
1296  StackLimitCheck check(isolate);
1297  if (check.JsHasOverflowed()) {
1298  isolate->StackOverflow();
1299  return EXCEPTION;
1300  }
1301 
1302  // If there is no real stack overflow, the stack guard was used to interrupt
1303  // execution for another purpose.
1304 
1305  // If this is a direct call from JavaScript retry the RegExp forcing the call
1306  // through the runtime system. Currently the direct call cannot handle a GC.
1307  if (frame_entry<int>(re_frame, kDirectCall) == 1) {
1308  return RETRY;
1309  }
1310 
1311  // Prepare for possible GC.
1312  HandleScope handles(isolate);
1313  Handle<Code> code_handle(re_code);
1314 
1315  Handle<String> subject(frame_entry<String*>(re_frame, kInput));
1316 
1317  // Current string.
1318  bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
1319 
1320  DCHECK(re_code->instruction_start() <= *return_address);
1321  DCHECK(*return_address <=
1322  re_code->instruction_start() + re_code->instruction_size());
1323 
1324  Object* result = isolate->stack_guard()->HandleInterrupts();
1325 
1326  if (*code_handle != re_code) { // Return address no longer valid
1327  int delta = code_handle->address() - re_code->address();
1328  // Overwrite the return address on the stack.
1329  *return_address += delta;
1330  }
1331 
1332  if (result->IsException()) {
1333  return EXCEPTION;
1334  }
1335 
1336  Handle<String> subject_tmp = subject;
1337  int slice_offset = 0;
1338 
1339  // Extract the underlying string and the slice offset.
1340  if (StringShape(*subject_tmp).IsCons()) {
1341  subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
1342  } else if (StringShape(*subject_tmp).IsSliced()) {
1343  SlicedString* slice = SlicedString::cast(*subject_tmp);
1344  subject_tmp = Handle<String>(slice->parent());
1345  slice_offset = slice->offset();
1346  }
1347 
1348  // String might have changed.
1349  if (subject_tmp->IsOneByteRepresentation() != is_one_byte) {
1350  // If we changed between a Latin1 and a UC16 string, the specialized
1351  // code cannot be used, and we need to restart regexp matching from
1352  // scratch (including, potentially, compiling a new version of the code).
1353  return RETRY;
1354  }
1355 
1356  // Otherwise, the content of the string might have moved. It must still
1357  // be a sequential or external string with the same content.
1358  // Update the start and end pointers in the stack frame to the current
1359  // location (whether it has actually moved or not).
1360  DCHECK(StringShape(*subject_tmp).IsSequential() ||
1361  StringShape(*subject_tmp).IsExternal());
1362 
1363  // The original start address of the characters to match.
1364  const byte* start_address = *input_start;
1365 
1366  // Find the current start address of the same character at the current string
1367  // position.
1368  const byte* new_address = StringCharacterPosition(*subject_tmp,
1369  start_offset + slice_offset);
1370 
1371  if (start_address != new_address) {
1372  // If there is a difference, update the object pointer and start and end
1373  // addresses in the RegExp stack frame to match the new value.
1374  const byte* end_address = *input_end;
1375  int byte_length = static_cast<int>(end_address - start_address);
1376  frame_entry<const String*>(re_frame, kInput) = *subject;
1377  *input_start = new_address;
1378  *input_end = new_address + byte_length;
1379  } else if (frame_entry<const String*>(re_frame, kInput) != *subject) {
1380  // Subject string might have been a ConsString that underwent
1381  // short-circuiting during GC. That will not change start_address but
1382  // will change the pointer inside the subject handle.
1383  frame_entry<const String*>(re_frame, kInput) = *subject;
1384  }
1385 
1386  return 0;
1387 }
1388 
1389 
1390 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1391  Label* on_outside_input) {
1392  CompareAndBranchOrBacktrack(current_input_offset(),
1393  -cp_offset * char_size(),
1394  ge,
1395  on_outside_input);
1396 }
1397 
1398 
1399 bool RegExpMacroAssemblerARM64::CanReadUnaligned() {
1400  // TODO(pielan): See whether or not we should disable unaligned accesses.
1401  return !slow_safe();
1402 }
1403 
1404 
1405 // Private methods:
1406 
1407 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
1408  // Allocate space on the stack to store the return address. The
1409  // CheckStackGuardState C++ function will overwrite it if the code
1410  // has moved. Allocate extra space for 2 arguments passed by pointers.
1411  // AAPCS64 requires the stack to be 16 byte aligned.
1412  int alignment = masm_->ActivationFrameAlignment();
1413  DCHECK_EQ(alignment % 16, 0);
1414  int align_mask = (alignment / kXRegSize) - 1;
1415  int xreg_to_claim = (3 + align_mask) & ~align_mask;
1416 
1417  DCHECK(csp.Is(__ StackPointer()));
1418  __ Claim(xreg_to_claim);
1419 
1420  // CheckStackGuardState needs the end and start addresses of the input string.
1421  __ Poke(input_end(), 2 * kPointerSize);
1422  __ Add(x5, csp, 2 * kPointerSize);
1423  __ Poke(input_start(), kPointerSize);
1424  __ Add(x4, csp, kPointerSize);
1425 
1426  __ Mov(w3, start_offset());
1427  // RegExp code frame pointer.
1428  __ Mov(x2, frame_pointer());
1429  // Code* of self.
1430  __ Mov(x1, Operand(masm_->CodeObject()));
1431 
1432  // We need to pass a pointer to the return address as first argument.
1433  // The DirectCEntry stub will place the return address on the stack before
1434  // calling so the stack pointer will point to it.
1435  __ Mov(x0, csp);
1436 
1437  ExternalReference check_stack_guard_state =
1438  ExternalReference::re_check_stack_guard_state(isolate());
1439  __ Mov(scratch, check_stack_guard_state);
1440  DirectCEntryStub stub(isolate());
1441  stub.GenerateCall(masm_, scratch);
1442 
1443  // The input string may have been moved in memory, we need to reload it.
1444  __ Peek(input_start(), kPointerSize);
1445  __ Peek(input_end(), 2 * kPointerSize);
1446 
1447  DCHECK(csp.Is(__ StackPointer()));
1448  __ Drop(xreg_to_claim);
1449 
1450  // Reload the Code pointer.
1451  __ Mov(code_pointer(), Operand(masm_->CodeObject()));
1452 }
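// Illustrative sketch, not part of the original file: both GetCode and
// CallCheckStackGuardState round the number of slots they claim up to the
// activation frame alignment (16 bytes under AAPCS64) with the usual
// power-of-two rounding. The helper name is hypothetical.
static int RoundUpToAlignment(int register_count, int registers_per_unit) {
  int align_mask = registers_per_unit - 1;  // registers_per_unit is a power of two.
  return (register_count + align_mask) & ~align_mask;
}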
1453 
1454 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1455  Label* to) {
1456  if (condition == al) { // Unconditional.
1457  if (to == NULL) {
1458  Backtrack();
1459  return;
1460  }
1461  __ B(to);
1462  return;
1463  }
1464  if (to == NULL) {
1465  to = &backtrack_label_;
1466  }
1467  __ B(condition, to);
1468 }
1469 
1470 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1471  int immediate,
1472  Condition condition,
1473  Label* to) {
1474  if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1475  if (to == NULL) {
1476  to = &backtrack_label_;
1477  }
1478  if (condition == eq) {
1479  __ Cbz(reg, to);
1480  } else {
1481  __ Cbnz(reg, to);
1482  }
1483  } else {
1484  __ Cmp(reg, immediate);
1485  BranchOrBacktrack(condition, to);
1486  }
1487 }
1488 
1489 
1490 void RegExpMacroAssemblerARM64::CheckPreemption() {
1491  // Check for preemption.
1492  ExternalReference stack_limit =
1493  ExternalReference::address_of_stack_limit(isolate());
1494  __ Mov(x10, stack_limit);
1495  __ Ldr(x10, MemOperand(x10));
1496  DCHECK(csp.Is(__ StackPointer()));
1497  __ Cmp(csp, x10);
1498  CallIf(&check_preempt_label_, ls);
1499 }
1500 
1501 
1502 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1503  ExternalReference stack_limit =
1504  ExternalReference::address_of_regexp_stack_limit(isolate());
1505  __ Mov(x10, stack_limit);
1506  __ Ldr(x10, MemOperand(x10));
1507  __ Cmp(backtrack_stackpointer(), x10);
1508  CallIf(&stack_overflow_label_, ls);
1509 }
1510 
1511 
1512 void RegExpMacroAssemblerARM64::Push(Register source) {
1513  DCHECK(source.Is32Bits());
1514  DCHECK(!source.is(backtrack_stackpointer()));
1515  __ Str(source,
1516  MemOperand(backtrack_stackpointer(),
1517  -static_cast<int>(kWRegSize),
1518  PreIndex));
1519 }
1520 
1521 
1522 void RegExpMacroAssemblerARM64::Pop(Register target) {
1523  DCHECK(target.Is32Bits());
1524  DCHECK(!target.is(backtrack_stackpointer()));
1525  __ Ldr(target,
1526  MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1527 }
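// Illustrative sketch, not part of the original file: the backtrack stack
// stores 32-bit values and grows downwards, like the machine stack. Push and
// Pop above are a pre-indexed store and a post-indexed load on
// backtrack_stackpointer(). The helper names are hypothetical.
static void BacktrackPush(uint32_t*& backtrack_sp, uint32_t value) {
  *--backtrack_sp = value;   // Str with -kWRegSize, PreIndex.
}
static uint32_t BacktrackPop(uint32_t*& backtrack_sp) {
  return *backtrack_sp++;    // Ldr with kWRegSize, PostIndex.
}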
1528 
1529 
1530 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1531  DCHECK(register_index < kNumCachedRegisters);
1532  return Register::Create(register_index / 2, kXRegSizeInBits);
1533 }
1534 
1535 
1536 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1537  Register maybe_result) {
1538  DCHECK(maybe_result.Is32Bits());
1539  DCHECK(register_index >= 0);
1540  if (num_registers_ <= register_index) {
1541  num_registers_ = register_index + 1;
1542  }
1543  Register result;
1544  RegisterState register_state = GetRegisterState(register_index);
1545  switch (register_state) {
1546  case STACKED:
1547  __ Ldr(maybe_result, register_location(register_index));
1548  result = maybe_result;
1549  break;
1550  case CACHED_LSW:
1551  result = GetCachedRegister(register_index).W();
1552  break;
1553  case CACHED_MSW:
1554  __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1555  kWRegSizeInBits);
1556  result = maybe_result;
1557  break;
1558  default:
1559  UNREACHABLE();
1560  break;
1561  }
1562  DCHECK(result.Is32Bits());
1563  return result;
1564 }
1565 
1566 
1567 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1568  Register source) {
1569  DCHECK(source.Is32Bits());
1570  DCHECK(register_index >= 0);
1571  if (num_registers_ <= register_index) {
1572  num_registers_ = register_index + 1;
1573  }
1574 
1575  Register cached_register;
1576  RegisterState register_state = GetRegisterState(register_index);
1577  switch (register_state) {
1578  case STACKED:
1579  __ Str(source, register_location(register_index));
1580  break;
1581  case CACHED_LSW:
1582  cached_register = GetCachedRegister(register_index);
1583  if (!source.Is(cached_register.W())) {
1584  __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1585  }
1586  break;
1587  case CACHED_MSW:
1588  cached_register = GetCachedRegister(register_index);
1589  __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1590  break;
1591  default:
1592  UNREACHABLE();
1593  break;
1594  }
1595 }
1596 
1597 
1598 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1599  Label skip_call;
1600  if (condition != al) __ B(&skip_call, NegateCondition(condition));
1601  __ Bl(to);
1602  __ Bind(&skip_call);
1603 }
1604 
1605 
1606 void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1607  DCHECK(csp.Is(__ StackPointer()));
1608  __ Pop(lr, xzr);
1609  __ Add(lr, lr, Operand(masm_->CodeObject()));
1610 }
1611 
1612 
1613 void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1614  DCHECK(csp.Is(__ StackPointer()));
1615  __ Sub(lr, lr, Operand(masm_->CodeObject()));
1616  __ Push(xzr, lr);
1617 }
1618 
1619 
1620 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1621  DCHECK(register_index < (1<<30));
1622  DCHECK(register_index >= kNumCachedRegisters);
1623  if (num_registers_ <= register_index) {
1624  num_registers_ = register_index + 1;
1625  }
1626  register_index -= kNumCachedRegisters;
1627  int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1628  return MemOperand(frame_pointer(), offset);
1629 }
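// Regexp registers beyond the cached ones live in 32-bit stack slots addressed
// relative to the frame pointer, starting at kFirstRegisterOnStack and
// extending towards lower offsets.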
1630 
1631 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1632  Register scratch) {
1633  DCHECK(register_index < (1<<30));
1634  DCHECK(register_index < num_saved_registers_);
1635  DCHECK(register_index >= kNumCachedRegisters);
1636  DCHECK_EQ(register_index % 2, 0);
1637  register_index -= kNumCachedRegisters;
1638  int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1639  // capture_location is used with Stp instructions to load/store 2 registers.
1640  // The immediate field in the encoding is limited to 7 bits (signed).
1641  if (is_int7(offset)) {
1642  return MemOperand(frame_pointer(), offset);
1643  } else {
1644  __ Add(scratch, frame_pointer(), offset);
1645  return MemOperand(scratch);
1646  }
1647 }
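// When the offset does not fit the Stp/Ldp immediate encoding, the address is
// materialized into the scratch register and a plain base-register MemOperand
// is returned instead.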
1648 
1649 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1650  int characters) {
1651  Register offset = current_input_offset();
1652 
1653  // The ldr, str, ldrh and strh instructions can perform unaligned accesses
1654  // if the CPU and the operating system running on the target allow it.
1655  // If unaligned loads/stores are not supported, this function must only
1656  // be used to load a single character at a time.
1657 
1658  // ARMv8 supports unaligned accesses, but V8 or the kernel can decide to
1659  // disable them.
1660  // TODO(pielan): See whether or not we should disable unaligned accesses.
1661  if (!CanReadUnaligned()) {
1662  DCHECK(characters == 1);
1663  }
1664 
1665  if (cp_offset != 0) {
1666  if (masm_->emit_debug_code()) {
1667  __ Mov(x10, cp_offset * char_size());
1668  __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1669  __ Cmp(x10, Operand(w10, SXTW));
1670  // The offset needs to fit in a W register.
1671  __ Check(eq, kOffsetOutOfRange);
1672  } else {
1673  __ Add(w10, current_input_offset(), cp_offset * char_size());
1674  }
1675  offset = w10;
1676  }
1677 
1678  if (mode_ == LATIN1) {
1679  if (characters == 4) {
1680  __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1681  } else if (characters == 2) {
1682  __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1683  } else {
1684  DCHECK(characters == 1);
1685  __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1686  }
1687  } else {
1688  DCHECK(mode_ == UC16);
1689  if (characters == 2) {
1690  __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1691  } else {
1692  DCHECK(characters == 1);
1693  __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1694  }
1695  }
1696 }
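// Load widths: in LATIN1 mode a character is one byte, so 4, 2 and 1
// characters map to Ldr (W), Ldrh and Ldrb; in UC16 mode a character is two
// bytes, so 2 and 1 characters map to Ldr (W) and Ldrh. The offset register is
// sign-extended from W to X and added to input_end().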
1697 
1698 #endif // V8_INTERPRETED_REGEXP
1699 
1700 }} // namespace v8::internal
1701 
1702 #endif // V8_TARGET_ARCH_ARM64