V8 Project
runtime-uri.cc
Go to the documentation of this file.
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/v8.h"
6 
7 #include "src/arguments.h"
8 #include "src/conversions.h"
9 #include "src/runtime/runtime.h"
11 #include "src/string-search.h"
12 #include "src/utils.h"
13 
14 
15 namespace v8 {
16 namespace internal {
17 
18 template <typename Char>
20 
21 
22 template <>
24  String::FlatContent flat = string->GetFlatContent();
25  DCHECK(flat.IsOneByte());
26  return flat.ToOneByteVector();
27 }
28 
29 
30 template <>
31 Vector<const uc16> GetCharVector(Handle<String> string) {
32  String::FlatContent flat = string->GetFlatContent();
33  DCHECK(flat.IsTwoByte());
34  return flat.ToUC16Vector();
35 }
36 
37 
38 class URIUnescape : public AllStatic {
39  public:
40  template <typename Char>
42  Handle<String> source);
43 
44  private:
45  static const signed char kHexValue['g'];
46 
47  template <typename Char>
49  Handle<String> string,
50  int start_index);
51 
52  static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
53 
54  template <typename Char>
55  static INLINE(int UnescapeChar(Vector<const Char> vector, int i, int length,
56  int* step));
57 };
58 
59 
60 const signed char URIUnescape::kHexValue[] = {
61  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
62  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
63  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -0, 1, 2, 3, 4, 5,
64  6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1,
65  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
66  -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15};
67 
68 
69 template <typename Char>
71  Handle<String> source) {
72  int index;
73  {
74  DisallowHeapAllocation no_allocation;
75  StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
76  index = search.Search(GetCharVector<Char>(source), 0);
77  if (index < 0) return source;
78  }
79  return UnescapeSlow<Char>(isolate, source, index);
80 }
81 
82 
83 template <typename Char>
85  Handle<String> string,
86  int start_index) {
87  bool one_byte = true;
88  int length = string->length();
89 
90  int unescaped_length = 0;
91  {
92  DisallowHeapAllocation no_allocation;
93  Vector<const Char> vector = GetCharVector<Char>(string);
94  for (int i = start_index; i < length; unescaped_length++) {
95  int step;
96  if (UnescapeChar(vector, i, length, &step) >
98  one_byte = false;
99  }
100  i += step;
101  }
102  }
103 
104  DCHECK(start_index < length);
105  Handle<String> first_part =
106  isolate->factory()->NewProperSubString(string, 0, start_index);
107 
108  int dest_position = 0;
109  Handle<String> second_part;
110  DCHECK(unescaped_length <= String::kMaxLength);
111  if (one_byte) {
112  Handle<SeqOneByteString> dest = isolate->factory()
113  ->NewRawOneByteString(unescaped_length)
114  .ToHandleChecked();
115  DisallowHeapAllocation no_allocation;
116  Vector<const Char> vector = GetCharVector<Char>(string);
117  for (int i = start_index; i < length; dest_position++) {
118  int step;
119  dest->SeqOneByteStringSet(dest_position,
120  UnescapeChar(vector, i, length, &step));
121  i += step;
122  }
123  second_part = dest;
124  } else {
125  Handle<SeqTwoByteString> dest = isolate->factory()
126  ->NewRawTwoByteString(unescaped_length)
127  .ToHandleChecked();
128  DisallowHeapAllocation no_allocation;
129  Vector<const Char> vector = GetCharVector<Char>(string);
130  for (int i = start_index; i < length; dest_position++) {
131  int step;
132  dest->SeqTwoByteStringSet(dest_position,
133  UnescapeChar(vector, i, length, &step));
134  i += step;
135  }
136  second_part = dest;
137  }
138  return isolate->factory()->NewConsString(first_part, second_part);
139 }
140 
141 
142 int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
143  if (character1 > 'f') return -1;
144  int hi = kHexValue[character1];
145  if (hi == -1) return -1;
146  if (character2 > 'f') return -1;
147  int lo = kHexValue[character2];
148  if (lo == -1) return -1;
149  return (hi << 4) + lo;
150 }
151 
152 
153 template <typename Char>
154 int URIUnescape::UnescapeChar(Vector<const Char> vector, int i, int length,
155  int* step) {
156  uint16_t character = vector[i];
157  int32_t hi = 0;
158  int32_t lo = 0;
159  if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' &&
160  (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) != -1 &&
161  (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) != -1) {
162  *step = 6;
163  return (hi << 8) + lo;
164  } else if (character == '%' && i <= length - 3 &&
165  (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) != -1) {
166  *step = 3;
167  return lo;
168  } else {
169  *step = 1;
170  return character;
171  }
172 }
173 
174 
175 class URIEscape : public AllStatic {
176  public:
177  template <typename Char>
179  Handle<String> string);
180 
181  private:
182  static const char kHexChars[17];
183  static const char kNotEscaped[256];
184 
185  static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
186 };
187 
188 
189 const char URIEscape::kHexChars[] = "0123456789ABCDEF";
190 
191 
192 // kNotEscaped is generated by the following:
193 //
194 // #!/bin/perl
195 // for (my $i = 0; $i < 256; $i++) {
196 // print "\n" if $i % 16 == 0;
197 // my $c = chr($i);
198 // my $escaped = 1;
199 // $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
200 // print $escaped ? "0, " : "1, ";
201 // }
202 
203 const char URIEscape::kNotEscaped[] = {
204  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
205  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
206  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
207  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
208  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
209  1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
210  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
212  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
213  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
214  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
215 
216 
217 template <typename Char>
219  DCHECK(string->IsFlat());
220  int escaped_length = 0;
221  int length = string->length();
222 
223  {
224  DisallowHeapAllocation no_allocation;
225  Vector<const Char> vector = GetCharVector<Char>(string);
226  for (int i = 0; i < length; i++) {
227  uint16_t c = vector[i];
228  if (c >= 256) {
229  escaped_length += 6;
230  } else if (IsNotEscaped(c)) {
231  escaped_length++;
232  } else {
233  escaped_length += 3;
234  }
235 
236  // We don't allow strings that are longer than a maximal length.
237  DCHECK(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow.
238  if (escaped_length > String::kMaxLength) break; // Provoke exception.
239  }
240  }
241 
242  // No length change implies no change. Return original string if no change.
243  if (escaped_length == length) return string;
244 
247  isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
248  String);
249  int dest_position = 0;
250 
251  {
252  DisallowHeapAllocation no_allocation;
253  Vector<const Char> vector = GetCharVector<Char>(string);
254  for (int i = 0; i < length; i++) {
255  uint16_t c = vector[i];
256  if (c >= 256) {
257  dest->SeqOneByteStringSet(dest_position, '%');
258  dest->SeqOneByteStringSet(dest_position + 1, 'u');
259  dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c >> 12]);
260  dest->SeqOneByteStringSet(dest_position + 3, kHexChars[(c >> 8) & 0xf]);
261  dest->SeqOneByteStringSet(dest_position + 4, kHexChars[(c >> 4) & 0xf]);
262  dest->SeqOneByteStringSet(dest_position + 5, kHexChars[c & 0xf]);
263  dest_position += 6;
264  } else if (IsNotEscaped(c)) {
265  dest->SeqOneByteStringSet(dest_position, c);
266  dest_position++;
267  } else {
268  dest->SeqOneByteStringSet(dest_position, '%');
269  dest->SeqOneByteStringSet(dest_position + 1, kHexChars[c >> 4]);
270  dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c & 0xf]);
271  dest_position += 3;
272  }
273  }
274  }
275 
276  return dest;
277 }
278 
279 
280 RUNTIME_FUNCTION(Runtime_URIEscape) {
281  HandleScope scope(isolate);
282  DCHECK(args.length() == 1);
284  Handle<String> string = String::Flatten(source);
285  DCHECK(string->IsFlat());
286  Handle<String> result;
288  isolate, result, string->IsOneByteRepresentationUnderneath()
289  ? URIEscape::Escape<uint8_t>(isolate, source)
290  : URIEscape::Escape<uc16>(isolate, source));
291  return *result;
292 }
293 
294 
295 RUNTIME_FUNCTION(Runtime_URIUnescape) {
296  HandleScope scope(isolate);
297  DCHECK(args.length() == 1);
299  Handle<String> string = String::Flatten(source);
300  DCHECK(string->IsFlat());
301  Handle<String> result;
303  isolate, result, string->IsOneByteRepresentationUnderneath()
304  ? URIUnescape::Unescape<uint8_t>(isolate, source)
305  : URIUnescape::Unescape<uc16>(isolate, source));
306  return *result;
307 }
308 }
309 } // namespace v8::internal
An object reference managed by the v8 garbage collector.
Definition: v8.h:198
Factory * factory()
Definition: isolate.h:982
int Search(Vector< const SubjectChar > subject, int index)
Definition: string-search.h:78
Vector< const uint8_t > ToOneByteVector()
Definition: objects.h:8639
static const int32_t kMaxOneByteCharCode
Definition: objects.h:8811
static const int kMaxLength
Definition: objects.h:8820
static Handle< String > Flatten(Handle< String > string, PretenureFlag pretenure=NOT_TENURED)
Definition: objects-inl.h:3354
static bool IsNotEscaped(uint16_t c)
Definition: runtime-uri.cc:185
static const char kHexChars[17]
Definition: runtime-uri.cc:182
static MUST_USE_RESULT MaybeHandle< String > Escape(Isolate *isolate, Handle< String > string)
static const char kNotEscaped[256]
Definition: runtime-uri.cc:183
static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2))
static MUST_USE_RESULT MaybeHandle< String > Unescape(Isolate *isolate, Handle< String > source)
static MUST_USE_RESULT MaybeHandle< String > UnescapeSlow(Isolate *isolate, Handle< String > string, int start_index)
static INLINE(int UnescapeChar(Vector< const Char > vector, int i, int length, int *step))
static const signed char kHexValue['g']
Definition: runtime-uri.cc:45
#define ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, dst, call)
Definition: isolate.h:131
#define ASSIGN_RETURN_ON_EXCEPTION(isolate, dst, call, T)
Definition: isolate.h:135
#define DCHECK(condition)
Definition: logging.h:205
#define MUST_USE_RESULT
Definition: macros.h:266
unsigned short uint16_t
Definition: unicode.cc:23
int int32_t
Definition: unicode.cc:24
Vector< const uint8_t > GetCharVector(Handle< String > string)
Definition: runtime-uri.cc:23
INLINE(static HeapObject *EnsureDoubleAligned(Heap *heap, HeapObject *object, int size))
@ RUNTIME_FUNCTION
Definition: serialize.h:23
Debugger support for the V8 JavaScript engine.
Definition: accessors.cc:20
#define CONVERT_ARG_HANDLE_CHECKED(Type, name, index)
Definition: runtime-utils.h:28
#define STATIC_CHAR_VECTOR(x)
Definition: vector.h:154