fix urldispatch matching and generation to cope with various inputs
| | |
| | | |
| | | .. code-block:: text |
| | | |
| | | foo/La%20Pe%C3%B1a |
| | | http://example.com/foo/La%20Pe%C3%B1a |
| | | |
| | | The matchdict will look like so (the value is URL-decoded / UTF-8 decoded): |
| | | |
| | | .. code-block:: text |
| | | |
| | | {'bar':u'La Pe\xf1a'} |
| | | |
| | | Literal strings in the path segment should represent the *decoded* value of |
| | | the ``PATH_INFO`` provided to Pyramid. You don't want to use a URL-encoded |
| | | value or a bytestring representing the literal's UTF-8 in the pattern. For |
| | | example, rather than this: |
| | | |
| | | .. code-block:: text |
| | | |
| | | /Foo%20Bar/{baz} |
| | | |
| | | You'll want to use something like this: |
| | | |
| | | .. code-block:: text |
| | | |
| | | /Foo Bar/{baz} |
| | | |
| | | For patterns that contain "high-order" characters in their literals, you'll |
| | | want to use a Unicode value as the pattern as opposed to any URL-encoded or |
| | | UTF-8-encoded value. For example, you might be tempted to use a bytestring |
| | | pattern like this: |
| | | |
| | | .. code-block:: text |
| | | |
| | | /La Pe\xc3\xb1a/{x} |
| | | |
| | | But that probably won't match as you expect it to. You'll want to use a |
| | | Unicode value as the pattern rather than raw bytestring escapes. You |
| | | can use a high-order Unicode value as the pattern by using `Python source |
| | | file encoding <http://www.python.org/dev/peps/pep-0263/>`_ plus the "real" |
| | | character in the Unicode pattern in the source, like so: |
| | | |
| | | .. code-block:: text |
| | | |
| | | /La Peña/{x} |
| | | |
| | | Or you can ignore source file encoding and use equivalent Unicode escape |
| | | sequences in the pattern: |
| | | |
| | | .. code-block:: text |
| | | |
| | | /La Pe\xf1a/{x} |
| | | |
| | | Dynamic segment names cannot contain high-order characters, so this applies |
| | | only to literals in the pattern. |
| | | |
| | | If the pattern has a ``*`` in it, the name which follows it is considered a |
| | | "remainder match". A remainder match *must* come at the end of the pattern. |
| | |
| | | based on route patterns. For example, if you've configured a route with the |
| | | ``name`` "foo" and the ``pattern`` "{a}/{b}/{c}", you might do this. |
| | | |
| | | .. ignore-next-block |
| | | .. code-block:: python |
| | | :linenos: |
| | | |
| | |
| | | |
| | | This would return something like the string ``http://example.com/1/2/3`` (at |
| | | least if the current protocol and hostname implied ``http://example.com``). |
| | | See the :meth:`~pyramid.request.Request.route_url` API documentation for more |
| | | |
| | | To get only the *path* of a route, use the |
| | | :meth:`pyramid.request.Request.route_path` API instead of |
| | | :meth:`~pyramid.request.Request.route_url`. |
| | | |
| | | .. code-block:: python |
| | | |
| | | url = request.route_path('foo', a='1', b='2', c='3') |
| | | |
| | | This will return the string ``/1/2/3`` rather than a full URL. |
| | | |
| | | Note that URLs and paths generated by ``route_path`` and ``route_url`` are |
| | | always URL-quoted string types (which contain no non-ASCII characters). |
| | | Therefore, if you've added a route like so: |
| | | |
| | | .. code-block:: python |
| | | |
| | | config.add_route('la', u'/La Peña/{city}') |
| | | |
| | | And you later generate a URL using ``route_path`` or ``route_url`` like so: |
| | | |
| | | .. code-block:: python |
| | | |
| | | url = request.route_path('la', city=u'Québec') |
| | | |
| | | You will wind up with the path encoded to UTF-8 and URL quoted like so: |
| | | |
| | | .. code-block:: text |
| | | |
| | | /La%20Pe%C3%B1a/Qu%C3%A9bec |
| | | |
| | | .. note:: |
| | | |
| | | Generating URL-quoted URLs and paths is new as of Pyramid 1.3 (and Pyramid |
| | | 1.2 after 1.2.6). Previous versions generated unquoted URLs and paths |
| | | (which was broken). |
| | | |
| | | See the :meth:`~pyramid.request.Request.route_url` and |
| | | :meth:`~pyramid.request.Request.route_path` API documentation for more |
| | | information. |
| | | |
| | | .. index:: |
| | |
| | | self.assertEqual(matcher('foo/baz/biz/buz/bar'), None) |
| | | self.assertEqual(generator({'baz':1, 'buz':2}), '/foo/1/biz/2/bar') |
| | | |
| | | def test_url_decode_error(self): |
| | | from pyramid.exceptions import URLDecodeError |
| | | matcher, generator = self._callFUT('/:foo') |
| | | self.assertRaises(URLDecodeError, matcher, |
| | | native_(b'/\xff\xfe\x8b\x00')) |
| | | |
| | | def test_custom_regex(self): |
| | | matcher, generator = self._callFUT('foo/{baz}/biz/{buz:[^/\.]+}.{bar}') |
| | | self.assertEqual(matcher('/foo/baz/biz/buz.bar'), |
| | |
| | | self.assertEqual(generator({'buz':2001}), '/2001') |
| | | |
| | | def test_custom_regex_with_embedded_squigglies3(self): |
| | | matcher, generator = self._callFUT('/{buz:(\d{2}|\d{4})-[a-zA-Z]{3,4}-\d{2}}') |
| | | matcher, generator = self._callFUT( |
| | | '/{buz:(\d{2}|\d{4})-[a-zA-Z]{3,4}-\d{2}}') |
| | | self.assertEqual(matcher('/2001-Nov-15'), {'buz':'2001-Nov-15'}) |
| | | self.assertEqual(matcher('/99-June-10'), {'buz':'99-June-10'}) |
| | | self.assertEqual(matcher('/2-Nov-15'), None) |
| | |
| | | self.assertEqual(matcher('/2001-No-15'), None) |
| | | self.assertEqual(generator({'buz':'2001-Nov-15'}), '/2001-Nov-15') |
| | | self.assertEqual(generator({'buz':'99-June-10'}), '/99-June-10') |
| | | |
| | | def test_pattern_with_high_order_literal(self): |
| | | pattern = text_(b'/La Pe\xc3\xb1a/{x}', 'utf-8') |
| | | matcher, generator = self._callFUT(pattern) |
| | | self.assertEqual(matcher(text_(b'/La Pe\xc3\xb1a/x', 'utf-8')), |
| | | {'x':'x'}) |
| | | self.assertEqual(generator({'x':'1'}), '/La%20Pe%C3%B1a/1') |
| | | |
| | | def test_pattern_generate_with_high_order_dynamic(self): |
| | | pattern = '/{x}' |
| | | _, generator = self._callFUT(pattern) |
| | | self.assertEqual( |
| | | generator({'x':text_(b'La Pe\xc3\xb1a', 'utf-8')}), |
| | | '/La%20Pe%C3%B1a') |
| | | |
| | | def test_docs_sample_generate(self): |
| | | # sample from urldispatch.rst |
| | | pattern = text_(b'/La Pe\xc3\xb1a/{city}', 'utf-8') |
| | | _, generator = self._callFUT(pattern) |
| | | self.assertEqual( |
| | | generator({'city':text_(b'Qu\xc3\xa9bec', 'utf-8')}), |
| | | '/La%20Pe%C3%B1a/Qu%C3%A9bec') |
| | | |
| | | def test_generate_with_mixedtype_values(self): |
| | | pattern = '/{city}/{state}' |
| | | _, generator = self._callFUT(pattern) |
| | | result = generator( |
| | | {'city': text_(b'Qu\xc3\xa9bec', 'utf-8'), |
| | | 'state': b'La Pe\xc3\xb1a'} |
| | | ) |
| | | self.assertEqual(result, '/Qu%C3%A9bec/La%20Pe%C3%B1a') |
| | | # should be a native string |
| | | self.assertEqual(type(result), str) |
| | | |
| | | class TestCompileRouteFunctional(unittest.TestCase): |
| | | def matches(self, pattern, path, expected): |
| | |
| | | self.matches('*traverse', '/zzz/abc', {'traverse':('zzz', 'abc')}) |
| | | self.matches('*traverse', '/zzz/ abc', {'traverse':('zzz', ' abc')}) |
| | | #'/La%20Pe%C3%B1a' |
| | | self.matches('{x}', native_(b'/La Pe\xc3\xb1a'), |
| | | {'x':text_(b'La Pe\xf1a')}) |
| | | self.matches('{x}', text_(b'/La Pe\xc3\xb1a', 'utf-8'), |
| | | {'x':text_(b'La Pe\xc3\xb1a', 'utf-8')}) |
| | | # '/La%20Pe%C3%B1a/x' |
| | | self.matches('*traverse', native_(b'/La Pe\xc3\xb1a/x'), |
| | | {'traverse':(text_(b'La Pe\xf1a'), 'x')}) |
| | | self.matches('*traverse', text_(b'/La Pe\xc3\xb1a/x'), |
| | | {'traverse':(text_(b'La Pe\xc3\xb1a'), 'x')}) |
| | | self.matches('/foo/{id}.html', '/foo/bar.html', {'id':'bar'}) |
| | | self.matches('/{num:[0-9]+}/*traverse', '/555/abc/def', |
| | | {'num':'555', 'traverse':('abc', 'def')}) |
| | |
| | | self.matches('*traverse', '/zzz/abc', {'traverse':('zzz', 'abc')}) |
| | | self.matches('*traverse', '/zzz/ abc', {'traverse':('zzz', ' abc')}) |
| | | #'/La%20Pe%C3%B1a' |
| | | self.matches(':x', native_(b'/La Pe\xc3\xb1a'), |
| | | {'x':text_(b'La Pe\xf1a')}) |
| | | # pattern, path, expected |
| | | self.matches(':x', text_(b'/La Pe\xc3\xb1a', 'utf-8'), |
| | | {'x':text_(b'La Pe\xc3\xb1a', 'utf-8')}) |
| | | # '/La%20Pe%C3%B1a/x' |
| | | self.matches('*traverse', native_(b'/La Pe\xc3\xb1a/x'), |
| | | {'traverse':(text_(b'La Pe\xf1a'), 'x')}) |
| | | self.matches('*traverse', text_(b'/La Pe\xc3\xb1a/x', 'utf-8'), |
| | | {'traverse':(text_(b'La Pe\xc3\xb1a', 'utf-8'), 'x')}) |
| | | self.matches('/foo/:id.html', '/foo/bar.html', {'id':'bar'}) |
| | | self.matches('/foo/:id_html', '/foo/bar_html', {'id_html':'bar_html'}) |
| | | self.matches('zzz/:_', '/zzz/abc', {'_':'abc'}) |
| | |
| | | ) |
| | | |
| | | from pyramid.compat import ( |
| | | PY3, |
| | | native_, |
| | | bytes_, |
| | | text_, |
| | | text_type, |
| | | string_types, |
| | | binary_type, |
| | | is_nonstr_iter, |
| | | url_quote, |
| | | ) |
| | |
| | | return '{%s}' % name[1:] |
| | | |
| | | def _compile_route(route): |
| | | # This function really wants to consume Unicode patterns natively, but if |
| | | # someone passes us a bytestring, we allow it by converting it to Unicode |
| | | # using the ASCII decoding. We decode it using ASCII because we don't |
| | | # want to accept bytestrings with high-order characters in them here as |
| | | # we have no idea what the encoding represents. |
| | | if route.__class__ is not text_type: |
| | | route = text_(route, 'ascii') |
| | | |
| | | if old_route_re.search(route) and not route_re.search(route): |
| | | route = old_route_re.sub(update_pattern, route) |
| | | |
| | | if not route.startswith('/'): |
| | | route = '/' + route |
| | | |
| | | star = None |
| | | remainder = None |
| | | if star_at_end.search(route): |
| | | route, star = route.rsplit('*', 1) |
| | | route, remainder = route.rsplit('*', 1) |
| | | |
| | | pat = route_re.split(route) |
| | | |
| | | # every element in "pat" will be Unicode (regardless of whether the |
| | | # route_re regex pattern is itself Unicode or str) |
| | | pat.reverse() |
| | | rpat = [] |
| | | gen = [] |
| | | prefix = pat.pop() # invar: always at least one element (route='/'+route) |
| | | rpat.append(re.escape(prefix)) |
| | | gen.append(prefix) |
| | | |
| | | # We want to generate URL-encoded URLs, so we url-quote the prefix, being |
| | | # careful not to quote any embedded slashes. We have to replace '%' with |
| | | # '%%' afterwards, as the strings that go into "gen" are used as string |
| | | # replacement targets. |
| | | gen.append(quote_path_segment(prefix, safe='/').replace('%', '%%')) # native |
| | | rpat.append(re.escape(prefix)) # unicode |
| | | |
| | | while pat: |
| | | name = pat.pop() |
| | | name = pat.pop() # unicode |
| | | name = name[1:-1] |
| | | if ':' in name: |
| | | name, reg = name.split(':') |
| | | else: |
| | | reg = '[^/]+' |
| | | gen.append('%%(%s)s' % name) |
| | | name = '(?P<%s>%s)' % (name, reg) |
| | | gen.append('%%(%s)s' % native_(name)) # native |
| | | name = '(?P<%s>%s)' % (name, reg) # unicode |
| | | rpat.append(name) |
| | | s = pat.pop() |
| | | s = pat.pop() # unicode |
| | | if s: |
| | | rpat.append(re.escape(s)) |
| | | gen.append(s) |
| | | rpat.append(re.escape(s)) # unicode |
| | | # We want to generate URL-encoded URLs, so we url-quote this |
| | | # literal in the pattern, being careful not to quote the embedded |
| | | # slashes. We have to replace '%' with '%%' afterwards, as the |
| | | # strings that go into "gen" are used as string replacement |
| | | # targets. What is appended to gen is a native string. |
| | | gen.append(quote_path_segment(s, safe='/').replace('%', '%%')) |
| | | |
| | | if star: |
| | | rpat.append('(?P<%s>.*?)' % star) |
| | | gen.append('%%(%s)s' % star) |
| | | if remainder: |
| | | rpat.append('(?P<%s>.*?)' % remainder) # unicode |
| | | gen.append('%%(%s)s' % native_(remainder)) # native |
| | | |
| | | pattern = ''.join(rpat) + '$' |
| | | pattern = ''.join(rpat) + '$' # unicode |
| | | |
| | | match = re.compile(pattern).match |
| | | def matcher(path): |
| | | # This function really wants to consume Unicode paths natively, |
| | | # but if someone passes us a bytestring, we allow it by converting it |
| | | # to Unicode using the ASCII decoding. We decode it using ASCII |
| | | # because we don't want to accept bytestrings with high-order |
| | | # characters in them here as we have no idea what the encoding |
| | | # represents. |
| | | if path.__class__ is not text_type: |
| | | path = text_(path, 'ascii') |
| | | m = match(path) |
| | | if m is None: |
| | | return m |
| | | return None |
| | | d = {} |
| | | for k, v in m.groupdict().items(): |
| | | if k == star: |
| | | d[k] = split_path_info(v) |
| | | # k and v will be Unicode; Python 2.6.4 and lower doesn't accept unicode |
| | | # kwargs as **kw, so we explicitly cast the keys to native |
| | | # strings in case someone wants to pass the result as **kw |
| | | nk = native_(k, 'ascii') |
| | | if k == remainder: |
| | | d[nk] = split_path_info(v) |
| | | else: |
| | | d[k] = v |
| | | d[nk] = v |
| | | return d |
| | | |
| | | |
| | | gen = ''.join(gen) |
| | | def generator(dict): |
| | | newdict = {} |
| | | for k, v in dict.items(): |
| | | if v.__class__ is text_type: |
| | | v = native_(v, 'utf-8') |
| | | if k == star and is_nonstr_iter(v): |
| | | v = '/'.join([quote_path_segment(x) for x in v]) |
| | | elif k != star: |
| | | if PY3: |
| | | if v.__class__ is binary_type: |
| | | # url_quote below needs a native string, not bytes on Py3 |
| | | v = v.decode('utf-8') |
| | | else: |
| | | if v.__class__ is text_type: |
| | | # url_quote below needs bytes, not unicode on Py2 |
| | | v = v.encode('utf-8') |
| | | if k == remainder and is_nonstr_iter(v): |
| | | v = '/'.join([quote_path_segment(x) for x in v]) # native |
| | | elif k != remainder: |
| | | if v.__class__ not in string_types: |
| | | v = str(v) |
| | | v = url_quote(v, safe='') |
| | | # v may be bytes (py2) or native string (py3) |
| | | v = url_quote(v, safe='') # defaults to utf8 encoding on py3 |
| | | |
| | | # at this point, the value will be a native string |
| | | newdict[k] = v |
| | | return gen % newdict |
| | | |
| | | result = gen % newdict # native string result |
| | | return result |
| | | |
| | | return matcher, generator |