# -*- coding: utf-8 -*-

import re

from petrel_client.common.exception import InvalidS3UriError

# Notes on the regular-expression features used by PATTERN (paraphrased from
# the Python ``re`` module documentation):
#
# (?:...)
#     A non-capturing version of regular parentheses. It matches whatever
#     regular expression is inside the parentheses, but the substring matched
#     by the group cannot be retrieved after performing a match or referenced
#     later in the pattern.
#
# *?, +?, ??
#     The '*', '+', and '?' qualifiers are all greedy; they match as much text
#     as possible. Sometimes this behaviour isn't desired; if the RE <.*> is
#     matched against '<a> b <c>', it will match the entire string, and not
#     just '<a>'. Adding '?' after the qualifier makes it perform the match in
#     non-greedy or minimal fashion; as few characters as possible will be
#     matched. Using the RE <.*?> will match only '<a>'.
#
# re.I / re.IGNORECASE
#     Perform case-insensitive matching; expressions like [A-Z] will match
#     lowercase letters, too. This is not affected by the current locale. To
#     get this effect on non-ASCII Unicode characters such as ü and Ü, add the
#     UNICODE flag.
#
# Accepted layout:  [<prefix>:]s3://<bucket>/<key>[/]
#   group 1 - optional run of non-':' characters that precedes ':s3://'
#   group 2 - bucket: one or more characters other than '/'
#   group 3 - key: matched lazily so that a single trailing '/' is left out
PATTERN = re.compile(r'^(?:([^:]+):)?s3://([^/]+)/(.+?)/?$', re.I)


def parse_s3_uri(uri):
    """Split an S3 URI into its ``(prefix, bucket, key)`` components.

    The URI is matched against ``PATTERN``; the ``s3`` scheme is compared
    case-insensitively.

    Args:
        uri: URI string of the form ``[<prefix>:]s3://<bucket>/<key>[/]``.

    Returns:
        A 3-tuple ``(prefix, bucket, key)``. ``prefix`` is ``None`` when the
        URI has no ``<prefix>:`` part in front of ``s3://``. One trailing
        ``/`` after the key, if present, is not included in ``key``.

    Raises:
        InvalidS3UriError: If ``uri`` does not match ``PATTERN`` (for example
            when the bucket or the key is missing).
    """
    matched = PATTERN.match(uri)
    if matched is None:
        raise InvalidS3UriError(uri)
    # PATTERN defines exactly three capturing groups, so groups() yields the
    # (prefix, bucket, key) tuple directly, with None for an absent prefix.
    return matched.groups()