// Published in: Regular Expression
// (Supported by JavaScript, maybe other languages)
// replace() can be used to parse the URI. For example, to get the path:
//   path = uri.replace(regexUri, "$7$9");

//****************************************************//
//***************** Validate a URI *******************//
//****************************************************//
//- The different parts are kept in their own groups and can be recombined
//  depending on the scheme:
//  - http as $1://$2$7?$11#$12 or $1://$5:$6$7?$11#$12
//  - ftp as $1://$2$7 or $1://$4@$5:$6$7
//  - mailto as $1:$9?$11
//- groups are as follows:
//  1   == scheme
//  2   == authority
//  4   == userinfo
//  5   == host (loose check to allow for IPv6 addresses)
//  6   == port
//  7,9 == path (7 if it has an authority, 9 if it doesn't)
//  11  == query
//  12  == fragment
//- Groups 3, 8 and 10 are helper captures. Every `(?=(X))\N` pair grabs the
//  longest run of X inside a lookahead and then consumes it through the
//  backreference, so the engine cannot backtrack into that run when a later
//  part of the pattern fails. Groups 5, 6, 11 and 12 use the same idiom and
//  double as the documented result groups.
var regexUri = /^([a-z][a-z0-9+.-]*):(?:\/\/((?:(?=((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*))(\3)@)?(?=(\[[0-9A-F:.]{2,}\]|(?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*))\5(?::(?=(\d*))\6)?)(\/(?=((?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*))\8)?|(\/?(?!\/)(?=((?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*))\10)?)(?:\?(?=((?:[a-z0-9-._~!$&'()*+,;=:@\/?]|%[0-9A-F]{2})*))\11)?(?:#(?=((?:[a-z0-9-._~!$&'()*+,;=:@\/?]|%[0-9A-F]{2})*))\12)?$/i;

/* regexUri is composed as follows:
  ^
  ([a-z][a-z0-9+.-]*):                              #1 scheme
  (?:
    \/\/                                            it has an authority:
    (                                               #2 authority
      (?:(?=((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*))(\3)@)?                #4 userinfo
      (?=(\[[0-9A-F:.]{2,}\]|(?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*))\5      #5 host (loose check to allow for IPv6 addresses)
      (?::(?=(\d*))\6)?                             #6 port
    )
    (\/(?=((?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*))\8)?                  #7 path
  |                                                 it doesn't have an authority:
    (\/?(?!\/)(?=((?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*))\10)?          #9 path
  )
  (?:
    \?(?=((?:[a-z0-9-._~!$&'()*+,;=:@\/?]|%[0-9A-F]{2})*))\11                   #11 query string
  )?
  (?:
    #(?=((?:[a-z0-9-._~!$&'()*+,;=:@\/?]|%[0-9A-F]{2})*))\12                    #12 fragment
  )?
  $
*/

//****************************************************//
//** Validate a URI (includes delimiters in groups) **//
//****************************************************//
//- The different parts--along with their delimiters--are kept in their own
//  groups and can be recombined as $1$6$2$3$4$5$7$8$9
//- groups are as follows:
//  1,6 == scheme:// or scheme:
//  2   == userinfo@
//  3   == host
//  4   == :port
//  5,7 == path (5 if it has an authority, 7 if it doesn't)
//  8   == ?query
//  9   == #fragment
//- The scheme must start with a letter, the same rule regexUri applies, so
//  both URI validators agree on what a scheme is.
var regexUriDelim = /^(?:([a-z][a-z0-9+.-]*:\/\/)((?:(?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)(:(?:\d*))?(\/(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?|([a-z][a-z0-9+.-]*:)(\/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})+(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9A-F]{2})*)?)(\?(?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)?(#(?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*)?$/i;

//****************************************************//
//***************** Validate a URL *******************//
//****************************************************//
//Validates a URI with an http or https scheme.
//- The different parts are kept in their own groups and can be recombined as
//  $1://$2:$3$4?$5#$6
//- Does not validate the host portion (domain); just makes sure the string
//  consists of valid characters (does not include IPv6 nor IPvFuture
//  addresses as valid).
var regexUrl = /^(https?):\/\/((?:[a-z0-9.-]|%[0-9A-F]{2}){3,})(?::(\d+))?((?:\/(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})*)*)(?:\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?(?:#((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?$/i;

//****************************************************//
//**************** Validate a Mailto *****************//
//****************************************************//
//Validates a URI with a mailto scheme.
//- The different parts are kept in their own groups and can be recombined as
//  $1:$2?$3
//- Does not validate the email addresses themselves.
//- groups are as follows:
//  1 == scheme ("mailto"; the i flag makes the match case-insensitive)
//  2 == the text between "mailto:" and the optional "?" (unset when empty)
//  3 == query (the text after "?", without the "?"; unset when there is no "?")
var regexMailto = /^(mailto):((?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})+)?(?:\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9A-F]{2})*))?$/i;