Created
March 26, 2022 21:36
-
-
Save itzg/485da354d38fdfe91ff886741a748175 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.net.URI; | |
import java.net.URLEncoder; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
/**
 * Converts loosely formatted URL strings into {@link URI} instances.
 *
 * <p>{@link URI#create(String)} rejects characters such as square brackets in the path.
 * This converter splits the URL into prefix, path and trailer, encodes each path segment
 * with {@link URLEncoder}, and only then hands the result to {@link URI#create(String)}.
 *
 * <p>Caveats that follow from using {@link URLEncoder} on path segments:
 * <ul>
 *   <li>a {@code +} in a segment is treated as a pre-encoded space and is emitted as
 *       {@code +} again;</li>
 *   <li>a literal space in a segment is emitted as {@code +};</li>
 *   <li>an existing percent escape is encoded again ({@code %} becomes {@code %25}).</li>
 * </ul>
 */
public class LenientUriConverter {

    /**
     * Splits a URL into three groups: (1) everything up to the first slash that follows
     * {@code ://}, (2) the optional path beginning with one or more slashes, and (3) the
     * optional trailer beginning with {@code ?} or {@code #}.
     */
    static final Pattern URL_PATTERN = Pattern.compile("(.+?://.+?)(/+.*?)?([?#].+)?");

    // NOTE: /+ normalizes any double slashes in the path
    static final Pattern PATHS_PATTERN = Pattern.compile("/+([^/]+)");

    /**
     * Converts the given URL string into a {@link URI}, encoding each path segment.
     *
     * <p>Runs of slashes in the path are collapsed to a single slash. A trailing slash on
     * the path is preserved as a single slash. The query/fragment trailer is appended
     * unchanged.
     *
     * @param s the URL string to convert; must not be {@code null}
     * @return the URI built from the re-assembled URL
     * @throws NullPointerException if {@code s} is {@code null}
     * @throws IllegalArgumentException if {@code s} does not match {@link #URL_PATTERN} or
     *     the re-assembled string is rejected by {@link URI#create(String)}
     * @throws Exception declared for backward compatibility; the only checked exception in
     *     the body is the {@code UnsupportedEncodingException} declared by
     *     {@link URLEncoder#encode(String, String)}
     */
    public URI convert(String s) throws Exception {
        if (s == null) {
            throw new NullPointerException("url must not be null");
        }

        final Matcher m = URL_PATTERN.matcher(s);
        if (!m.matches()) {
            throw new IllegalArgumentException("Failed to parse url: " + s);
        }

        final StringBuilder sb = new StringBuilder(m.group(1));

        final String pathsPart = m.group(2);
        if (pathsPart != null) {
            final Matcher pathsMatcher = PATHS_PATTERN.matcher(pathsPart);
            while (pathsMatcher.find()) {
                final String content = pathsMatcher.group(1);
                sb.append('/')
                    .append(URLEncoder.encode(
                        // flip back any pre-encoded spaces
                        content.replace('+', ' '),
                        "utf-8"
                    ));
            }
            // PATHS_PATTERN only captures non-empty segments, so a trailing slash
            // (including a bare "/" path) would otherwise be silently dropped.
            if (pathsPart.endsWith("/")) {
                sb.append('/');
            }
        }

        final String trailer = m.group(3);
        if (trailer != null) {
            sb.append(trailer);
        }

        return URI.create(sb.toString());
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Needed this to handle mod URLs from CurseForge such as:
https://media.forgecdn.net/files/3482/169/Valhelsia+3-3.4.4-SERVER.zip
https://files.forgecdn.net/files/2320/259/[1.10.x]FenceOverhaul-1.2.1.jar
where
java.net.URI
itself did not like the square brackets and would re-encode the + characters.