網址的編解碼

對於網址列的編解碼,常常困擾我,總是得找工具程式來編解碼,例如若是有個需要下載的檔案 "下載檔案.pdf",就要想辦法轉成 "%E4%B8%8B%E8%BC%89%E6%AA%94%E6%A1%88.pdf",以保證所有的瀏覽器都能夠下載。
有時候我也會從別人的網頁中抓取一些檔案來下載,碰到上述編碼過的網址,就得想辦法還原出原來的檔名,所幸網路上已經有人提供了好用的工具,讓我們可以直接線上手動轉換各種編碼。
因為經常要用的緣故,所以還是得改寫成Java 代碼
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Percent-encoding and decoding of URL components following RFC 3986.
 *
 * <p>{@link #encode(String)} leaves the RFC 3986 "unreserved" characters
 * (ASCII letters, digits, {@code -}, {@code .}, {@code _}, {@code ~}) untouched
 * and writes every other code point as the percent-encoded bytes of its UTF-8
 * representation. {@link #decode(String)} performs the reverse transformation
 * and rejects malformed UTF-8 sequences.
 */
public class RFC3986 {
    /** Matches one percent-encoded byte such as {@code %E4}. */
    private static final Pattern ptn = Pattern.compile("%[A-Fa-f0-9]{2}");
    private static final String character = "字元值[";
    private static final String overRange = "]超出範圍";
    private static final String string = "字串[";
    private static final String containInvalidEncode = "]含有無效編碼";
    /** Upper-case hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Formats the low 8 bits of {@code charcode} as two upper-case hex digits.
     *
     * @param charcode the byte value to format; only the low 8 bits are used
     * @return a two-character hexadecimal string
     */
    private static String dec2hex2(int charcode) {
        return new String(new char[] {
            HEX_DIGITS[(charcode >> 4) & 0xF],
            HEX_DIGITS[charcode & 0xF]
        });
    }

    /**
     * Converts a Unicode code point into its UTF-16 string form.
     *
     * @param n the code point
     * @return a string of one char (BMP) or two chars (surrogate pair)
     * @throws IllegalArgumentException if {@code n} is outside U+0000..U+10FFFF
     */
    private static String dec2char(int n) {
        if (!Character.isValidCodePoint(n)) {
            throw new IllegalArgumentException(character + n + overRange);
        }
        // Character.toChars builds the surrogate pair for supplementary code points.
        return new String(Character.toChars(n));
    }

    /** Returns whether {@code n} is an RFC 3986 "unreserved" character. */
    private static boolean isUnreserved(int n) {
        return (n >= 0x41 && n <= 0x5A) || (n >= 0x61 && n <= 0x7A) || (n >= 0x30 && n <= 0x39)
                || n == 0x2D || n == 0x2E || n == 0x5F || n == 0x7E;
    }

    /** Builds the exception thrown when {@code urlString} holds a malformed sequence. */
    private static IllegalArgumentException invalidEncoding(String urlString) {
        return new IllegalArgumentException(string.concat(urlString).concat(containInvalidEncode));
    }

    /**
     * Percent-encodes a string.
     *
     * <p>Unreserved characters are copied as-is; every other code point,
     * including the space character ({@code %20}), is written as the
     * percent-encoded bytes of its UTF-8 form. An unpaired surrogate is written
     * as a three-byte sequence of its own value.
     *
     * @param textString the text to encode
     * @return the percent-encoded text
     */
    public static String encode(String textString) {
        StringBuilder sb = new StringBuilder(textString.length() * 3);
        int i = 0;
        while (i < textString.length()) {
            int n = textString.codePointAt(i);
            // Step over both halves of a surrogate pair so the low surrogate
            // is not encoded a second time on the next iteration.
            i += Character.charCount(n);
            if (isUnreserved(n)) {
                sb.append((char) n);
            } else if (n <= 0x7F) {
                sb.append('%').append(dec2hex2(n));
            } else if (n <= 0x7FF) {
                sb.append('%').append(dec2hex2(0xC0 | ((n >> 6) & 0x1F)))
                  .append('%').append(dec2hex2(0x80 | (n & 0x3F)));
            } else if (n <= 0xFFFF) {
                sb.append('%').append(dec2hex2(0xE0 | ((n >> 12) & 0x0F)))
                  .append('%').append(dec2hex2(0x80 | ((n >> 6) & 0x3F)))
                  .append('%').append(dec2hex2(0x80 | (n & 0x3F)));
            } else {
                sb.append('%').append(dec2hex2(0xF0 | ((n >> 18) & 0x07)))
                  .append('%').append(dec2hex2(0x80 | ((n >> 12) & 0x3F)))
                  .append('%').append(dec2hex2(0x80 | ((n >> 6) & 0x3F)))
                  .append('%').append(dec2hex2(0x80 | (n & 0x3F)));
            }
        }
        return sb.toString();
    }

    /**
     * Decodes a percent-encoded string.
     *
     * <p>Runs of {@code %XX} escapes are interpreted as UTF-8 bytes; all other
     * text, including a {@code %} not followed by two hex digits, is copied
     * unchanged. A {@code +} is NOT translated to a space.
     *
     * @param urlString the percent-encoded text
     * @return the decoded text
     * @throws IllegalArgumentException if the escapes do not form valid UTF-8:
     *         an invalid lead byte, a missing or invalid continuation byte, a
     *         sequence interrupted by literal text or by the end of input, an
     *         overlong form, or a value above U+10FFFF
     */
    public static String decode(String urlString) {
        StringBuilder sb = new StringBuilder(urlString.length());
        Matcher m = ptn.matcher(urlString);
        int pos = 0;          // end of the previous escape
        int remaining = 0;    // continuation bytes still expected
        int codePoint = 0;    // bits accumulated for the current sequence
        int minCodePoint = 0; // smallest value legal for the current sequence length
        while (m.find()) {
            // Continuation bytes must directly follow the previous escape.
            if (remaining > 0 && m.start() != pos) {
                throw invalidEncoding(urlString);
            }
            sb.append(urlString, pos, m.start());
            pos = m.end();
            int b = Integer.parseInt(urlString.substring(m.start() + 1, m.end()), 16);
            if (remaining == 0) {
                if (b <= 0x7F) {
                    sb.append((char) b);
                } else if (0xC0 <= b && b <= 0xDF) { // 110xxxxx
                    remaining = 1;
                    codePoint = b & 0x1F;
                    minCodePoint = 0x80;
                } else if (0xE0 <= b && b <= 0xEF) { // 1110xxxx
                    remaining = 2;
                    codePoint = b & 0x0F;
                    minCodePoint = 0x800;
                } else if (0xF0 <= b && b <= 0xF7) { // 11110xxx
                    remaining = 3;
                    codePoint = b & 0x07;
                    minCodePoint = 0x10000;
                } else {
                    throw invalidEncoding(urlString);
                }
            } else {
                if (b < 0x80 || b > 0xBF) {
                    throw invalidEncoding(urlString);
                }
                codePoint = (codePoint << 6) | (b & 0x3F);
                remaining--;
                if (remaining == 0) {
                    // Reject overlong forms such as %C0%80.
                    if (codePoint < minCodePoint) {
                        throw invalidEncoding(urlString);
                    }
                    sb.append(dec2char(codePoint));
                }
            }
        }
        // A sequence still open here was truncated by literal text or end of input.
        if (remaining > 0) {
            throw invalidEncoding(urlString);
        }
        sb.append(urlString, pos, urlString.length());
        return sb.toString();
    }
}
view raw RFC3986.java hosted with ❤ by GitHub

注意: + (加號) 在古老的瀏覽器或處理程式代表%20(空白),若是您會處理到這類網址(例如這個網頁內用+代替空白),請不要忘了要先把 + 轉換成 %20

留言

這個網誌中的熱門文章

企業人員的統一管理-FreeIPA學習筆記

證交所最佳五檔的程式解析

Postgresql HA