获取汉字首字母(Java)

2021-07-27

GBK、GB2312

ISO8859-1

 



//    Chinese-character section-position codes (区位码) mapped to first letters
    /**
     * Ascending section-position code thresholds. Entry {@code i} is the inclusive lower bound and
     * entry {@code i + 1} the exclusive upper bound of the code range that maps to
     * {@code lc_FirstLetter[i]}; the last entry only serves as the final upper bound.
     */
    private static final int[] li_SecPosValue = {
            1601, 1637, 1833, 2078, 2274, 2302,
            2433, 2594, 2787, 3106, 3212, 3472,
            3635, 3722, 3730, 3858, 4027, 4086,
            4390, 4558, 4684, 4925, 5249, 5590
    };

    /** Letters paired index-by-index with the ranges of {@code li_SecPosValue} (one fewer entry). */
    private static final String[] lc_FirstLetter = {
            "a", "b", "c", "d", "e", "f", "g", "h",
            "j", "k", "l", "m", "n", "o", "p", "q",
            "r", "s", "t", "w", "x", "y", "z"
    };

    /**
     * Demo entry point: prints the numeric values of the first two GBK bytes of the sample text,
     * then the first letter that {@code getFirstLetter} computes for it.
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        final String sample = "恩恩恩";

        // GB2312/GBK is a double-byte encoding, see
        // https://baike.baidu.com/item/%E4%BF%A1%E6%81%AF%E4%BA%A4%E6%8D%A2%E7%94%A8%E6%B1%89%E5%AD%97%E7%BC%96%E7%A0%81%E5%AD%97%E7%AC%A6%E9%9B%86?fromtitle=GB2312&fromid=483170
        byte[] gbkBytes = sample.getBytes("GBK");

        // ISO8859-1 is a single-byte encoding, so each char of the re-decoded string is one GBK byte.
        String byteView = new String(gbkBytes, "ISO8859-1");
        int leadByte = byteView.charAt(0);
        int trailByte = byteView.charAt(1);

        System.out.println(leadByte + "  " + trailByte);
        System.out.println(getFirstLetter(sample));
    }

    /**
     * Returns the lower-case initial letter for the first character of {@code str}.
     *
     * <p>The first character is encoded as GBK. A two-byte result is treated as a Chinese
     * character: its section-position code (区位码) is computed from the two bytes and looked up in
     * {@code li_SecPosValue} to select the matching entry of {@code lc_FirstLetter}. A single-byte
     * ASCII letter is returned lower-cased. Every other case yields {@code "其他"}: null or blank
     * input, non-letter single-byte characters, and two-byte characters that fall outside the
     * ranges covered by the table.
     *
     * <p>Only the very first character of {@code str} is examined; leading whitespace is not
     * skipped.
     *
     * @param str text whose first character is classified; may be {@code null}
     * @return a one-letter lower-case string, or {@code "其他"} when no letter can be determined
     * @throws UnsupportedEncodingException if the runtime lacks the GBK or ISO8859-1 charset
     */
    public static String getFirstLetter(String str) throws UnsupportedEncodingException {
        if (str == null || str.trim().length() == 0) {
            return "其他";
        }

        // Re-decoding the GBK bytes as single-byte ISO8859-1 yields one char per byte,
        // so charAt(i) below is simply the unsigned value of the i-th GBK byte.
        String gbkBytes = new String(str.substring(0, 1).getBytes("GBK"), "ISO8859-1");
        if (gbkBytes.length() == 0) {
            return "其他";
        }

        char lead = gbkBytes.charAt(0);
        if (gbkBytes.length() == 1) {
            // Single byte: only plain ASCII letters map to themselves.
            boolean asciiLetter = (lead >= 'a' && lead <= 'z') || (lead >= 'A' && lead <= 'Z');
            // Character.toLowerCase is locale-independent; String.toLowerCase() would turn
            // "I" into a dotless "ı" under a Turkish default locale.
            return asciiLetter ? String.valueOf(Character.toLowerCase(lead)) : "其他";
        }

        // Two bytes: derive the section and position codes by removing the 0xA0 offset.
        int sectorCode = lead - 160;
        int positionCode = gbkBytes.charAt(1) - 160;

        // GBK extension characters use trail bytes 0x40-0xA0, which give a position code <= 0.
        // Without this guard such a code slides into the previous section's range and a wrong
        // letter is returned (e.g. bytes 0xB1 0x40 -> 1700 - 96 = 1604 -> "a").
        if (positionCode < 1 || positionCode > 94) {
            return "其他";
        }

        int secPosCode = sectorCode * 100 + positionCode;
        // Range i is [li_SecPosValue[i], li_SecPosValue[i + 1]); codes outside every range
        // (below the first threshold or at/after the last one) are not covered by the table.
        for (int i = 0; i < lc_FirstLetter.length; i++) {
            if (secPosCode >= li_SecPosValue[i] && secPosCode < li_SecPosValue[i + 1]) {
                return lc_FirstLetter[i];
            }
        }
        return "其他";
    }