ABOUT ME

-

  • Rust: 한글 종성 유니코드 변경하기
    컴퓨터/Rust 2023. 4. 17. 22:18
    728x90
    반응형

    할 것: 종성이 'ㄲ'인 것을 'ㄴ'으로 바꾸기

    (pdf를 txt로 변환하다 보니 "깐"인데 "깎" 이런 식으로 되어 있어서 만들게 됨)

     

    한글 유니코드:

    초성: (음절 - 0xAC00) / (21 * 28)

    중성: (음절 - 0xAC00) % (21 * 28) / 28

    종성: (음절 - 0xAC00) % 28

    음절의 종성이 'ㄲ'(02)인 경우, 이를 'ㄴ'(04)으로 바꿔야 한다.

     

    rust
    /// Demo entry point: fixes the mis-recognised final consonants in a sample string.
    fn main() {
        let input = "갂깡깔깐깎...";
        let result = replace_gg_with_n(input);
        // Prints: Result: 간깡깔깐깐...
        println!("Result: {:}", result);
    }

    /// Replaces the final consonant 'ㄲ' with 'ㄴ' in every precomposed Hangul syllable.
    ///
    /// Precomposed syllables occupy U+AC00..=U+D7A3 and are laid out as
    /// `0xAC00 + (initial * 21 + medial) * 28 + final`, so the final-consonant
    /// index is `(code - 0xAC00) % 28`. Index 2 is 'ㄲ' and index 4 is 'ㄴ'.
    ///
    /// Characters outside that range, and syllables whose final consonant is
    /// not 'ㄲ', are copied to the result unchanged.
    fn replace_gg_with_n(text: &str) -> String {
        const HANGUL_FIRST: u32 = 0xAC00;
        const HANGUL_LAST: u32 = 0xD7A3;
        const FINALS_PER_MEDIAL: u32 = 28;
        const FINAL_GG: u32 = 2;
        const FINAL_N: u32 = 4;

        // Every syllable is 3 bytes in UTF-8 before and after the swap, so the
        // result has exactly the same byte length as the input.
        let mut output = String::with_capacity(text.len());

        for ch in text.chars() {
            let code = u32::from(ch);
            let has_gg_final = (HANGUL_FIRST..=HANGUL_LAST).contains(&code)
                && (code - HANGUL_FIRST) % FINALS_PER_MEDIAL == FINAL_GG;

            let fixed = if has_gg_final {
                // Initial and medial stay the same, so only the final-consonant
                // offset moves; the result is still inside the syllable block.
                char::from_u32(code - FINAL_GG + FINAL_N).unwrap_or(ch)
            } else {
                ch
            };
            output.push(fixed);
        }

        output
    }

     

    좀 더 응용하면 원하는 종성을 지정해서 아래처럼 만들 수 있을 것 같다.

    rust
    use std::collections::HashMap;

    /// Returns the final-consonant (jongseong) index, 0..=27, for a jamo character.
    ///
    /// Accepted spellings:
    /// * `' '` (space) means "no final consonant" and maps to 0;
    /// * Hangul Compatibility Jamo such as 'ㄱ', 'ㄲ', 'ㄴ' map to 1..=27;
    /// * conjoining final jamo U+11A8..=U+11C2 map to 1..=27 as well.
    ///
    /// Any other character yields `None`.
    fn get_jongseong_index(jongseong: char) -> Option<u16> {
        // Final consonants in Unicode composition order, index 1 through 27.
        const COMPAT_FINALS: [char; 27] = [
            'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ', 'ㄹ', 'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ',
            'ㅀ', 'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ',
        ];
        // U+11A8 is the conjoining final 'ㄱ' (index 1); the block is contiguous.
        const CONJOINING_FIRST: u32 = 0x11A8;

        let mut jongseong_map: HashMap<char, u16> =
            HashMap::with_capacity(1 + 2 * COMPAT_FINALS.len());
        jongseong_map.insert(' ', 0);
        for (index, compat) in (1u16..).zip(COMPAT_FINALS.iter().copied()) {
            jongseong_map.insert(compat, index);
            if let Some(conjoining) = char::from_u32(CONJOINING_FIRST + u32::from(index) - 1) {
                jongseong_map.insert(conjoining, index);
            }
        }

        jongseong_map.get(&jongseong).copied()
    }

    /// Replaces the final consonant `original` with `replacement` in every
    /// precomposed Hangul syllable of `s`.
    ///
    /// Syllables with a different final consonant and all non-Hangul characters
    /// are copied unchanged. Pass `' '` for "no final consonant".
    ///
    /// # Panics
    ///
    /// Panics if `original` or `replacement` is not recognised by
    /// `get_jongseong_index`.
    fn replace_jongseong(s: &str, original: char, replacement: char) -> String {
        const HANGUL_FIRST: u32 = 0xAC00;
        const HANGUL_LAST: u32 = 0xD7A3;
        const FINALS_PER_MEDIAL: u32 = 28;

        let original_index = u32::from(
            get_jongseong_index(original).expect("`original` must be a final-consonant jamo or ' '"),
        );
        let replacement_index = u32::from(
            get_jongseong_index(replacement)
                .expect("`replacement` must be a final-consonant jamo or ' '"),
        );

        s.chars()
            .map(|c| {
                let code = u32::from(c);
                if !(HANGUL_FIRST..=HANGUL_LAST).contains(&code) {
                    // Not a precomposed Hangul syllable.
                    return c;
                }
                if (code - HANGUL_FIRST) % FINALS_PER_MEDIAL != original_index {
                    // Different final consonant.
                    return c;
                }
                // Only the final-consonant offset changes, so the new code point
                // stays inside the same 28-wide initial+medial group.
                char::from_u32(code - original_index + replacement_index).unwrap_or(c)
            })
            .collect()
    }

    /// Demo entry point: applies two final-consonant replacements in sequence.
    fn main() {
        let hey = "깔깘깎꾼";
        let result = replace_jongseong(hey, 'ㄽ', 'ㅋ');
        let result = replace_jongseong(&result, 'ㄲ', 'ㄴ');
        // Prints: 깔깤깐꾼
        println!("{result}");
    }
    728x90

    '컴퓨터 > Rust' 카테고리의 다른 글

    Rust: async의 늪  (0) 2023.04.18
    Rust: cargo something 만들기  (0) 2023.03.28
    Rust: 카카오 Karlo API wrapper  (0) 2023.03.24

    댓글