-
Rust: 한글 종성 유니코드 변경하기
컴퓨터/Rust · 2023. 4. 17. 22:18
할 것: 종성이 'ㄲ'인 것을 'ㄴ'으로 바꾸기
(PDF를 txt로 변환하다 보니 "깐"이어야 할 글자가 "깎"처럼 종성이 잘못 들어가 있어서 만들게 됨)
한글 유니코드:
초성: (음절 - 0xAC00) / (21 * 28)
중성: (음절 - 0xAC00) % (21 * 28) / 28
종성: (음절 - 0xAC00) % 28
음절의 종성이 'ㄲ'(02)인 경우, 이를 'ㄴ'(04)으로 바꿔야 한다.
fn main() {
    let input = "갂깡깔깐깎...";
    let result = replace_gg_with_n(input);
    println!("Result: {result}"); // Result: 간깡깔깐깐...
}

/// Replaces the final consonant 'ㄲ' with 'ㄴ' in every precomposed Hangul syllable.
///
/// A precomposed syllable in `U+AC00..=U+D7A3` is encoded as
/// `0xAC00 + initial * 21 * 28 + medial * 28 + final`, so the final consonant
/// can be swapped by adjusting the code point by the difference between the
/// two final-consonant indices; the initial and medial parts are untouched.
///
/// Characters outside the Hangul syllable block, and syllables whose final
/// consonant is not 'ㄲ', are copied through unchanged.
///
/// # Examples
///
/// `replace_gg_with_n("갂깡깔깐깎...")` returns `"간깡깔깐깐..."`.
fn replace_gg_with_n(text: &str) -> String {
    // First and last code points of the precomposed Hangul syllable block.
    const HANGUL_FIRST: u32 = 0xAC00;
    const HANGUL_LAST: u32 = 0xD7A3;
    // Number of final-consonant slots per (initial, medial) pair, including "none".
    const JONGSEONG_COUNT: u32 = 28;
    // Final-consonant indices of 'ㄲ' and 'ㄴ'.
    const FINAL_SSANG_GIYEOK: u32 = 2;
    const FINAL_NIEUN: u32 = 4;

    text.chars()
        .map(|ch| {
            let code = u32::from(ch);
            if !(HANGUL_FIRST..=HANGUL_LAST).contains(&code) {
                return ch;
            }
            if (code - HANGUL_FIRST) % JONGSEONG_COUNT != FINAL_SSANG_GIYEOK {
                return ch;
            }
            // Only the final-consonant offset changes, so the result stays
            // inside the same (initial, medial) group of 28 syllables.
            char::from_u32(code - FINAL_SSANG_GIYEOK + FINAL_NIEUN)
                .expect("swapping a final consonant stays inside the Hangul syllable block")
        })
        .collect()
}
좀 더 응용하면 원하는 종성을 지정해서 아래처럼 만들 수 있을 것 같다.
use std::collections::HashMap;

/// Looks up the final-consonant (jongseong) index of a Hangul compatibility jamo.
///
/// The index is the value of `(syllable - 0xAC00) % 28` for a precomposed
/// syllable carrying that final consonant. A space (`' '`) stands for
/// "no final consonant" and maps to index 0.
///
/// Returns `None` when `jongseong` is not one of the 28 recognised keys.
fn get_jongseong_index(jongseong: char) -> Option<u16> {
    let jongseong_map: HashMap<char, u16> = HashMap::from([
        (' ', 0),
        ('ㄱ', 1),
        ('ㄲ', 2),
        ('ㄳ', 3),
        ('ㄴ', 4),
        ('ㄵ', 5),
        ('ㄶ', 6),
        ('ㄷ', 7),
        ('ㄹ', 8),
        ('ㄺ', 9),
        ('ㄻ', 10),
        ('ㄼ', 11),
        ('ㄽ', 12),
        ('ㄾ', 13),
        ('ㄿ', 14),
        ('ㅀ', 15),
        ('ㅁ', 16),
        ('ㅂ', 17),
        ('ㅄ', 18),
        ('ㅅ', 19),
        ('ㅆ', 20),
        ('ㅇ', 21),
        ('ㅈ', 22),
        ('ㅊ', 23),
        ('ㅋ', 24),
        ('ㅌ', 25),
        ('ㅍ', 26),
        ('ㅎ', 27),
    ]);
    jongseong_map.get(&jongseong).copied()
}

/// Replaces the final consonant `original` with `replacement` in every
/// precomposed Hangul syllable of `s`.
///
/// Both `original` and `replacement` are given as compatibility jamo (or a
/// space for "no final consonant"). Characters outside `U+AC00..=U+D7A3`, and
/// syllables whose final consonant differs from `original`, are copied
/// through unchanged.
///
/// # Panics
///
/// Panics if `original` or `replacement` is not a key recognised by
/// `get_jongseong_index`.
fn replace_jongseong(s: &str, original: char, replacement: char) -> String {
    // First and last code points of the precomposed Hangul syllable block.
    const HANGUL_FIRST: u32 = 0xAC00;
    const HANGUL_LAST: u32 = 0xD7A3;
    // Number of final-consonant slots per (initial, medial) pair, including "none".
    const JONGSEONG_COUNT: u32 = 28;

    let original_index = u32::from(
        get_jongseong_index(original)
            .expect("`original` must be a final-consonant jamo or a space"),
    );
    let replacement_index = u32::from(
        get_jongseong_index(replacement)
            .expect("`replacement` must be a final-consonant jamo or a space"),
    );

    s.chars()
        .map(|c| {
            let code = u32::from(c);
            // Anything that is not a precomposed Hangul syllable is left alone.
            if !(HANGUL_FIRST..=HANGUL_LAST).contains(&code) {
                return c;
            }
            // Syllables with a different final consonant are left alone.
            if (code - HANGUL_FIRST) % JONGSEONG_COUNT != original_index {
                return c;
            }
            // Only the final-consonant offset changes, so the result stays
            // inside the same (initial, medial) group of 28 syllables.
            char::from_u32(code - original_index + replacement_index)
                .expect("swapping a final consonant stays inside the Hangul syllable block")
        })
        .collect()
}

fn main() {
    let hey = "깔깘깎꾼";
    let result = replace_jongseong(hey, 'ㄽ', 'ㅋ');
    let result = replace_jongseong(&result, 'ㄲ', 'ㄴ');
    println!("{result}"); // 깔깤깐꾼
}
'컴퓨터 > Rust' 카테고리의 다른 글
- Rust: async의 늪 (0) · 2023.04.18
- Rust: cargo something 만들기 (0) · 2023.03.28
- Rust: 카카오 Karlo API wrapper (0) · 2023.03.24