# Load the semicolon-separated character table. The file has no header row,
# so columns are addressed by position throughout this script.
unicode <- read.csv(
  "UnicodeData.txt",
  sep = ";",
  header = FALSE,
  stringsAsFactors = FALSE
)

# Column 1 (code point) and column 13 (the value myToUpper() uses as the
# uppercase code point) are stored as hexadecimal text; convert both to
# integers so they can be compared with the code points in charDF.
unicode[[1]] <- strtoi(unicode[[1]], base = 16L)

# Most rows have an empty column 13. Turn those into NA explicitly so that
# "no mapping" does not depend on how strtoi() treats empty strings.
upperHex <- as.character(unicode[[13]])
upperHex[!nzchar(upperHex)] <- NA_character_
unicode[[13]] <- strtoi(upperHex, base = 16L)
rm(upperHex)

# Lookup table between code points and the characters this script supports:
# the digits 0-9, A-Z and a-z.
digitsDF <- data.frame(
  codepoint = 48:57,
  char = as.character(0:9),
  stringsAsFactors = FALSE
)
lowerDF <- data.frame(
  codepoint = 97:122,
  char = letters,
  stringsAsFactors = FALSE
)
upperDF <- data.frame(
  codepoint = 65:90,
  char = LETTERS,
  stringsAsFactors = FALSE
)
charDF <- rbind(digitsDF, upperDF, lowerDF)

# Return the character stored in charDF for a code point.
#
# code: a single integer code point.
# Returns a length-one character vector, or character(0) when the code point
# is not present in charDF.
codeToLetter <- function(code) {
  charDF$char[charDF$codepoint == code]
}

# Return the code point stored in charDF for a character.
#
# letter: a single one-character string.
# Returns a length-one integer vector, or integer(0) when the character is
# not present in charDF.
letterToCode <- function(letter) {
  charDF$codepoint[charDF$char == letter]
}

# Upper-case a single character using the mapping in column 13 of `unicode`.
#
# char: a single one-character string.
# Returns the mapped character, or `char` unchanged when it cannot be mapped:
# the character is not in charDF, the table has no (or an NA) mapping for it,
# or the mapped code point is itself not in charDF.
myToUpper <- function(char) {
  code <- letterToCode(char)
  # Characters outside charDF yield integer(0); without this guard the
  # is.na() test below would fail with "argument is of length zero".
  if (length(code) != 1L) {
    return(char)
  }

  # which() drops NA comparisons so only genuinely matching rows are used.
  upperCode <- unicode[[13]][which(unicode[[1]] == code)]
  if (length(upperCode) != 1L || is.na(upperCode)) {
    return(char)
  }

  upperChar <- codeToLetter(upperCode)
  # The mapped code point may fall outside charDF; keep the input then
  # rather than returning a zero-length vector.
  if (length(upperChar) != 1L) {
    return(char)
  }
  upperChar
}