System/Win32/Encoding.hs
{-# LANGUAGE CPP #-}
{- |
Module : System.Win32.Encoding
Copyright : 2012 shelarcy
License : BSD-style
Maintainer : shelarcy@gmail.com
Stability : Provisional
Portability : Non-portable (Win32 API)
Encode/decode multibyte characters using the Win32 API.
-}
module System.Win32.Encoding
( getCurrentCodePage
, encodeMultiByte
, encodeMultiByteIO
, decodeMultiByte
, decodeMultiByteIO
, wideCharToMultiByte
, multiByteToWideChar
) where
import Foreign.C.Types (CInt(..))
import Foreign.C.String (peekCAStringLen, withCWStringLen)
import Foreign.Marshal.Array (allocaArray)
import Foreign.Marshal.Unsafe (unsafeLocalState)
import System.Win32.Console
import System.Win32.NLS
import System.Win32.Types
#include "windows_cconv.h"
-- Note: CodePage = UInt, which might not work on Win64. The Win32 package
-- has the same issue.

-- | Pick the code page to use for the current process.
--
-- The value reported by 'getConsoleCP' is returned when it is greater than
-- zero; otherwise the result of 'getACP' is returned instead.
getCurrentCodePage :: IO DWORD
getCurrentCodePage = getConsoleCP >>= choose
  where
    -- Keep the console's answer only when it is positive.
    choose consoleCP
      | consoleCP > 0 = return consoleCP
      | otherwise     = getACP
-- | Encode a Unicode string as a multibyte string in the given code page.
--
-- The "System.IO" output functions (e.g., `putStr`) don't perform this
-- conversion automatically on Windows, so it has to be applied explicitly.
-- To get the code page for the console, use `getCurrentCodePage`.
--
-- This is the pure counterpart of `encodeMultiByteIO`; the 'IO' action is
-- run through 'unsafeLocalState'.
encodeMultiByte :: CodePage -> String -> String
encodeMultiByte cp str = unsafeLocalState (encodeMultiByteIO cp str)
-- | 'IO' version of `encodeMultiByte`: convert a string to its multibyte
-- form in the given code page by calling `wideCharToMultiByte`.
--
-- Each call is wrapped in 'failIfZero' (labelled @WideCharToMultiByte@), so
-- a zero result from the API is reported as a failure.
encodeMultiByteIO :: CodePage -> String -> IO String
-- WideCharToMultiByte doesn't handle empty strings, so the empty string is
-- answered directly without calling it.
encodeMultiByteIO _ "" = return ""
encodeMultiByteIO cp wstr =
  -- Marshal the input into a wide-character buffer with an explicit length.
  withCWStringLen wstr $ \(widePtr, wideLen) -> do
    -- Both passes share every argument except the output buffer and its size.
    let convert outPtr outSize =
          failIfZero "WideCharToMultiByte" $
            wideCharToMultiByte
              cp
              0
              widePtr
              (fromIntegral wideLen)
              outPtr
              outSize
              nullPtr
              nullPtr
    -- First pass: no output buffer and size 0; the result is used as the
    -- length of the buffer required.
    needed <- convert nullPtr 0
    allocaArray (fromIntegral needed) $ \outBuf -> do
      -- Second pass: fill the buffer; the result is used as the output length.
      written <- convert outBuf needed
      -- Marshal the filled part of the buffer back into a Haskell String.
      peekCAStringLen (outBuf, fromIntegral written)
-- | Raw binding to the Win32 @WideCharToMultiByte@ function.
--
-- The trailing comment on each argument gives the parameter name from the
-- Win32 API signature. In this module, `encodeMultiByteIO` treats a zero
-- result as failure and uses the result of a call with a null output buffer
-- and size 0 as the required buffer length.
foreign import WINDOWS_CCONV "WideCharToMultiByte"
wideCharToMultiByte
:: CodePage -- CodePage
-> DWORD -- dwFlags
-> LPCWSTR -- lpWideCharStr
-> CInt -- cchWideChar
-> LPSTR -- lpMultiByteStr
-> CInt -- cbMultiByte
-> LPCSTR -- lpDefaultChar
-> LPBOOL -- lpUsedDefaultChar
-> IO CInt
-- | Decode a multibyte string in the given code page to a Unicode string.
--
-- The "System.IO" input functions (e.g. `getLine`) don't perform this
-- conversion automatically, so it has to be applied explicitly. To get the
-- code page for the console, use `getCurrentCodePage`.
--
-- This is the pure counterpart of `decodeMultiByteIO`; the 'IO' action is
-- run through 'unsafeLocalState'.
decodeMultiByte :: CodePage -> String -> String
decodeMultiByte cp mbstr = unsafeLocalState (decodeMultiByteIO cp mbstr)
-- | 'IO' version of `decodeMultiByte`.
--
-- This is an alias for `stringToUnicode`, provided because the name
-- `stringToUnicode` does not make clear what the function does.
decodeMultiByteIO :: CodePage -> String -> IO String
decodeMultiByteIO = stringToUnicode
{-# INLINE decodeMultiByteIO #-}