LISPUSER

LISPMEMOLisp is like a ball of mud - you can throw anything you want into it, and it's still Lisp. -- Anonymous

(top)  (memo)  (rss)

CFFI で簡単 FFI - libcharguess, NKF32.DLL

id:rubikitch さんのところに C のライブラリを FFI 経由で呼びたい、というネタがありましたので解答しておきます。

[追記] 別ページ: http://lispuser.net/commonlisp/japanese.html へ移動。

static VALUE cg_s_guess(VALUE klass, VALUE str) {
  const char*ptr;
  int ret;
  Check_Type(str, T_STRING);
  ret = CharGuessInit();
  ptr = GuessChardet((const char *)RSTRING(str)->ptr);
  ret = CharGuessDone();
  return ptr ? rb_str_new2(ptr) : Qnil;
}

上記のようなモジュールを、素直に CFFI で表現すると次のようになります。

libcharguess

パッケージの定義と共有ライブラリのロード

(defpackage :charguess.cffi (:use :cl :cffi) (:export :guess))
(in-package :charguess.cffi)

(load-foreign-library "/home/onjo/lisp/user/charguess/libcharguess/libcharguess.so")

処理本体

これにはいくつかやり方があって、FFI 経由で関数を呼び出す Lisp 関数を定義する方法と、

;; パターン1: Lisp 関数を定義して使う
(defcfun ("CharGuessInit" char-guess-init) :int)
(defcfun ("GuessChardet" guess-chardet) :string
  (str (:pointer :uchar)))
(defcfun ("CharGuessDone" char-guess-done) :int)

(defun guess (vec &aux (len (length vec)))
  (check-type vec (array (unsigned-byte 8) *) "error")
  (with-foreign-string (ptr vec)
    (prog2
        (char-guess-init)
        (guess-chardet ptr)
      (char-guess-done))))

foreign-funcall / foreign-funcall-ptr 関数をつかって直接関数を呼び出す方法です。

;; パターン2: 直接呼び出す
(defun guessl (vec &aux (len (length vec)))
  (check-type vec (array (unsigned-byte 8) *) "error")
  (with-foreign-string (ptr vec)
    (prog2
        (foreign-funcall "CharGuessInit" :int)
        (foreign-funcall "GuessChardet" :pointer ptr :string)
      (foreign-funcall "CharGuessDone" :int))))

あるいは、libcharguess.a を使って、もっとシンプルなインターフェースのライブラリを作る という手もあります。

#include "charguess.h"

char* guess(char *str)
{
    char *ret;
    CharGuessInit();
    ret = GuessChardet(str);
    CharGuessDone();
    return ret;
}

これを libguess.so にコンパイルします。すると CFFI によるインターフェースは次のようになります。

 (defpackage :libguess (:use :cl :cffi) (:export :guess))
 (in-package :libguess)
 
 (load-foreign-library "/home/onjo/lisp/user/charguess/libcharguess/libguess.so")
 
 (defcfun ("guess" %guess) :string
   (str (:pointer :uchar)))
 
 (defun guess (vec &aux (len (length vec)))
   (check-type vec (array (unsigned-byte 8) *) "error")
   (with-foreign-string (ptr vec)
     (%guess ptr)))

実際にはライブラリの探索等でもう少し技が必要になりますが、その辺は CFFI のマニュアルを見てください。

とか手抜きかつスマートに終ろうかとおもったらトラブルが。SBCL 1.0.8.18 と CFFI-0.9.2 で試したら エラーがでますね…。

(defpackage :libcharguess (:use :cl :cffi) (:export :guess))
(in-package :libcharguess)

(load-foreign-library "/home/onjo/lisp/user/charguess/libcharguess/libcharguess.so")

(defcfun ("CharGuessInit" char-guess-init) :int)
(defcfun ("GuessChardet" guess-chardet) :string
  (str (:pointer :uchar)))
(defcfun ("CharGuessDone" char-guess-done) :int)

(defun guess (vec &aux (len (length vec)))
  (check-type vec (array (unsigned-byte 8) *) "error")
  (with-foreign-string (ptr vec)
    (prog2
       (char-guess-init)
       (guess-chardet ptr)
      (char-guess-done))))
    
(defun test ()
  (dolist (str '("hello" "日本語" "日本語のテスト"))
    (dolist (encoding #+clisp '(charset:utf-8 charset:euc-jp charset:shift-jis charset:iso-2022-jp)
                      #+sbcl '(:utf-8 :euc-jp :sjis))
       (let ((vec #+clisp (ext:convert-string-to-bytes str encoding)
                  #+sbcl (sb-ext:string-to-octets str :external-format encoding)))
        format t "~A => ~A => ~A~%" str encoding (guess vec)))))

CLISP だと動いたんですが、SBCL だと↓のようなエラーが。

  1. SBCL のバグ 2. CFFI のバグ 3. CFFI の使い方をまちがっている のどれかだろうなー。

比較しようにもメインの Windows 環境には libcharguess.dll ないですし、そもそも CFFI はまだ 最新の AllegroCL 8.1 の FFI に追従してないので試しようがない。眠いのでこのへんで。

arithmetic error FLOATING-POINT-INVALID-OPERATION signalled
   [Condition of type FLOATING-POINT-INVALID-OPERATION]

Restarts:
 0: [ABORT] Return to SLIME's top level.
 1: [TERMINATE-THREAD] Terminate this thread (#<THREAD "repl-thread" {10149CE1}>)

Backtrace:
  0: ((FLET SB-UNIX::WITH-INTERRUPTS-THUNK))
      [No Locals]
  1: ((FLET SB-UNIX::WITH-INTERRUPTS-THUNK))
      Locals:
        SB-DEBUG::ARG-0 = 0
  2: (SB-UNIX::CALL-WITH-INTERRUPTS
      #<CLOSURE (FLET SB-UNIX::WITH-INTERRUPTS-THUNK) {B61583B5}>
      T)
      Locals:
        SB-DEBUG::ARG-0 = #<CLOSURE (FLET SB-UNIX::WITH-INTERRUPTS-THUNK) {B61583B5}>
        SB-DEBUG::ARG-1 = T
  3: (SB-VM:SIGFPE-HANDLER
      #<unavailable argument>
      #.(SB-SYS:INT-SAP #XB615872C)
      #<unavailable argument>)
  4: (SB-UNIX::CALL-ALLOWING-WITH-INTERRUPTS
      #<CLOSURE (FLET SB-UNIX::ALLOW-WITH-INTERRUPTS-THUNK) {B6158405}>
      T)
  5: ((FLET SB-UNIX::WITHOUT-INTERRUPTS-THUNK) T)
  6: ((FLET SB-UNIX::RUN-WITHOUT-INTERRUPTS))
  7: (SB-UNIX::CALL-WITHOUT-INTERRUPTS
      #<CLOSURE (FLET SB-UNIX::WITHOUT-INTERRUPTS-THUNK) {B61584B5}>)
  8: (SB-SYS:INVOKE-INTERRUPTION
      #<CLOSURE (FLET SB-UNIX::INTERRUPTION) {B61584E5}>)
  9: ((FLET SB-UNIX::RUN-HANDLER)
      8
      #.(SB-SYS:INT-SAP #XB615872C)
      #.(SB-SYS:INT-SAP #XB61587AC))
 10: ("foreign function: call_into_lisp")
 11: ("foreign function: funcall3")
 12: ("foreign function: interrupt_handle_now")
 13: ("bogus stack frame")
 14: ("foreign function: _ZN19nsUniversalDetector7DataEndEv")
 15: ("foreign function: GuessChardet")

8/9 追記: で、朝おきて見なおすと、ん?バグでもなんでもねぇ。SIGFPE って浮動小数点例外じゃないか…。 で、 INVALID-OPERATION とでているので、いわゆる NaN になる演算やらかして例外がおきたんですね。 SBCL ですと sb-int:with-float-traps-masked で :invalid をマスクしてやれば普通に NaN が返された状態で動作します。 スタックトレースからみても発生しているのはおそらく libcharguess.so の中でしょう。

(defpackage :libcharguess (:use :cl :cffi) (:export :guess))
(in-package :libcharguess)

(define-foreign-library libcharguess
  (:unix "libcharguess.so")
  (:windows "libcharguess.dll"))

(use-foreign-library libcharguess)

(defmacro without-fp-trap (&body body)
  #+sbcl
  `(sb-int:with-float-traps-masked (:invalid)
     ,@body)
  #-sbcl
  `(progn ,@body))

(defcfun ("CharGuessInit" char-guess-init) :int)
(defcfun ("GuessChardet" guess-chardet) :string
  (str (:pointer :uchar)))
(defcfun ("CharGuessDone" char-guess-done) :int)

(defun guess (vec &aux (len (length vec)))
  (check-type vec (array (unsigned-byte 8) *) "error")
  (with-foreign-string (ptr vec)
    (without-fp-trap
      (prog2
          (char-guess-init)
          (guess-chardet ptr)
        (char-guess-done)))))
(defpackage :libguess (:use :cl :cffi) (:export :guess))
(in-package :libguess)


(defcfun ("guess" %guess) :string
  (str (:pointer :uchar)))

(defmacro without-fp-trap (&body body)
  #+sbcl
  `(sb-int:with-float-traps-masked (:invalid)
     ,@body)
  #-sbcl
  `(progn ,@body))

(defun guess (vec &aux (len (length vec)))
  (check-type vec (array (unsigned-byte 8) *) "error")
  (with-foreign-string (ptr vec)
    (without-fp-trap
      (%guess ptr))))

これで無事動作しました。以下テスト。

 CL-USER> (defun test ()
  (format t "[~A ~A]~%" (lisp-implementation-type) (lisp-implementation-version))
  (dolist (str '("hello" "日本語" "日本語のテスト"))
    (dolist (encoding #+clisp '(charset:utf-8 charset:euc-jp charset:shift-jis charset:iso-2022-jp)
                      #+sbcl '(:utf-8 :euc-jp :sjis))
      (let ((vec #+clisp (ext:convert-string-to-bytes str encoding)
                 #+sbcl (sb-ext:string-to-octets str :external-format encoding)))
        (format t "libcharguess:guess ~A => ~A => ~A~%" str encoding (libcharguess:guess vec))
        (format t "libguess:guess ~A => ~A => ~A~%" str encoding (libguess:guess vec))))))
 STYLE-WARNING: redefining TEST in DEFUN
 TEST
 CL-USER> (test)
 [SBCL 1.0.8.18]
 libcharguess:guess hello => UTF-8 => NIL
 libguess:guess hello => UTF-8 => NIL
 libcharguess:guess hello => EUC-JP => NIL
 libguess:guess hello => EUC-JP => NIL
 libcharguess:guess hello => SJIS => NIL
 libguess:guess hello => SJIS => NIL
 libcharguess:guess 日本語 => UTF-8 => UTF-8
 libguess:guess 日本語 => UTF-8 => UTF-8
 libcharguess:guess 日本語 => EUC-JP => EUC-JP
 libguess:guess 日本語 => EUC-JP => EUC-JP
 libcharguess:guess 日本語 => SJIS => Shift_JIS
 libguess:guess 日本語 => SJIS => Shift_JIS
 libcharguess:guess 日本語のテスト => UTF-8 => UTF-8
 libguess:guess 日本語のテスト => UTF-8 => UTF-8
 libcharguess:guess 日本語のテスト => EUC-JP => EUC-JP
 libguess:guess 日本語のテスト => EUC-JP => EUC-JP
 libcharguess:guess 日本語のテスト => SJIS => Shift_JIS
 libguess:guess 日本語のテスト => SJIS => Shift_JIS
 NIL
 
 CL-USER> (libguess::test)
 [CLISP 2.41 (2006-10-13) (built 3392233200) (memory 3394623069)]
 libcharguess:guess hello => UTF-8 => NIL
 libguess:guess hello => UTF-8 => NIL
 libcharguess:guess hello => EUC-JP => NIL
 libguess:guess hello => EUC-JP => NIL
 libcharguess:guess hello => SHIFT-JIS => NIL
 libguess:guess hello => SHIFT-JIS => NIL
 libcharguess:guess hello => ISO-2022-JP => NIL
 libguess:guess hello => ISO-2022-JP => NIL
 libcharguess:guess 日本語 => UTF-8 => UTF-8
 libguess:guess 日本語 => UTF-8 => UTF-8
 libcharguess:guess 日本語 => EUC-JP => EUC-JP
 libguess:guess 日本語 => EUC-JP => EUC-JP
 libcharguess:guess 日本語 => SHIFT-JIS => Shift_JIS
 libguess:guess 日本語 => SHIFT-JIS => Shift_JIS
 libcharguess:guess 日本語 => ISO-2022-JP => ISO-2022-JP
 libguess:guess 日本語 => ISO-2022-JP => ISO-2022-JP
 libcharguess:guess 日本語のテスト => UTF-8 => UTF-8
 libguess:guess 日本語のテスト => UTF-8 => UTF-8
 libcharguess:guess 日本語のテスト => EUC-JP => EUC-JP
 libguess:guess 日本語のテスト => EUC-JP => EUC-JP
 libcharguess:guess 日本語のテスト => SHIFT-JIS => Shift_JIS
 libguess:guess 日本語のテスト => SHIFT-JIS => Shift_JIS
 libcharguess:guess 日本語のテスト => ISO-2022-JP => ISO-2022-JP
 libguess:guess 日本語のテスト => ISO-2022-JP => ISO-2022-JP
 NIL

NKF32.DLL for AllegroCL binding

ついでに NKF32.DLL の CFFI バインディングを書いてみた。

:
(nkf:convert "-j" (excl:string-to-octets "日本語" :external-format :euc-jp))
=> iso-2022-jp でエンコードされた配列が返る
(nkf:convert "-s" (excl:string-to-octets "日本語" :external-format :euc-jp))
=> Shift_JIS でエンコードされた配列が返る
(nkf:convert "-w8" (excl:string-to-octets "日本語" :external-format :euc-jp))
=> UTF-8 でエンコードされた配列が返る

とやると、

:
;;;
;;; NFK32.DLL binding for CFFI  by Masayuki Onjo <onjo@lispuser.net>
;;;

(defpackage :nkf (:use :cl :cffi) (:export :version :convert :guess))
(in-package :nkf)

;;; Define Library

(define-foreign-library libnkf
  (:unix "nkf.so")
  (:windows "nkf32.dll"))

(use-foreign-library libnkf)

;;; C Functions

;; BOOL WINAPI GetNkfVersionSafe(LPTSTR verStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);
;; int CALLBACK CLASS_DECLSPEC SetNkfOption(LPCSTR optStr);
;;  BOOL WINAPI CLASS_DECLSPEC NkfConvertSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/, LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/);
;; BOOL WINAPI CLASS_DECLSPEC GetNkfGuessA(LPWSTR outStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);

(defcfun ("GetNkfVersionSafeA" %get-nkf-version-safe) :int
  (verStr :string)
  (nBufferLength :int)
  (lpTCHARsReturned :pointer))

(defcfun ("SetNkfOption" %set-nfk-option) :int
  (optStr :string))

(defcfun ("NkfConvertSafe" %nkf-convert-safe) :int
  (outStr :string)
  (nBufferLength :long)
  (lpBytesReturned (:pointer :long))
  (inStr :string)
  (nInBufferLength :long))

(defcfun ("GetNkfGuessA" %get-nkf-guess) :int
  (outStr :string)
  (nBufferLength :int)
  (lpTCHARsReturned :pointer))

;;; Lisp level API

(defun version ()
  "バージョン情報の取得"
  (with-foreign-objects ((verStr :uchar 256)
                        (nBufferLength :long)
                        (lpTCHARsReturned :long))
    (%get-nkf-version-safe verStr nBufferLength lpTCHARsReturned)
    (foreign-string-to-lisp verStr (mem-ref lpTCHARsReturned :long))))

(defun convert (option string &optional (size (* 2 (length string))) &aux (len (length string)))
  "NKF のオプションに沿って文字列エンコーディングを変換"
  (with-foreign-string (inStr string)
    (with-foreign-objects ((outStr :uchar size)
                           (lpBytesReturned :long))
      (when option
        (%set-nfk-option option))
      (%nkf-convert-safe outStr size lpBytesReturned inStr len)
      (let ((new-size (mem-ref lpBytesReturned :long)))
        (if (> new-size size)
            ;; 出力バッファ長不足のためリトライ
            (convert option string new-size) 
            ;; バッファ長に収まったのでベクタに戻す
            (let ((vec (make-array (1- new-size) :element-type '(unsigned-byte 8) :initial-element 0)))
              (loop for i from 0 below (1- new-size)
                    do (setf (aref vec i) (mem-ref outStr :uchar i))
                    finally (return vec))))))))

(defun guess (string)
  "エンコーディングを推定"
  (with-foreign-objects ((outStr :uchar 256)
                        (nBufferLength :long)
                        (lpTCHARsReturned :long))
    (convert "-g" string)
    (%get-nkf-guess outStr nBufferLength lpTCHARsReturned)
    (foreign-string-to-lisp outStr (mem-ref lpTCHARsReturned :long))))

posted: 2007/08/08 23:53 | permanent link to this entry | Tags: LISP

(top)  (memo)  (rss)