[Fencommits] fenfire-hs: patch utf-8 checking, support timestamped logs in irc2rdf
Tuukka Hastrup
Tuukka.Hastrup at iki.fi
Thu Mar 15 03:50:11 EET 2007
Thu Mar 15 03:50:22 EET 2007 Tuukka Hastrup <Tuukka.Hastrup at iki.fi>
* patch utf-8 checking, support timestamped logs in irc2rdf
diff -rN -u old-fenfire-hs-1/Fenfire/Irc2RDF.hs new-fenfire-hs-1/Fenfire/Irc2RDF.hs
--- old-fenfire-hs-1/Fenfire/Irc2RDF.hs 2007-03-15 03:50:11.000000000 +0200
+++ new-fenfire-hs-1/Fenfire/Irc2RDF.hs 2007-03-15 03:50:11.000000000 +0200
@@ -1,6 +1,14 @@
{-# OPTIONS_GHC -fffi #-}
module Fenfire.Irc2RDF where
+-- Irc2RDF: An IRC to SIOC RDF converter
+-- Standalone compiling:
+-- ghc --make -o Irc2RDF -main-is Fenfire.Irc2RDF.main Irc2RDF.hs
+-- Usage with IRC protocol lines in real-time from stdin:
+-- Irc2RDF http://base-uri/ file-path/ .file-extension
+-- Usage with IRC protocol lines with prepended timestamps from stdin:
+-- Irc2RDF -t http://base-uri/ file-path/ .file-extension
+
-- Copyright (c) 2006-2007, Benja Fallenstein, Tuukka Hastrup
-- This file is part of Fenfire.
--
@@ -19,15 +27,18 @@
-- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
-- MA 02111-1307 USA
-import System.Time (getClockTime, toUTCTime, CalendarTime(..), ClockTime(..))
-import System.Environment (getArgs)
+import System.Time (getClockTime, toUTCTime, CalendarTime(..), ClockTime(..),
+ toClockTime)
+import System.Environment (getArgs, getProgName)
import System.IO (hFlush, stdout)
import System.IO.Unsafe (unsafeInterleaveIO)
import Data.Char (toUpper, toLower)
import Data.Char (ord)
import Data.Bits ((.&.))
+import Data.List (isPrefixOf)
+import Control.Monad (when)
import qualified Control.Exception
import System.Glib.UTFString (newUTFString, readCString,
@@ -45,8 +56,9 @@
fromUTF :: String -> String
fromUTF s = unsafePerformIO $ Control.Exception.catch
(withCString s $ \cstr -> peekUTFString cstr >>= \s' ->
- if (valid cstr (-1) nullPtr) then return s' -- force any exceptions
- else return s )
+ if (valid cstr (-1) nullPtr && all isUnicode s') -- force any exceptions
+ then return s' -- it really was utf-8
+ else return s ) -- it really wasn't utf-8
(\_e -> return s) -- if any, keep the local encoding
-- from gutf8.c used in g_utf8_validate
@@ -67,17 +79,47 @@
turtle_escaped c ('\t':xs) = '\\': 't':turtle_escaped c xs
turtle_escaped c ( x:xs) = x:turtle_escaped c xs
-main = do [root,filepath,extension] <- getArgs
- 'h':'t':'t':'p':':':'/':'/':_ <- return root
- irc <- getContents
- timestamps <- getTimeStamps
+main = do (root,filepath,extension,parseTimeStamps) <- do
+ args <- getArgs
+ case args of
+ [root,filepath,extension] ->
+ return (root,filepath,extension,False)
+ ["-t",root,filepath,extension] ->
+ return (root,filepath,extension,True)
+ _ -> getProgName >>=
+ error . (++": [-t] root filepath extension")
+ when (not $ "http://" `isPrefixOf` root)
+ $ error "The root doesn't start with http://"
+ (irclines,timestamps) <- case parseTimeStamps of
+ False -> do
+ irc <- getContents
+ timestamps <- getTimeStamps
+ return (lines irc,timestamps)
+ True -> do
+ irc <- getContents >>= return . lines
+ let (firsts, rests) = unzip $ map (span (/= ' ')) irc
+ return (map (drop 1) rests, map parseTime firsts)
+
mapM_ (uncurry $ handle root filepath extension)
- $ zip (map fromUTF$lines irc) (uniquify timestamps)
+ $ zip (map fromUTF irclines) (uniquify timestamps)
getTimeStamps = do ~(TOD secs _picos) <- unsafeInterleaveIO getClockTime
xs <- unsafeInterleaveIO getTimeStamps
return (TOD secs 0:xs)
+parseTime :: String -> ClockTime
+parseTime str = toClockTime $ CalendarTime
+ (read year) (toEnum $ read month-1) (read day)
+ (read hour) (read minute) (read second)
+ 0 (toEnum 0) 0 "" ((read tzh*60+read tzm)*60) False
+ where (year, rest0) = span (/= '-') str
+ (month, rest1) = span (/= '-') $ drop 1 rest0
+ (day, rest2) = span (/= 'T') $ drop 1 rest1
+ (hour, rest3) = span (/= ':') $ drop 1 rest2
+ (minute, rest4) = span (/= ':') $ drop 1 rest3
+ (second, rest5) = span (/= '+') $ drop 1 rest4
+ (tzh, tzm ) = splitAt 2 $ drop 1 rest5
+
uniquify [] = []
uniquify (x:xs) = (x,Nothing):uniquify' (x,Nothing) xs
More information about the Fencommits
mailing list