inherit Service.Service;

// Work queue of Doc objects waiting to be indexed: notify() and
// check_ft_integrity() write to it, strip_demon() reads from it.
Thread.Queue oQueue = Thread.Queue();
// NOTE(review): mBuffer and sStripBuffer are not referenced anywhere in the
// visible part of this file - confirm whether they are still needed.
Thread.Mutex mBuffer = Thread.Mutex();
string sStripBuffer;

// Thread object returned by thread_create(strip_demon); zero until started.
object tStripDemon;
// Maps a mime type to the external command used to strip its content to
// plain text. main() replaces this default with the contents of
// config/fulltext.cfg when that file can be read.
mapping mStripFilter = ([ "text/html": "html2text" ]);

#include <events.h>
#include <attributes.h>

/*
 * Snapshot of the data the indexer needs from a document object.
 * All values are read once, at construction time, from the object
 * passed to create().
 */
class Doc {
  int content_id;   // result of get_content_id()
  int id;           // result of get_object_id()
  object doc;       // the document object itself
  string mime;      // value of the DOC_MIME_TYPE attribute
  string name;      // result of get_identifier()

  /* Capture identifier, mime type and ids of the given document. */
  void create(object source) {
    doc = source;
    mime = source->query_attribute(DOC_MIME_TYPE);
    name = source->get_identifier();
    werror("Mimetype for %s is %O\n", name, mime);
    id = source->get_object_id();
    content_id = source->get_content_id();
  }
}

/*
 * Event callback: wraps the first element of args in a Doc and queues
 * it for the strip demon, starting the demon thread first if it is not
 * running yet.
 */
void notify(mixed args)
{
    werror("EVENT_UPLOAD in fulltext.pike [%O]\n" , args);

    // Lazily start the worker before anything is queued.
    if (!tStripDemon) {
        tStripDemon = thread_create(strip_demon);
    }

    oQueue->write(Doc(args[0]));
}

/*
 * Service call entry point. This service only logs the arguments it
 * receives; no work is performed.
 */
void call_service(mixed args)
{
    werror("Service called with %O\n", args);
}

/*
 * Intentionally empty.
 * NOTE(review): presumably a hook expected by the inherited
 * Service.Service - confirm against that class.
 */
static void run()
{
}

/*
 * Index a single document: dump its stored content to "buffer.file",
 * pipe that file through the external strip handler into "strip.file"
 * and store the stripped text in the doc_ft table.
 *
 * Nothing is written to doc_ft when the handler could not be run or its
 * output could not be read, so an existing index entry is never replaced
 * by empty data. Database and file errors are thrown to the caller.
 */
static void strip_document(Doc oDocument, string sStripHandler)
{
    int iContentID = oDocument->content_id;
    int iObID = oDocument->id;

    Sql.Sql handle = Sql.Sql(serverCfg["database"]);

    // Write the raw content records, in order, to the handler's input file.
    Stdio.File temp = Stdio.File("buffer.file", "rwct");
    Sql.sql_result res =
        handle->big_query("select rec_data from doc_data where "+
                          "doc_id = "+iContentID+
                          " order by rec_order");
    if (res) {
        while (mixed data = res->fetch_row())
            temp->write(data[0]);
    }
    temp->close();

    Stdio.File infile = Stdio.File("buffer.file");
    Stdio.File outfile = Stdio.File("strip.file","wct");

    int iExitCode;
    mixed err = catch {
        iExitCode = Process.create_process(
            ({ sStripHandler }),
            ([ "stdin" : infile,
               "stdout" : outfile]))->wait();
    };
    // Release both descriptors whether or not the handler ran.
    infile->close();
    outfile->close();

    if (err) {
        werror("Error during content stripping:\n%s",
               master()->describe_backtrace(err));
        // Do not index: strip.file holds no usable output.
        return;
    }
    if (iExitCode)
        werror("Fulltext - strip handler %O exited with code %d\n",
               sStripHandler, iExitCode);
    werror("Fulltext - stripped content length is %d\n",
           Stdio.file_size("strip.file"));

    string sStripped = Stdio.read_file("strip.file");
    if (!stringp(sStripped)) {
        werror("Fulltext - unable to read strip.file for document %d\n",
               iObID);
        return;
    }

    string query = "replace into doc_ft values("+iObID+","+
        iContentID+",\""+
        handle->quote(sStripped)+"\")";
    handle->big_query(query);
}

/*
 * Worker thread body: blocks on oQueue and indexes every Doc that
 * arrives, using the strip handler configured for its mime type in
 * mStripFilter. Never returns.
 *
 * A failure while indexing one document is logged and the loop goes on
 * to the next one; otherwise a single database or file error would end
 * the thread and silently stop all further indexing.
 */
void strip_demon() {
    werror("Content Strip Service Demon started.\n");
    while (1) {
        Doc oDocument = oQueue->read();
        werror("Indexing Document !\n");
        string sMime = oDocument->mime;
        string sStripHandler = mStripFilter[sMime];
        if (!sStripHandler) {
            werror("No Striphandler configured for %O\n", sMime);
            continue;
        }

        mixed err = catch {
            strip_document(oDocument, sStripHandler);
        };
        if (err)
            werror("Fulltext - indexing of document %O failed:\n%s",
                   oDocument->id, master()->describe_backtrace(err));
    }
}

/*
 * Run a fulltext match of pattern against doc_ft.
 * Returns an array with one fetched row per hit; each row carries the
 * ob_id and the value of the match() expression.
 */
mixed search_documents(string pattern)
{
    string sql = "select ob_id, match(doc_data) against(%s) from doc_ft where match(doc_data) against(%s)";
    object db = Sql.Sql(serverCfg["database"]);
    object hits = db->big_query(sql, pattern, pattern);

    array found = ({});
    for (mixed hit = hits->fetch_row(); hit; hit = hits->fetch_row())
        found += ({ hit });

    return found;
}


/*
 * Create the doc_ft fulltext table on the database addressed by
 * dbhandle unless it already exists.
 */
static void create_table(string dbhandle)
{
    string ddl = "create table if not exists doc_ft (ob_id int, "
                 "doc_id int, doc_data TEXT, FULLTEXT(doc_data))";
    Sql.Sql db = Sql.Sql(dbhandle);
    db->query(ddl);
}


/*
 * Queue every document that has a strippable mime type but no row in
 * doc_ft yet, so that the strip demon indexes it.
 *
 * The SQL executed is equivalent to:
 *
 * create temporary table ft_id (ob_id int primary key, count int);
 * insert into ft_id select distinct ob_id, 0 from ob_data
 *   where ob_attr='DOC_MIME_TYPE'
 *     and (ob_data='"text/html"' or ob_data='"..."');
 * replace into ft_id select distinct ob_id, 1 from doc_ft;
 * delete from ft_id where count = 1;
 *
 * The mime type alternatives are parenthesised: AND binds tighter than
 * OR, so without the parentheses every alternative after the first
 * would match rows of any attribute, not only DOC_MIME_TYPE.
 */
static void check_ft_integrity()
{
    Sql.Sql handle = Sql.Sql(serverCfg["database"]);

    // One "ob_data = ..." condition per configured mime type. The stored
    // value is the mime type wrapped in double quotes.
    array(string) conditions = ({});
    foreach (indices(mStripFilter), mixed mime)
    {
        if (stringp(mime))
            conditions += ({ "ob_data='\"" + handle->quote(mime) + "\"'" });
    }
    if (!sizeof(conditions))
        return;   // no strip handlers configured, nothing can be indexed

    handle->query("create temporary table ft_id "+
                  "(ob_id int primary key, count int)");
    handle->query("insert into ft_id select distinct "+
                  "ob_id,0 from ob_data where ob_attr='DOC_MIME_TYPE' "+
                  "and (" + conditions * " or " + ")");
    handle->query("replace into ft_id select distinct ob_id, 1 from doc_ft");
    handle->query("delete from ft_id where count =1");
    array missing = handle->query("select distinct ob_id from ft_id");
    handle->query("drop table ft_id");

    // Rows left in ft_id are documents without a fulltext entry.
    foreach (missing || ({}), mixed a)
    {
        object o = connection->find_object((int)a["ob_id"]);
        if (objectp(o))
            oQueue->write(Doc(o));
    }
}

/*
 * Signal handler (installed for QUIT in main()): terminates the process
 * immediately with exit status 1. The signal number is ignored.
 */
static private void got_kill(int sig)
{
    _exit(1);  
}

/*
 * Service entry point.
 *
 * Loads the mime type -> strip handler table from config/fulltext.cfg
 * (falling back to the built-in html2text default when that fails),
 * installs the QUIT handler, makes sure the doc_ft table exists and
 * registers the service for EVENT_UPLOAD using argv[1]. Then the strip
 * demon and a one-off integrity check are started in their own threads.
 *
 * Returns 0 when startup fails, -17 otherwise (a negative return value
 * keeps the Pike process running after main() has returned).
 */
int main(int argc, array argv)
{
    if (catch{mStripFilter = read_config_file("config/fulltext.cfg");})
        mStripFilter = ([ "text/html" : "html2text" ]);

    signal(signum("QUIT"), got_kill);

    mixed err = catch{
        create_table(serverCfg["database"]);
        start(argv[1], "fulltext", EVENT_UPLOAD);
    };
    if (err) {
        werror("Startup of fulltext service failed.\n"+
               master()->describe_backtrace(err)+"\n");
        return 0;
    }

    // notify() starts the demon lazily; if an event already did so, do not
    // spawn a second one - both would share buffer.file and strip.file.
    if (!tStripDemon)
        tStripDemon = thread_create(strip_demon);
    thread_create(check_ft_integrity);

    return -17;
}

/*
 * Read the strip handler configuration from the XML file fname.
 *
 * Every child element of the top-level <config> element contributes one
 * entry: the value of its "mime" attribute is the key and the text of
 * its last child node is the handler command.
 *
 * Elements without a "mime" attribute or without any content are
 * skipped with a warning; they would otherwise register a handler under
 * a zero key or abort the whole load with an unrelated error.
 *
 * Returns the mapping mime type -> handler command.
 * Throws an error if the file cannot be parsed or has no <config>
 * element.
 */
mapping read_config_file(string fname)
{
    Parser.XML.Tree.Node node = Parser.XML.Tree.parse_file(fname);
    if (!objectp(node))
        error("Failed to parse config file %s\n", fname);

    node = node->get_first_element("config");
    if (!objectp(node))
        error("Config file %s has no <config> element\n", fname);

    mapping data = ([]);
    foreach(node->get_elements(), Parser.XML.Tree.Node n)
    {
        mixed mime = n->get_attributes()["mime"];
        Parser.XML.Tree.Node content = n->get_last_child();
        if (!stringp(mime) || !objectp(content))
        {
            werror("Ignoring incomplete entry in config file %s\n", fname);
            continue;
        }
        data[mime] = content->get_text();
    }

    return data;
}
